summaryrefslogblamecommitdiffstats
path: root/src/video_core/shader/decode/image.cpp
blob: e6cc831fc6643f384738db48b42f61145de83e34 (plain) (tree)
1
2
3
4
5
6
7
8
9



                                            

                         
                    

                       

                          
                             
                                
                               

                                          
                                               
                                          
                                        
                                        




                                 
                                   
                               


                                    

           
                                                                     
























































































                                           
     
                                                                 





































































































































                                                                                                




















                                                                             









                                                                                    
                                    
                            







                                                                                     















                                                                                             






                                                                                              

                                                        










                                                                                               
                                                                                        







                                                                                                    







                                                                                                    






                                                                     









                                                                                               


                                                                                            
                     
 
                                            
                                                                                                    
                          

                                                                      
 




                              
             


              
                            



                                                                                             






                                                                  


                                                                                     

                          

                                                                                       




                                                    
                                                      














                                                               
                    
                      
             



                                                                                       

            


                                                    

                                                  
 
                                                  
                                                                                           


              
                                                                                      




              
                                                                                      







                                                                                           

     

                                                                 

 
                                                                                               











                                                                                           
     
 

                                                                      

 
                                  
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma optimize("", off)

#include <algorithm>
#include <vector>
#include <fmt/format.h>

#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/textures/texture.h"

namespace VideoCommon::Shader {

using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::PredCondition;
using Tegra::Shader::StoreType;
using Tegra::Texture::ComponentType;
using Tegra::Texture::TextureFormat;
using Tegra::Texture::TICEntry;

namespace {
/// Looks up the component type stored in a TIC entry for one component of its texture format.
///
/// The component index follows the order in which the channels are spelled in the format name
/// (for example, index 0 of A8R8G8B8 is the alpha channel).
///
/// @param tic       Texture descriptor whose per-channel types are queried.
/// @param component Zero-based component index.
/// @returns The type of the selected channel. When the format is not handled, or the index has no
///          channel in that format, the condition is reported through UNIMPLEMENTED_MSG and
///          ComponentType::FLOAT is returned.
ComponentType GetComponentType(TICEntry tic, std::size_t component) {
    const TextureFormat format{tic.format};
    switch (format) {
    // Channels ordered R, G, B, A.
    case TextureFormat::R16_G16_B16_A16:
    case TextureFormat::R32_G32_B32_A32:
    case TextureFormat::R32_G32_B32:
    case TextureFormat::R32_G32:
    case TextureFormat::R16_G16:
    case TextureFormat::R32:
    case TextureFormat::R16:
    case TextureFormat::R8:
    case TextureFormat::R1:
        switch (component) {
        case 0:
            return tic.r_type;
        case 1:
            return tic.g_type;
        case 2:
            return tic.b_type;
        case 3:
            return tic.a_type;
        default:
            break;
        }
        break;
    // Channels ordered A, R, G, B.
    case TextureFormat::A8R8G8B8:
        switch (component) {
        case 0:
            return tic.a_type;
        case 1:
            return tic.r_type;
        case 2:
            return tic.g_type;
        case 3:
            return tic.b_type;
        default:
            break;
        }
        break;
    // Channels ordered A, B, G, R.
    case TextureFormat::A2B10G10R10:
    case TextureFormat::A4B4G4R4:
    case TextureFormat::A5B5G5R1:
    case TextureFormat::A1B5G5R5:
        switch (component) {
        case 0:
            return tic.a_type;
        case 1:
            return tic.b_type;
        case 2:
            return tic.g_type;
        case 3:
            return tic.r_type;
        default:
            break;
        }
        break;
    // Channels ordered R, B, G.
    case TextureFormat::R32_B24G8:
        switch (component) {
        case 0:
            return tic.r_type;
        case 1:
            return tic.b_type;
        case 2:
            return tic.g_type;
        default:
            break;
        }
        break;
    // Channels ordered B, G, R.
    case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
        switch (component) {
        case 0:
            return tic.b_type;
        case 1:
            return tic.g_type;
        case 2:
            return tic.r_type;
        default:
            break;
        }
        break;
    // Channels ordered G, R.
    case TextureFormat::G8R24:
    case TextureFormat::G24R8:
    case TextureFormat::G8R8:
    case TextureFormat::G4R4:
        switch (component) {
        case 0:
            return tic.g_type;
        case 1:
            return tic.r_type;
        default:
            break;
        }
        break;
    default:
        break;
    }
    // Reached for unhandled formats and for indices without a channel in a handled format.
    UNIMPLEMENTED_MSG("texture format not implement={}", format);
    return ComponentType::FLOAT;
}

bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
    constexpr u8 R = 0b0001;
    constexpr u8 G = 0b0010;
    constexpr u8 B = 0b0100;
    constexpr u8 A = 0b1000;
    constexpr std::array<u8, 16> mask = {
        0,   (R),     (G),     (R | G),     (B),     (R | B),     (G | B),     (R | G | B),
        (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
    return std::bitset<4>{mask.at(component_mask)}.test(component);
}

/// Returns the width in bits of one component of a texture format.
///
/// The component index follows the order in which the channels are spelled in the format name,
/// matching GetComponentType (for example, index 0 of A2B10G10R10 is the 2-bit alpha channel).
///
/// @param format    Texture format to inspect.
/// @param component Zero-based component index.
/// @returns The bit width of the component. Formats with fewer than four channels return 0 for
///          indices past their last channel. Unhandled formats are reported through
///          UNIMPLEMENTED_MSG and return 0.
u32 GetComponentSize(TextureFormat format, std::size_t component) {
    switch (format) {
    case TextureFormat::R32_G32_B32_A32:
        return 32;
    case TextureFormat::R16_G16_B16_A16:
        return 16;
    case TextureFormat::R32_G32_B32:
        return (0 == component || 1 == component || 2 == component) ? 32 : 0;
    case TextureFormat::R32_G32:
        return (0 == component || 1 == component) ? 32 : 0;
    case TextureFormat::R16_G16:
        return (0 == component || 1 == component) ? 16 : 0;
    case TextureFormat::R32:
        return (0 == component) ? 32 : 0;
    case TextureFormat::R16:
        return (0 == component) ? 16 : 0;
    case TextureFormat::R8:
        return (0 == component) ? 8 : 0;
    case TextureFormat::R1:
        return (0 == component) ? 1 : 0;
    case TextureFormat::A8R8G8B8:
        return 8;
    case TextureFormat::A2B10G10R10:
        // Index 0 is the 2-bit alpha channel; B, G and R are 10 bits each.
        return (3 == component || 2 == component || 1 == component) ? 10 : 2;
    case TextureFormat::A4B4G4R4:
        return 4;
    case TextureFormat::A5B5G5R1:
        // A, B and G are 5 bits each; index 3 is the 1-bit red channel.
        return (0 == component || 1 == component || 2 == component) ? 5 : 1;
    case TextureFormat::A1B5G5R5:
        // Index 0 is the 1-bit alpha channel; B, G and R are 5 bits each.
        return (1 == component || 2 == component || 3 == component) ? 5 : 1;
    case TextureFormat::R32_B24G8:
        if (0 == component) {
            return 32;
        }
        if (1 == component) {
            return 24;
        }
        if (2 == component) {
            return 8;
        }
        return 0;
    case TextureFormat::B5G6R5:
        if (0 == component || 2 == component) {
            return 5;
        }
        if (1 == component) {
            return 6;
        }
        return 0;
    case TextureFormat::B6G5R5:
        if (1 == component || 2 == component) {
            return 5;
        }
        if (0 == component) {
            return 6;
        }
        return 0;
    case TextureFormat::G8R24:
        if (0 == component) {
            return 8;
        }
        if (1 == component) {
            return 24;
        }
        return 0;
    case TextureFormat::G24R8:
        // Index 0 is G and index 1 is R (see GetComponentType), so the widths are the reverse of
        // G8R24. The previous code duplicated the G8R24 values here.
        if (0 == component) {
            return 24;
        }
        if (1 == component) {
            return 8;
        }
        return 0;
    case TextureFormat::G8R8:
        return (0 == component || 1 == component) ? 8 : 0;
    case TextureFormat::G4R4:
        return (0 == component || 1 == component) ? 4 : 0;
    default:
        UNIMPLEMENTED_MSG("texture format not implement={}", format);
        return 0;
    }
}

/// Returns a bit mask describing which components a texture format provides.
///
/// A format with N channels yields a mask with its N lowest bits set.
///
/// @param format Texture format to inspect.
/// @returns The component mask. Unhandled formats are reported through UNIMPLEMENTED_MSG and
///          yield the four-component mask.
std::size_t GetImageComponentMask(TextureFormat format) {
    constexpr std::size_t one_component = 0b0001;
    constexpr std::size_t two_components = 0b0011;
    constexpr std::size_t three_components = 0b0111;
    constexpr std::size_t four_components = 0b1111;
    switch (format) {
    case TextureFormat::R32:
    case TextureFormat::R16:
    case TextureFormat::R8:
    case TextureFormat::R1:
        return one_component;
    case TextureFormat::R32_G32:
    case TextureFormat::R16_G16:
    case TextureFormat::G8R24:
    case TextureFormat::G24R8:
    case TextureFormat::G8R8:
    case TextureFormat::G4R4:
        return two_components;
    case TextureFormat::R32_G32_B32:
    case TextureFormat::R32_B24G8:
    case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
        return three_components;
    case TextureFormat::R32_G32_B32_A32:
    case TextureFormat::R16_G16_B16_A16:
    case TextureFormat::A8R8G8B8:
    case TextureFormat::A2B10G10R10:
    case TextureFormat::A4B4G4R4:
    case TextureFormat::A5B5G5R1:
    case TextureFormat::A1B5G5R5:
        return four_components;
    default:
        UNIMPLEMENTED_MSG("texture format not implement={}", format);
        return four_components;
    }
}

/// Returns how many coordinate operands an access to the given image type takes.
///
/// @param image_type Image type encoded in the instruction.
/// @returns 1, 2 or 3 depending on the type. A value outside the handled set triggers
///          UNREACHABLE and yields 1.
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
    using Tegra::Shader::ImageType;

    switch (image_type) {
    case ImageType::TextureBuffer:
    case ImageType::Texture1D:
        return 1;
    case ImageType::Texture2D:
    case ImageType::Texture1DArray:
        return 2;
    case ImageType::Texture3D:
    case ImageType::Texture2DArray:
        return 3;
    }

    UNREACHABLE();
    return 1;
}
} // Anonymous namespace

/// Decodes the image instruction stored at `pc` and appends the resulting IR nodes to `bb`.
///
/// Three opcodes are handled: SULD (emits ImageLoad operations), SUST (emits an ImageStore
/// operation) and SUATOM (emits one of the AtomicImage* operations). Any other opcode is reported
/// through UNIMPLEMENTED_MSG.
///
/// @param bb Node block that receives the generated nodes.
/// @param pc Index into program_code of the instruction to decode.
/// @returns `pc`, unchanged.
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Builds the coordinate operands of an image access: one register node per coordinate, read
    // from consecutive registers starting at gpr8.
    const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) {
        std::vector<Node> coords;
        const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)};
        coords.reserve(num_coords);
        for (std::size_t i = 0; i < num_coords; ++i) {
            coords.push_back(GetRegister(instr.gpr8.Value() + i));
        }
        return coords;
    };

    switch (opcode->get().GetId()) {
    case OpCode::Id::SULD: {
        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                         Tegra::Shader::OutOfBoundsStore::Ignore);

        // The image is addressed either by an immediate index or through register gpr39.
        const auto type{instr.suldst.image_type};
        auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
                                              : GetBindlessImage(instr.gpr39, type)};
        image.MarkRead();

        if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
            // Every enabled component is loaded into a temporary first; the temporaries are then
            // copied into consecutive registers starting at gpr0.
            // NOTE(review): presumably the two-step copy keeps destination writes from clobbering
            // registers that later loads still read - confirm.
            u32 indexer = 0;
            for (u32 element = 0; element < 4; ++element) {
                if (!instr.suldst.IsComponentEnabled(element)) {
                    continue;
                }
                MetaImage meta{image, {}, element};
                Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
                SetTemporary(bb, indexer++, std::move(value));
            }
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
        } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
            UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32);

            // The texture information for this shader stage at the image's offset is queried from
            // Maxwell3D while decoding; its TIC provides the format and per-component types.
            const auto maxwell3d = &Core::System::GetInstance().GPU().Maxwell3D();
            const auto tex_info = maxwell3d->GetStageTexture(shader_stage, image.GetOffset());

            const auto comp_mask = GetImageComponentMask(tex_info.tic.format);
            // TODO(namkazt): For now the image format is assumed to match the store type; this
            // still has to be validated.

            switch (instr.suldst.GetStoreDataLayout()) {
            case StoreType::Bits32: {
                // shifted_counter accumulates the bit widths of the components handled so far.
                u32 shifted_counter = 0;
                Node value = Immediate(0);
                for (u32 element = 0; element < 4; ++element) {
                    if (!IsComponentEnabled(comp_mask, element)) {
                        continue;
                    }
                    const auto component_type = GetComponentType(tex_info.tic, element);
                    const auto component_size = GetComponentSize(tex_info.tic.format, element);
                    // Updated by the conversion lambda below (captured by reference) and used
                    // afterwards to select the signedness of the shift operation.
                    bool is_signed = true;
                    MetaImage meta{image, {}, element};
                    const Node original_value =
                        Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
                    // Converts the loaded component according to its type: SNORM and UNORM values
                    // are scaled by a constant and cast from float to integer, while SINT, UINT
                    // and FLOAT values are passed through unchanged.
                    Node converted_value = [&] {
                        switch (component_type) {
                        case ComponentType::SNORM: {
                            // range [-1.0, 1.0]
                            auto cnv_value = Operation(OperationCode::FMul, NO_PRECISE,
                                                       original_value, Immediate(128.f));
                            return SignedOperation(OperationCode::ICastFloat, is_signed, NO_PRECISE,
                                                   std::move(cnv_value));
                        }
                        case ComponentType::UNORM: {
                            // range [0.0, 1.0]
                            auto cnv_value = Operation(OperationCode::FMul, NO_PRECISE,
                                                       original_value, Immediate(255.f));
                            is_signed = false;
                            return SignedOperation(OperationCode::ICastFloat, is_signed, NO_PRECISE,
                                                   std::move(cnv_value));
                        }
                        case ComponentType::SINT: // range [-128,128]
                            return original_value;
                        case ComponentType::UINT: // range [0, 255]
                            is_signed = false;
                            return original_value;
                        case ComponentType::FLOAT:
                            return original_value;
                        default:
                            UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
                            return original_value;
                        }
                    }();
                    // shift element to correct position
                    // Both operands are u32, so `shifted` is unsigned and the check below is
                    // effectively `shifted != 0`.
                    shifted_counter += component_size;
                    const auto shifted = 32 - shifted_counter;
                    if (shifted > 0) {
                        converted_value =
                            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
                                            std::move(converted_value), Immediate(shifted));
                    }

                    // add value into result
                    value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
                    // NOTE(review): this unconditional break leaves the loop after the first
                    // enabled component, so only that component is merged into `value`. Confirm
                    // whether that is intended.
                    break;
                }
                SetRegister(bb, instr.gpr0.Value(), std::move(value));

                break;
            }
            default:
                UNREACHABLE();
                break;
            }
        }
        break;
    }
    case OpCode::Id::SUST: {
        UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                         Tegra::Shader::OutOfBoundsStore::Ignore);
        UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA

        // The values to store are always read from four consecutive registers starting at gpr0.
        std::vector<Node> values;
        constexpr std::size_t hardcoded_size{4};
        for (std::size_t i = 0; i < hardcoded_size; ++i) {
            values.push_back(GetRegister(instr.gpr0.Value() + i));
        }

        const auto type{instr.suldst.image_type};
        auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
                                              : GetBindlessImage(instr.gpr39, type)};
        image.MarkWrite();

        MetaImage meta{image, std::move(values)};
        bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type)));
        break;
    }
    case OpCode::Id::SUATOM: {
        UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);

        // Maps the encoded atomic operation to an IR operation code. S32 and U32 share the same
        // mapping. An operation without a matching case falls out of the inner switch into the
        // default label, is reported as unimplemented, and AtomicImageAdd is used as a fallback.
        const OperationCode operation_code = [instr] {
            switch (instr.suatom_d.operation_type) {
            case Tegra::Shader::ImageAtomicOperationType::S32:
            case Tegra::Shader::ImageAtomicOperationType::U32:
                switch (instr.suatom_d.operation) {
                case Tegra::Shader::ImageAtomicOperation::Add:
                    return OperationCode::AtomicImageAdd;
                case Tegra::Shader::ImageAtomicOperation::And:
                    return OperationCode::AtomicImageAnd;
                case Tegra::Shader::ImageAtomicOperation::Or:
                    return OperationCode::AtomicImageOr;
                case Tegra::Shader::ImageAtomicOperation::Xor:
                    return OperationCode::AtomicImageXor;
                case Tegra::Shader::ImageAtomicOperation::Exch:
                    return OperationCode::AtomicImageExchange;
                }
            default:
                break;
            }
            UNIMPLEMENTED_MSG("Unimplemented operation={} type={}",
                              static_cast<u64>(instr.suatom_d.operation.Value()),
                              static_cast<u64>(instr.suatom_d.operation_type.Value()));
            return OperationCode::AtomicImageAdd;
        }();

        // gpr0 supplies the operand value and also receives the result of the atomic operation.
        Node value = GetRegister(instr.gpr0);

        // Only the immediate image lookup is used here; no bindless lookup is performed.
        const auto type = instr.suatom_d.image_type;
        auto& image = GetImage(instr.image, type);
        image.MarkAtomic();

        MetaImage meta{image, {std::move(value)}};
        SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type)));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
    }

    return pc;
}

/// Returns the tracked image entry for an immediate image index, creating it on first use.
///
/// @param image Instruction field holding the image index; the index is used as the entry offset.
/// @param type  Image type requested by the instruction.
/// @returns A reference to the matching entry in used_images. When an entry with the same offset
///          already exists it is reused, after asserting that it is not bindless and that its
///          type matches `type`.
Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
    const auto offset = static_cast<u32>(image.index.Value());

    const auto it =
        std::find_if(std::begin(used_images), std::end(used_images),
                     [offset](const Image& entry) { return entry.GetOffset() == offset; });
    if (it != std::end(used_images)) {
        // Compare against the requested type. The previous check compared the entry's type with
        // itself and could never fail.
        ASSERT(!it->IsBindless() && it->GetType() == type);
        return *it;
    }

    // New entries are indexed in creation order.
    const auto next_index = static_cast<u32>(used_images.size());
    return used_images.emplace_back(next_index, offset, type);
}

/// Returns the tracked image entry for a bindless image handle, creating it on first use.
///
/// The register holding the handle is traced with TrackCbuf to obtain the buffer and offset that
/// identify the entry.
///
/// @param reg  Register holding the image handle.
/// @param type Image type requested by the instruction.
/// @returns A reference to the matching entry in used_images. When an entry with the same buffer
///          and offset already exists it is reused, after asserting that it is bindless and that
///          its type matches `type`.
Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
    const Node image_register = GetRegister(reg);
    const auto [base_image, buffer, offset] =
        TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));

    // Structured bindings are copied through init-captures before being used in the lambda.
    const auto it =
        std::find_if(std::begin(used_images), std::end(used_images),
                     [buffer = buffer, offset = offset](const Image& entry) {
                         return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
                     });
    if (it != std::end(used_images)) {
        // Compare against the requested type. The previous check compared the entry's type with
        // itself and could never fail.
        ASSERT(it->IsBindless() && it->GetType() == type);
        return *it;
    }

    // New entries are indexed in creation order.
    const auto next_index = static_cast<u32>(used_images.size());
    return used_images.emplace_back(next_index, offset, buffer, type);
}

} // namespace VideoCommon::Shader