From 2930dccecc933d6748772e9f51a5724fe1e6771b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Feb 2021 02:54:35 -0300 Subject: spirv: Initial SPIR-V support --- .../renderer_vulkan/vk_shader_decompiler.cpp | 3166 -------------------- .../renderer_vulkan/vk_shader_decompiler.h | 99 - 2 files changed, 3265 deletions(-) delete mode 100644 src/video_core/renderer_vulkan/vk_shader_decompiler.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_shader_decompiler.h (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp deleted file mode 100644 index c6846d886..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ /dev/null @@ -1,3166 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/node.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/shader/transform_feedback.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Vulkan { - -namespace { - -using Sirit::Id; -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using Operation = const OperationNode&; - -class ASTDecompiler; -class ExprDecompiler; - -// TODO(Rodrigo): Use rasterizer's value -constexpr u32 
MaxConstBufferFloats = 0x4000; -constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4; - -constexpr u32 NumInputPatches = 32; // This value seems to be the standard - -enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; - -class Expression final { -public: - Expression(Id id_, Type type_) : id{id_}, type{type_} { - ASSERT(type_ != Type::Void); - } - Expression() : type{Type::Void} {} - - Id id{}; - Type type{}; -}; -static_assert(std::is_standard_layout_v); - -struct TexelBuffer { - Id image_type{}; - Id image{}; -}; - -struct SampledImage { - Id image_type{}; - Id sampler_type{}; - Id sampler_pointer_type{}; - Id variable{}; -}; - -struct StorageImage { - Id image_type{}; - Id image{}; -}; - -struct AttributeType { - Type type; - Id scalar; - Id vector; -}; - -struct VertexIndices { - std::optional position; - std::optional layer; - std::optional viewport; - std::optional point_size; - std::optional clip_distances; -}; - -struct GenericVaryingDescription { - Id id = nullptr; - u32 first_element = 0; - bool is_scalar = false; -}; - -spv::Dim GetSamplerDim(const SamplerEntry& sampler) { - ASSERT(!sampler.is_buffer); - switch (sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return spv::Dim::Dim1D; - case Tegra::Shader::TextureType::Texture2D: - return spv::Dim::Dim2D; - case Tegra::Shader::TextureType::Texture3D: - return spv::Dim::Dim3D; - case Tegra::Shader::TextureType::TextureCube: - return spv::Dim::Cube; - default: - UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type); - return spv::Dim::Dim2D; - } -} - -std::pair GetImageDim(const ImageEntry& image) { - switch (image.type) { - case Tegra::Shader::ImageType::Texture1D: - return {spv::Dim::Dim1D, false}; - case Tegra::Shader::ImageType::TextureBuffer: - return {spv::Dim::Buffer, false}; - case Tegra::Shader::ImageType::Texture1DArray: - return {spv::Dim::Dim1D, true}; - case Tegra::Shader::ImageType::Texture2D: - return {spv::Dim::Dim2D, false}; - case 
Tegra::Shader::ImageType::Texture2DArray: - return {spv::Dim::Dim2D, true}; - case Tegra::Shader::ImageType::Texture3D: - return {spv::Dim::Dim3D, false}; - default: - UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type); - return {spv::Dim::Dim2D, false}; - } -} - -/// Returns the number of vertices present in a primitive topology. -u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) { - switch (primitive_topology) { - case Maxwell::PrimitiveTopology::Points: - return 1; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - return 2; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return 3; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return 4; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return 6; - case Maxwell::PrimitiveTopology::Quads: - UNIMPLEMENTED_MSG("Quads"); - return 3; - case Maxwell::PrimitiveTopology::QuadStrip: - UNIMPLEMENTED_MSG("QuadStrip"); - return 3; - case Maxwell::PrimitiveTopology::Polygon: - UNIMPLEMENTED_MSG("Polygon"); - return 3; - case Maxwell::PrimitiveTopology::Patches: - UNIMPLEMENTED_MSG("Patches"); - return 3; - default: - UNREACHABLE(); - return 3; - } -} - -spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) { - switch (primitive) { - case Maxwell::TessellationPrimitive::Isolines: - return spv::ExecutionMode::Isolines; - case Maxwell::TessellationPrimitive::Triangles: - return spv::ExecutionMode::Triangles; - case Maxwell::TessellationPrimitive::Quads: - return spv::ExecutionMode::Quads; - } - UNREACHABLE(); - return spv::ExecutionMode::Triangles; -} - -spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) { - switch (spacing) { - case 
Maxwell::TessellationSpacing::Equal: - return spv::ExecutionMode::SpacingEqual; - case Maxwell::TessellationSpacing::FractionalOdd: - return spv::ExecutionMode::SpacingFractionalOdd; - case Maxwell::TessellationSpacing::FractionalEven: - return spv::ExecutionMode::SpacingFractionalEven; - } - UNREACHABLE(); - return spv::ExecutionMode::SpacingEqual; -} - -spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) { - switch (input_topology) { - case Maxwell::PrimitiveTopology::Points: - return spv::ExecutionMode::InputPoints; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - return spv::ExecutionMode::InputLines; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return spv::ExecutionMode::InputLinesAdjacency; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return spv::ExecutionMode::InputTrianglesAdjacency; - case Maxwell::PrimitiveTopology::Quads: - UNIMPLEMENTED_MSG("Quads"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::QuadStrip: - UNIMPLEMENTED_MSG("QuadStrip"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::Polygon: - UNIMPLEMENTED_MSG("Polygon"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::Patches: - UNIMPLEMENTED_MSG("Patches"); - return spv::ExecutionMode::Triangles; - } - UNREACHABLE(); - return spv::ExecutionMode::Triangles; -} - -spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) { - switch (output_topology) { - case Tegra::Shader::OutputTopology::PointList: - return spv::ExecutionMode::OutputPoints; - case 
Tegra::Shader::OutputTopology::LineStrip: - return spv::ExecutionMode::OutputLineStrip; - case Tegra::Shader::OutputTopology::TriangleStrip: - return spv::ExecutionMode::OutputTriangleStrip; - default: - UNREACHABLE(); - return spv::ExecutionMode::OutputPoints; - } -} - -/// Returns true if an attribute index is one of the 32 generic attributes -constexpr bool IsGenericAttribute(Attribute::Index attribute) { - return attribute >= Attribute::Index::Attribute_0 && - attribute <= Attribute::Index::Attribute_31; -} - -/// Returns the location of a generic attribute -u32 GetGenericAttributeLocation(Attribute::Index attribute) { - ASSERT(IsGenericAttribute(attribute)); - return static_cast(attribute) - static_cast(Attribute::Index::Attribute_0); -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (std::holds_alternative(meta)) { - return std::get(meta).precise; - } - return false; -} - -class SPIRVDecompiler final : public Sirit::Module { -public: - explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_, - const Registry& registry_, const Specialization& specialization_) - : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()}, - registry{registry_}, specialization{specialization_} { - if (stage_ != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); - } - - AddCapability(spv::Capability::Shader); - AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); - AddCapability(spv::Capability::ImageQuery); - AddCapability(spv::Capability::Image1D); - AddCapability(spv::Capability::ImageBuffer); - AddCapability(spv::Capability::ImageGatherExtended); - AddCapability(spv::Capability::SampledBuffer); - AddCapability(spv::Capability::StorageImageWriteWithoutFormat); - AddCapability(spv::Capability::DrawParameters); - AddCapability(spv::Capability::SubgroupBallotKHR); - 
AddCapability(spv::Capability::SubgroupVoteKHR); - AddExtension("SPV_KHR_16bit_storage"); - AddExtension("SPV_KHR_shader_ballot"); - AddExtension("SPV_KHR_subgroup_vote"); - AddExtension("SPV_KHR_storage_buffer_storage_class"); - AddExtension("SPV_KHR_variable_pointers"); - AddExtension("SPV_KHR_shader_draw_parameters"); - - if (!transform_feedback.empty()) { - if (device.IsExtTransformFeedbackSupported()) { - AddCapability(spv::Capability::TransformFeedback); - } else { - LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " - "supported on this device"); - } - } - if (ir.UsesLayer() || ir.UsesViewportIndex()) { - if (ir.UsesViewportIndex()) { - AddCapability(spv::Capability::MultiViewport); - } - if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) { - AddExtension("SPV_EXT_shader_viewport_index_layer"); - AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); - } - } - if (device.IsFormatlessImageLoadSupported()) { - AddCapability(spv::Capability::StorageImageReadWithoutFormat); - } - if (device.IsFloat16Supported()) { - AddCapability(spv::Capability::Float16); - } - t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 
16 : 32), "scalar_half"); - t_half = Name(TypeVector(t_scalar_half, 2), "half"); - - const Id main = Decompile(); - - switch (stage) { - case ShaderType::Vertex: - AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces); - break; - case ShaderType::TesselationControl: - AddCapability(spv::Capability::Tessellation); - AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces); - AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - header.common2.threads_per_input_primitive); - break; - case ShaderType::TesselationEval: { - const auto& info = registry.GetGraphicsInfo(); - AddCapability(spv::Capability::Tessellation); - AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); - AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); - AddExecutionMode(main, info.tessellation_clockwise - ? spv::ExecutionMode::VertexOrderCw - : spv::ExecutionMode::VertexOrderCcw); - break; - } - case ShaderType::Geometry: { - const auto& info = registry.GetGraphicsInfo(); - AddCapability(spv::Capability::Geometry); - AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); - AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); - AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - header.common4.max_output_vertices); - // TODO(Rodrigo): Where can we get this info from? 
- AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); - break; - } - case ShaderType::Fragment: - AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); - AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); - if (header.ps.omap.depth) { - AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); - } - if (specialization.early_fragment_tests) { - AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); - } - break; - case ShaderType::Compute: - const auto workgroup_size = specialization.workgroup_size; - AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], - workgroup_size[1], workgroup_size[2]); - AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces); - break; - } - } - -private: - Id Decompile() { - DeclareCommon(); - DeclareVertex(); - DeclareTessControl(); - DeclareTessEval(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareRegisters(); - DeclareCustomVariables(); - DeclarePredicates(); - DeclareLocalMemory(); - DeclareSharedMemory(); - DeclareInternalFlags(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - - u32 binding = specialization.base_binding; - binding = DeclareConstantBuffers(binding); - binding = DeclareGlobalBuffers(binding); - binding = DeclareUniformTexels(binding); - binding = DeclareSamplers(binding); - binding = DeclareStorageTexels(binding); - binding = DeclareImages(binding); - - const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); - AddLabel(); - - if (ir.IsDecompiled()) { - DeclareFlowVariables(); - DecompileAST(); - } else { - AllocateLabels(); - DecompileBranchMode(); - } - - OpReturn(); - OpFunctionEnd(); - - return main; - } - - void DefinePrologue() { - if (stage == ShaderType::Vertex) { - // Clear Position to avoid reading trash on the Z conversion. 
- const auto position_index = out_indices.position.value(); - const Id position = AccessElement(t_out_float4, out_vertex, position_index); - OpStore(position, v_varying_default); - - if (specialization.point_size) { - const u32 point_size_index = out_indices.point_size.value(); - const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index); - OpStore(out_point_size, Constant(t_float, *specialization.point_size)); - } - } - } - - void DecompileAST(); - - void DecompileBranchMode() { - const u32 first_address = ir.GetBasicBlocks().begin()->first; - const Id loop_label = OpLabel("loop"); - const Id merge_label = OpLabel("merge"); - const Id dummy_label = OpLabel(); - const Id jump_label = OpLabel(); - continue_label = OpLabel("continue"); - - std::vector literals; - std::vector branch_labels; - for (const auto& [literal, label] : labels) { - literals.push_back(literal); - branch_labels.push_back(label); - } - - jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint), - spv::StorageClass::Function, Constant(t_uint, first_address)); - AddLocalVariable(jmp_to); - - std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); - std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); - - Name(jmp_to, "jmp_to"); - Name(ssy_flow_stack, "ssy_flow_stack"); - Name(ssy_flow_stack_top, "ssy_flow_stack_top"); - Name(pbk_flow_stack, "pbk_flow_stack"); - Name(pbk_flow_stack_top, "pbk_flow_stack_top"); - - DefinePrologue(); - - OpBranch(loop_label); - AddLabel(loop_label); - OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); - OpBranch(dummy_label); - - AddLabel(dummy_label); - const Id default_branch = OpLabel(); - const Id jmp_to_load = OpLoad(t_uint, jmp_to); - OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone); - OpSwitch(jmp_to_load, default_branch, literals, branch_labels); - - AddLabel(default_branch); - OpReturn(); - - for (const auto& [address, bb] : ir.GetBasicBlocks()) { - 
AddLabel(labels.at(address)); - - VisitBasicBlock(bb); - - const auto next_it = labels.lower_bound(address + 1); - const Id next_label = next_it != labels.end() ? next_it->second : default_branch; - OpBranch(next_label); - } - - AddLabel(jump_label); - OpBranch(continue_label); - AddLabel(continue_label); - OpBranch(loop_label); - AddLabel(merge_label); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - static constexpr auto INTERNAL_FLAGS_COUNT = static_cast(InternalFlag::Amount); - - void AllocateLabels() { - for (const auto& pair : ir.GetBasicBlocks()) { - const u32 address = pair.first; - labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); - } - } - - void DeclareCommon() { - thread_id = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); - thread_masks[0] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); - thread_masks[1] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); - thread_masks[2] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); - thread_masks[3] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); - thread_masks[4] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - Id out_vertex_struct; - std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); - const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); - out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output); - interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); - - // Declare input attributes - vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index"); - instance_index = - DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index"); - base_vertex = 
DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex"); - base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance"); - } - - void DeclareTessControl() { - if (stage != ShaderType::TesselationControl) { - return; - } - DeclareInputVertexArray(NumInputPatches); - DeclareOutputVertexArray(header.common2.threads_per_input_primitive); - - tess_level_outer = DeclareBuiltIn( - spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output, - TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))), - "tess_level_outer"); - Decorate(tess_level_outer, spv::Decoration::Patch); - - tess_level_inner = DeclareBuiltIn( - spv::BuiltIn::TessLevelInner, spv::StorageClass::Output, - TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))), - "tess_level_inner"); - Decorate(tess_level_inner, spv::Decoration::Patch); - - invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id"); - } - - void DeclareTessEval() { - if (stage != ShaderType::TesselationEval) { - return; - } - DeclareInputVertexArray(NumInputPatches); - DeclareOutputVertex(); - - tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord"); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); - DeclareInputVertexArray(num_input); - DeclareOutputVertex(); - } - - void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - - for (u32 rt = 0; rt < static_cast(std::size(frag_colors)); ++rt) { - if (!IsRenderTargetEnabled(rt)) { - continue; - } - const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output)); - Name(id, fmt::format("frag_color{}", rt)); - Decorate(id, spv::Decoration::Location, rt); - - frag_colors[rt] = id; - interfaces.push_back(id); - } - - if 
(header.ps.omap.depth) { - frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output)); - Name(frag_depth, "frag_depth"); - Decorate(frag_depth, spv::Decoration::BuiltIn, - static_cast(spv::BuiltIn::FragDepth)); - - interfaces.push_back(frag_depth); - } - - frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord"); - front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing"); - point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord"); - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - - workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id"); - local_invocation_id = - DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id"); - } - - void DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); - Name(id, fmt::format("gpr_{}", gpr)); - registers.emplace(gpr, AddGlobalVariable(id)); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); - Name(id, fmt::format("custom_var_{}", i)); - custom_variables.emplace(i, AddGlobalVariable(id)); - } - } - - void DeclarePredicates() { - for (const auto pred : ir.GetPredicates()) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("pred_{}", static_cast(pred))); - predicates.emplace(pred, AddGlobalVariable(id)); - } - } - - void DeclareFlowVariables() { - for (u32 i = 0; i < ir.GetASTNumVariables(); i++) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("flow_var_{}", static_cast(i))); - flow_variables.emplace(i, AddGlobalVariable(id)); - } - } - - 
void DeclareLocalMemory() { - // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at - // specialization time. - const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize(); - if (lmem_size == 0) { - return; - } - const auto element_count = static_cast(Common::AlignUp(lmem_size, 4) / 4); - const Id type_array = TypeArray(t_float, Constant(t_uint, element_count)); - const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array); - Name(type_pointer, "LocalMemory"); - - local_memory = - OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array)); - AddGlobalVariable(Name(local_memory, "local_memory")); - } - - void DeclareSharedMemory() { - if (stage != ShaderType::Compute) { - return; - } - t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); - - u32 smem_size = specialization.shared_memory_size * 4; - if (smem_size == 0) { - // Avoid declaring an empty array. - return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (smem_size > limit) { - LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", - smem_size, limit); - smem_size = limit; - } - - const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); - const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); - Name(type_pointer, "SharedMemory"); - - shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup); - AddGlobalVariable(Name(shared_memory, "shared_memory")); - } - - void DeclareInternalFlags() { - static constexpr std::array names{"zero", "sign", "carry", "overflow"}; - - for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); - } - } - - void DeclareInputVertexArray(u32 length) { - constexpr auto storage = spv::StorageClass::Input; - std::tie(in_indices, in_vertex) 
= DeclareVertexArray(storage, "in_indices", length); - } - - void DeclareOutputVertexArray(u32 length) { - constexpr auto storage = spv::StorageClass::Output; - std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length); - } - - std::tuple DeclareVertexArray(spv::StorageClass storage_class, - std::string name, u32 length) { - const auto [struct_id, indices] = DeclareVertexStruct(); - const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length)); - const Id vertex_ptr = TypePointer(storage_class, vertex_array); - const Id vertex = OpVariable(vertex_ptr, storage_class); - AddGlobalVariable(Name(vertex, std::move(name))); - interfaces.push_back(vertex); - return {indices, vertex}; - } - - void DeclareOutputVertex() { - Id out_vertex_struct; - std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); - const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); - out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output); - interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); - } - - void DeclareInputAttributes() { - for (const auto index : ir.GetInputAttributes()) { - if (!IsGenericAttribute(index)) { - continue; - } - const u32 location = GetGenericAttributeLocation(index); - if (!IsAttributeEnabled(location)) { - continue; - } - const auto type_descriptor = GetAttributeType(location); - Id type; - if (IsInputAttributeArray()) { - type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3); - type = TypeArray(type, Constant(t_uint, GetNumInputVertices())); - type = TypePointer(spv::StorageClass::Input, type); - } else { - type = type_descriptor.vector; - } - const Id id = OpVariable(type, spv::StorageClass::Input); - AddGlobalVariable(Name(id, fmt::format("in_attr{}", location))); - input_attributes.emplace(index, id); - interfaces.push_back(id); - - Decorate(id, spv::Decoration::Location, location); - - if (stage != ShaderType::Fragment) { - continue; - } - switch 
(header.ps.GetPixelImap(location)) { - case PixelImap::Constant: - Decorate(id, spv::Decoration::Flat); - break; - case PixelImap::Perspective: - // Default - break; - case PixelImap::ScreenLinear: - Decorate(id, spv::Decoration::NoPerspective); - break; - default: - UNREACHABLE_MSG("Unused attribute being fetched"); - } - } - } - - void DeclareOutputAttributes() { - if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { - return; - } - - UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); - for (const auto index : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(index)) { - continue; - } - DeclareOutputAttribute(index); - } - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - - const u32 location = GetGenericAttributeLocation(index); - u8 element = 0; - while (element < 4) { - const std::size_t remainder = 4 - element; - - std::size_t num_components = remainder; - const std::optional tfb = GetTransformFeedbackInfo(index, element); - if (tfb) { - num_components = tfb->components; - } - - Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); - Id varying_default = v_varying_default; - if (IsOutputAttributeArray()) { - const u32 num = GetNumOutputVertices(); - type = TypeArray(type, Constant(t_uint, num)); - if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { - // Intel's proprietary driver fails to setup defaults for arrayed output - // attributes. 
- varying_default = ConstantComposite(type, std::vector(num, varying_default)); - } - } - type = TypePointer(spv::StorageClass::Output, type); - - std::string name = fmt::format("out_attr{}", location); - if (num_components < 4 || element > 0) { - name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); - } - - const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); - Name(AddGlobalVariable(id), name); - - GenericVaryingDescription description; - description.id = id; - description.first_element = element; - description.is_scalar = num_components == 1; - for (u32 i = 0; i < num_components; ++i) { - const u8 offset = static_cast(static_cast(index) * 4 + element + i); - output_attributes.emplace(offset, description); - } - interfaces.push_back(id); - - Decorate(id, spv::Decoration::Location, location); - if (element > 0) { - Decorate(id, spv::Decoration::Component, static_cast(element)); - } - if (tfb && device.IsExtTransformFeedbackSupported()) { - Decorate(id, spv::Decoration::XfbBuffer, static_cast(tfb->buffer)); - Decorate(id, spv::Decoration::XfbStride, static_cast(tfb->stride)); - Decorate(id, spv::Decoration::Offset, static_cast(tfb->offset)); - } - - element = static_cast(static_cast(element) + num_components); - } - } - - std::optional GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - return it->second; - } - - u32 DeclareConstantBuffers(u32 binding) { - for (const auto& [index, size] : ir.GetConstantBuffers()) { - const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? 
t_cbuf_scalar_ubo - : t_cbuf_std140_ubo; - const Id id = OpVariable(type, spv::StorageClass::Uniform); - AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - constant_buffers.emplace(index, id); - } - return binding; - } - - u32 DeclareGlobalBuffers(u32 binding) { - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); - AddGlobalVariable( - Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - global_buffers.emplace(base, id); - } - return binding; - } - - u32 DeclareUniformTexels(u32 binding) { - for (const auto& sampler : ir.GetSamplers()) { - if (!sampler.is_buffer) { - continue; - } - ASSERT(!sampler.is_array); - ASSERT(!sampler.is_shadow); - - constexpr auto dim = spv::Dim::Buffer; - constexpr int depth = 0; - constexpr int arrayed = 0; - constexpr bool ms = false; - constexpr int sampled = 1; - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - - uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id}); - } - return binding; - } - - u32 DeclareSamplers(u32 binding) { - for (const auto& sampler : ir.GetSamplers()) { - if (sampler.is_buffer) { - continue; - } - const auto dim = GetSamplerDim(sampler); - const int depth = sampler.is_shadow ? 
1 : 0; - const int arrayed = sampler.is_array ? 1 : 0; - constexpr bool ms = false; - constexpr int sampled = 1; - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id sampler_type = TypeSampledImage(image_type); - const Id sampler_pointer_type = - TypePointer(spv::StorageClass::UniformConstant, sampler_type); - const Id type = sampler.is_indexed - ? TypeArray(sampler_type, Constant(t_uint, sampler.size)) - : sampler_type; - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - - sampled_images.emplace( - sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id}); - } - return binding; - } - - u32 DeclareStorageTexels(u32 binding) { - for (const auto& image : ir.GetImages()) { - if (image.type != Tegra::Shader::ImageType::TextureBuffer) { - continue; - } - DeclareImage(image, binding); - } - return binding; - } - - u32 DeclareImages(u32 binding) { - for (const auto& image : ir.GetImages()) { - if (image.type == Tegra::Shader::ImageType::TextureBuffer) { - continue; - } - DeclareImage(image, binding); - } - return binding; - } - - void DeclareImage(const ImageEntry& image, u32& binding) { - const auto [dim, arrayed] = GetImageDim(image); - constexpr int depth = 0; - constexpr bool ms = false; - constexpr int sampled = 2; // This won't be accessed with a sampler - const auto format = image.is_atomic ? 
spv::ImageFormat::R32ui : spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - if (image.is_read && !image.is_written) { - Decorate(id, spv::Decoration::NonWritable); - } else if (image.is_written && !image.is_read) { - Decorate(id, spv::Decoration::NonReadable); - } - - images.emplace(image.index, StorageImage{image_type, id}); - } - - bool IsRenderTargetEnabled(u32 rt) const { - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(rt, component)) { - return true; - } - } - return false; - } - - bool IsInputAttributeArray() const { - return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval || - stage == ShaderType::Geometry; - } - - bool IsOutputAttributeArray() const { - return stage == ShaderType::TesselationControl; - } - - bool IsAttributeEnabled(u32 location) const { - return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; - } - - u32 GetNumInputVertices() const { - switch (stage) { - case ShaderType::Geometry: - return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); - case ShaderType::TesselationControl: - case ShaderType::TesselationEval: - return NumInputPatches; - default: - UNREACHABLE(); - return 1; - } - } - - u32 GetNumOutputVertices() const { - switch (stage) { - case ShaderType::TesselationControl: - return header.common2.threads_per_input_primitive; - default: - UNREACHABLE(); - return 1; - } - } - - std::tuple DeclareVertexStruct() { - struct BuiltIn { - Id type; - spv::BuiltIn builtin; - const char* 
name; - }; - std::vector members; - members.reserve(4); - - const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) { - const auto index = static_cast(members.size()); - members.push_back(BuiltIn{type, builtin, name}); - return index; - }; - - VertexIndices indices; - indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position"); - - if (ir.UsesLayer()) { - if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { - indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer"); - } else { - LOG_ERROR( - Render_Vulkan, - "Shader requires Layer but it's not supported on this stage with this device."); - } - } - - if (ir.UsesViewportIndex()) { - if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { - indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index"); - } else { - LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on " - "this stage with this device."); - } - } - - if (ir.UsesPointSize() || specialization.point_size) { - indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size"); - } - - const auto& ir_output_attributes = ir.GetOutputAttributes(); - const bool declare_clip_distances = std::any_of( - ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) { - return index == Attribute::Index::ClipDistances0123 || - index == Attribute::Index::ClipDistances4567; - }); - if (declare_clip_distances) { - indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)), - spv::BuiltIn::ClipDistance, "clip_distances"); - } - - std::vector member_types; - member_types.reserve(members.size()); - for (std::size_t i = 0; i < members.size(); ++i) { - member_types.push_back(members[i].type); - } - const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex"); - Decorate(per_vertex_struct, spv::Decoration::Block); - - for (std::size_t index = 0; index < 
members.size(); ++index) { - const auto& member = members[index]; - MemberName(per_vertex_struct, static_cast(index), member.name); - MemberDecorate(per_vertex_struct, static_cast(index), spv::Decoration::BuiltIn, - static_cast(member.builtin)); - } - - return {per_vertex_struct, indices}; - } - - void VisitBasicBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; - ASSERT(type == Type::Void); - } - const auto operation_index = static_cast(operation->GetCode()); - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {v_float_zero, Type::Float}; - } - return {OpLoad(t_float, registers.at(index)), Type::Float}; - } - - if (const auto cv = std::get_if(&*node)) { - const u32 index = cv->GetIndex(); - return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; - } - - if (const auto immediate = std::get_if(&*node)) { - return {Constant(t_uint, immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if(&*node)) { - const auto value = [&]() -> Id { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return v_true; - case Tegra::Shader::Pred::NeverExecute: - return v_false; - default: - return OpLoad(t_bool, predicates.at(index)); - } - }(); - if (predicate->IsNegated()) { - return {OpLogicalNot(t_bool, value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if(&*node)) { - const auto attribute = 
abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const auto& buffer = abuf->GetBuffer(); - - const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector indices) { - std::vector members; - members.reserve(std::size(indices) + 1); - - if (buffer && IsInputAttributeArray()) { - members.push_back(AsUint(Visit(buffer))); - } - for (const u32 index : indices) { - members.push_back(Constant(t_uint, index)); - } - return OpAccessChain(pointer_type, composite, members); - }; - - switch (attribute) { - case Attribute::Index::Position: { - if (stage == ShaderType::Fragment) { - return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), - Type::Float}; - } - const std::vector elements = {in_indices.position.value(), element}; - return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float}; - } - case Attribute::Index::PointCoord: { - switch (element) { - case 0: - case 1: - return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element), - Type::Float}; - } - UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element); - return {v_float_zero, Type::Float}; - } - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - switch (element) { - case 0: - case 1: - return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), - Type::Float}; - case 2: - return { - OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)), - Type::Int}; - case 3: - return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)), - Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {Constant(t_uint, 0U), Type::Uint}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. 
- ASSERT(stage == ShaderType::Fragment); - if (element == 3) { - const Id is_front_facing = OpLoad(t_bool, front_facing); - const Id true_value = Constant(t_int, static_cast(-1)); - const Id false_value = Constant(t_int, 0); - return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {v_float_zero, Type::Float}; - default: - if (!IsGenericAttribute(attribute)) { - break; - } - const u32 location = GetGenericAttributeLocation(attribute); - if (!IsAttributeEnabled(location)) { - // Disabled attributes (also known as constant attributes) always return zero. - return {v_float_zero, Type::Float}; - } - const auto type_descriptor = GetAttributeType(location); - const Type type = type_descriptor.type; - const Id attribute_id = input_attributes.at(attribute); - const std::vector elements = {element}; - const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); - return {OpLoad(GetTypeDefinition(type), pointer), type}; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {v_float_zero, Type::Float}; - } - - if (const auto cbuf = std::get_if(&*node)) { - const Node& offset = cbuf->GetOffset(); - const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); - - Id pointer{}; - if (device.IsKhrUniformBufferStandardLayoutSupported()) { - const Id buffer_offset = - OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U)); - pointer = - OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset); - } else { - Id buffer_index{}; - Id buffer_element{}; - if (const auto immediate = std::get_if(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT(offset_imm % 4 == 0); - buffer_index = Constant(t_uint, offset_imm / 16); - buffer_element = Constant(t_uint, (offset_imm / 4) % 4); - } else if (std::holds_alternative(*offset)) { - // Indirect access - const Id offset_id = 
AsUint(Visit(offset)); - const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4)); - const Id final_offset = - OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1)); - buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4)); - buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4)); - } else { - UNREACHABLE_MSG("Unmanaged offset node type"); - } - pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index, - buffer_element); - } - return {OpLoad(t_float, pointer), Type::Float}; - } - - if (const auto gmem = std::get_if(&*node)) { - return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; - } - - if (const auto lmem = std::get_if(&*node)) { - Id address = AsUint(Visit(lmem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_prv_float, local_memory, address); - return {OpLoad(t_float, pointer), Type::Float}; - } - - if (const auto smem = std::get_if(&*node)) { - return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; - } - - if (const auto internal_flag = std::get_if(&*node)) { - const Id flag = internal_flags.at(static_cast(internal_flag->GetFlag())); - return {OpLoad(t_bool, flag), Type::Bool}; - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; - ASSERT(type == Type::Void); - } - // It's invalid to call conditional on nested nodes, use an operation instead - const Id true_label = OpLabel(); - const Id skip_label = OpLabel(); - const Id condition = AsBool(Visit(conditional->GetCondition())); - OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone); - OpBranchConditional(condition, true_label, skip_label); - AddLabel(true_label); - - conditional_branch_set = true; - inside_branch = false; - VisitBasicBlock(conditional->GetCode()); - 
conditional_branch_set = false; - if (!inside_branch) { - OpBranch(skip_label); - } else { - inside_branch = false; - } - AddLabel(skip_label); - return {}; - } - - if (const auto comment = std::get_if(&*node)) { - if (device.HasDebuggingToolAttached()) { - // We should insert comments with OpString instead of using named variables - Name(OpUndef(t_int), comment->GetText()); - } - return {}; - } - - UNREACHABLE(); - return {}; - } - - template - Expression Unary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - - const Id value = (this->*func)(type_def, op_a); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template - Expression Binary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - - const Id value = (this->*func)(type_def, op_a, op_b); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template - Expression Ternary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - const Id op_c = As(Visit(operation[2]), type_c); - - const Id value = (this->*func)(type_def, op_a, op_b, op_c); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template - Expression Quaternary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - const Id op_c = As(Visit(operation[2]), type_c); - const Id op_d = As(Visit(operation[3]), type_d); - - const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d); - if 
(IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target{}; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit its source - // because it might have side effects. - Visit(src); - return {}; - } - target = {registers.at(gpr->GetIndex()), Type::Float}; - - } else if (const auto abuf = std::get_if(&*dest)) { - const auto& buffer = abuf->GetBuffer(); - const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector indices) { - std::vector members; - members.reserve(std::size(indices) + 1); - - if (buffer && IsOutputAttributeArray()) { - members.push_back(AsUint(Visit(buffer))); - } - for (const u32 index : indices) { - members.push_back(Constant(t_uint, index)); - } - return OpAccessChain(pointer_type, composite, members); - }; - - target = [&]() -> Expression { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex(); attribute) { - case Attribute::Index::Position: { - const u32 index = out_indices.position.value(); - return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float}; - } - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 1: { - if (!out_indices.layer) { - return {}; - } - const u32 index = out_indices.layer.value(); - return {AccessElement(t_out_int, out_vertex, index), Type::Int}; - } - case 2: { - if (!out_indices.viewport) { - return {}; - } - const u32 index = out_indices.viewport.value(); - return {AccessElement(t_out_int, out_vertex, index), Type::Int}; - } - case 3: { - const auto index = out_indices.point_size.value(); - return {AccessElement(t_out_float, out_vertex, index), Type::Float}; - } - default: - UNIMPLEMENTED_MSG("LayerViewportPoint element={}", 
abuf->GetElement()); - return {}; - } - case Attribute::Index::ClipDistances0123: { - const u32 index = out_indices.clip_distances.value(); - return {AccessElement(t_out_float, out_vertex, index, element), Type::Float}; - } - case Attribute::Index::ClipDistances4567: { - const u32 index = out_indices.clip_distances.value(); - return {AccessElement(t_out_float, out_vertex, index, element + 4), - Type::Float}; - } - default: - if (IsGenericAttribute(attribute)) { - const u8 offset = static_cast(static_cast(attribute) * 4 + element); - const GenericVaryingDescription description = output_attributes.at(offset); - const Id composite = description.id; - std::vector indices; - if (!description.is_scalar) { - indices.push_back(element - description.first_element); - } - return {ArrayPass(t_out_float, composite, indices), Type::Float}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", - static_cast(attribute)); - return {}; - } - }(); - - } else if (const auto patch = std::get_if(&*dest)) { - target = [&]() -> Expression { - const u32 offset = patch->GetOffset(); - switch (offset) { - case 0: - case 1: - case 2: - case 3: - return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float}; - case 4: - case 5: - return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float}; - } - UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset); - return {}; - }(); - - } else if (const auto lmem = std::get_if(&*dest)) { - Id address = AsUint(Visit(lmem->GetAddress())); - address = OpUDiv(t_uint, address, Constant(t_uint, 4)); - target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; - - } else if (const auto smem = std::get_if(&*dest)) { - target = {GetSharedMemoryPointer(*smem), Type::Uint}; - - } else if (const auto gmem = std::get_if(&*dest)) { - target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; - - } else if (const auto cv = std::get_if(&*dest)) { - target = {custom_variables.at(cv->GetIndex()), Type::Float}; - - 
} else { - UNIMPLEMENTED(); - } - - if (!target.id) { - // On failure we return a nullptr target.id, skip these stores. - return {}; - } - - OpStore(target.id, As(Visit(src), target.type)); - return {}; - } - - template - Expression FCastHalf(Operation operation) { - const Id value = AsHalfFloat(Visit(operation[0])); - return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)), - Type::Float}; - } - - Expression FSwizzleAdd(Operation operation) { - const Id minus = Constant(t_float, -1.0f); - const Id plus = v_float_one; - const Id zero = v_float_zero; - const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero); - const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus); - - Id mask = OpLoad(t_uint, thread_id); - mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); - mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1)); - mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask); - mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); - - const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask); - const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask); - - const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a); - const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b); - return {OpFAdd(t_float, op_a, op_b), Type::Float}; - } - - Expression HNegate(Operation operation) { - const bool is_f16 = device.IsFloat16Supported(); - const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000); - const Id one = Constant(t_scalar_half, is_f16 ? 
0x3c00 : 0x3f800000); - const auto GetNegate = [&](std::size_t index) { - return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one); - }; - const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2)); - return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const auto Pack = [&](std::size_t index) { - const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index]))); - return OpCompositeConstruct(t_half, scalar, scalar); - }; - const Id value = AsHalfFloat(Visit(operation[0])); - const Id min = Pack(1); - const Id max = Pack(2); - - const Id clamped = OpFClamp(t_half, value, min, max); - if (IsPrecise(operation)) { - Decorate(clamped, spv::Decoration::NoContraction); - } - return {clamped, Type::HalfFloat}; - } - - Expression HCastFloat(Operation operation) { - const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); - return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = Visit(operation[0]); - const auto type = std::get(operation.GetMeta()); - if (type == Tegra::Shader::HalfType::H0_H1) { - return operand; - } - const auto value = [&] { - switch (std::get(operation.GetMeta())) { - case Tegra::Shader::HalfType::F32: - return GetHalfScalarFromFloat(AsFloat(operand)); - case Tegra::Shader::HalfType::H0_H0: - return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0); - case Tegra::Shader::HalfType::H1_H1: - return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1); - default: - UNREACHABLE(); - return ConstantNull(t_half); - } - }(); - return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat}; - } - - Expression HMergeF32(Operation operation) { - const Id value = AsHalfFloat(Visit(operation[0])); - return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), Type::Float}; - } 
- - template - Expression HMergeHN(Operation operation) { - const Id target = AsHalfFloat(Visit(operation[0])); - const Id source = AsHalfFloat(Visit(operation[1])); - const Id object = OpCompositeExtract(t_scalar_half, source, offset); - return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); - const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1]))); - return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat}; - } - - Expression LogicalAddCarry(Operation operation) { - const Id op_a = AsUint(Visit(operation[0])); - const Id op_b = AsUint(Visit(operation[1])); - - const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); - const Id carry = OpCompositeExtract(t_uint, result, 1); - return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Id target{}; - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = predicates.at(index); - - } else if (const auto flag = std::get_if(&*dest)) { - target = internal_flags.at(static_cast(flag->GetFlag())); - } - - OpStore(target, AsBool(Visit(src))); - return {}; - } - - Expression LogicalFOrdered(Operation operation) { - // Emulate SPIR-V's OpOrdered - const Id op_a = AsFloat(Visit(operation[0])); - const Id op_b = AsFloat(Visit(operation[1])); - const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a); - const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b); - return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool}; - } - - Expression 
LogicalFUnordered(Operation operation) { - // Emulate SPIR-V's OpUnordered - const Id op_a = AsFloat(Visit(operation[0])); - const Id op_b = AsFloat(Visit(operation[1])); - const Id is_nan_a = OpIsNan(t_bool, op_a); - const Id is_nan_b = OpIsNan(t_bool, op_b); - return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool}; - } - - Id GetTextureSampler(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - ASSERT(!meta.sampler.is_buffer); - - const auto& entry = sampled_images.at(meta.sampler.index); - Id sampler = entry.variable; - if (meta.sampler.is_indexed) { - const Id index = AsInt(Visit(meta.index)); - sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index); - } - return OpLoad(entry.sampler_type, sampler); - } - - Id GetTextureImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 index = meta.sampler.index; - if (meta.sampler.is_buffer) { - const auto& entry = uniform_texels.at(index); - return OpLoad(entry.image_type, entry.image); - } else { - const auto& entry = sampled_images.at(index); - return OpImage(entry.image_type, GetTextureSampler(operation)); - } - } - - Id GetImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const auto entry = images.at(meta.image.index); - return OpLoad(entry.image_type, entry.image); - } - - Id AssembleVector(const std::vector& coords, Type type) { - const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1); - return coords.size() == 1 ? 
coords[0] : OpCompositeConstruct(coords_type, coords); - } - - Id GetCoordinates(Operation operation, Type type) { - std::vector coords; - for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) { - coords.push_back(As(Visit(operation[i]), type)); - } - if (const auto meta = std::get_if(&operation.GetMeta())) { - // Add array coordinate for textures - if (meta->sampler.is_array) { - Id array = AsInt(Visit(meta->array)); - if (type == Type::Float) { - array = OpConvertSToF(t_float, array); - } - coords.push_back(array); - } - } - return AssembleVector(coords, type); - } - - Id GetOffsetCoordinates(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - std::vector coords; - coords.reserve(meta.aoffi.size()); - for (const auto& coord : meta.aoffi) { - coords.push_back(AsInt(Visit(coord))); - } - return AssembleVector(coords, Type::Int); - } - - std::pair GetDerivatives(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const auto& derivatives = meta.derivates; - ASSERT(derivatives.size() % 2 == 0); - - const std::size_t components = derivatives.size() / 2; - std::vector dx, dy; - dx.reserve(components); - dy.reserve(components); - for (std::size_t index = 0; index < components; ++index) { - dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0)))); - dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1)))); - } - return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)}; - } - - Expression GetTextureElement(Operation operation, Id sample_value, Type type) { - const auto& meta = std::get(operation.GetMeta()); - const auto type_def = GetTypeDefinition(type); - return {OpCompositeExtract(type_def, sample_value, meta.element), type}; - } - - Expression Texture(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const bool can_implicit = stage == ShaderType::Fragment; - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, 
Type::Float); - - std::vector operands; - spv::ImageOperandsMask mask{}; - if (meta.bias) { - mask = mask | spv::ImageOperandsMask::Bias; - operands.push_back(AsFloat(Visit(meta.bias))); - } - - if (!can_implicit) { - mask = mask | spv::ImageOperandsMask::Lod; - operands.push_back(v_float_zero); - } - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.depth_compare) { - // Depth sampling - UNIMPLEMENTED_IF(meta.bias); - const Id dref = AsFloat(Visit(meta.depth_compare)); - if (can_implicit) { - return { - OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } else { - return { - OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } - } - - Id texture; - if (can_implicit) { - texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands); - } else { - texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); - } - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - const Id lod = AsFloat(Visit(meta.lod)); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod; - std::vector operands{lod}; - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.sampler.is_shadow) { - const Id dref = AsFloat(Visit(meta.depth_compare)); - return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } - const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureGather(Operation operation) { 
- const auto& meta = std::get(operation.GetMeta()); - - const Id coords = GetCoordinates(operation, Type::Float); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; - std::vector operands; - Id texture{}; - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.sampler.is_shadow) { - texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords, - AsFloat(Visit(meta.depth_compare)), mask, operands); - } else { - u32 component_value = 0; - if (meta.component) { - const auto component = std::get_if(&*meta.component); - ASSERT_MSG(component, "Component is not an immediate value"); - component_value = component->GetValue(); - } - texture = OpImageGather(t_float4, GetTextureSampler(operation), coords, - Constant(t_uint, component_value), mask, operands); - } - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureQueryDimensions(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - UNIMPLEMENTED_IF(meta.depth_compare); - - const auto image_id = GetTextureImage(operation); - if (meta.element == 3) { - return {OpImageQueryLevels(t_int, image_id), Type::Int}; - } - - const Id lod = AsUint(Visit(operation[0])); - const std::size_t coords_count = [&meta] { - switch (const auto type = meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return 1; - case Tegra::Shader::TextureType::Texture2D: - case Tegra::Shader::TextureType::TextureCube: - return 2; - case Tegra::Shader::TextureType::Texture3D: - return 3; - default: - UNREACHABLE_MSG("Invalid texture type={}", type); - return 2; - } - }(); - - if (meta.element >= coords_count) { - return {v_float_zero, Type::Float}; - } - - const std::array types = {t_int, t_int2, t_int3}; - const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod); - const Id size = 
OpCompositeExtract(t_int, sizes, meta.element); - return {size, Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - UNIMPLEMENTED_IF(meta.depth_compare); - - if (meta.element >= 2) { - UNREACHABLE_MSG("Invalid element"); - return {v_float_zero, Type::Float}; - } - const auto sampler_id = GetTextureSampler(operation); - - const Id multiplier = Constant(t_float, 256.0f); - const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier); - - const Id coords = GetCoordinates(operation, Type::Float); - Id size = OpImageQueryLod(t_float2, sampler_id, coords); - size = OpFMul(t_float2, size, multipliers); - size = OpConvertFToS(t_int2, size); - return GetTextureElement(operation, size, Type::Int); - } - - Expression TexelFetch(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.depth_compare); - - const Id image = GetTextureImage(operation); - const Id coords = GetCoordinates(operation, Type::Int); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; - std::vector operands; - Id fetch; - - if (meta.lod && !meta.sampler.is_buffer) { - mask = mask | spv::ImageOperandsMask::Lod; - operands.push_back(AsInt(Visit(meta.lod))); - } - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - fetch = OpImageFetch(t_float4, image, coords, mask, operands); - return GetTextureElement(operation, fetch, Type::Float); - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - const auto [dx, dy] = GetDerivatives(operation); - const std::vector grad = {dx, dy}; - - static constexpr auto mask = 
spv::ImageOperandsMask::Grad; - const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad); - return GetTextureElement(operation, texture, Type::Float); - } - - Expression ImageLoad(Operation operation) { - if (!device.IsFormatlessImageLoadSupported()) { - return {v_float_zero, Type::Float}; - } - - const auto& meta{std::get(operation.GetMeta())}; - - const Id coords = GetCoordinates(operation, Type::Int); - const Id texel = OpImageRead(t_uint4, GetImage(operation), coords); - - return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto meta{std::get(operation.GetMeta())}; - std::vector colors; - for (const auto& value : meta.values) { - colors.push_back(AsUint(Visit(value))); - } - - const Id coords = GetCoordinates(operation, Type::Int); - const Id texel = OpCompositeConstruct(t_uint4, colors); - - OpImageWrite(GetImage(operation), coords, texel, {}); - return {}; - } - - template - Expression AtomicImage(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - const Id coordinate = GetCoordinates(operation, Type::Int); - const Id image = images.at(meta.image.index).image; - const Id sample = v_uint_zero; - const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample); - - const Id scope = Constant(t_uint, static_cast(spv::Scope::Device)); - const Id semantics = v_uint_zero; - const Id value = AsUint(Visit(meta.values[0])); - return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; - } - - template - Expression Atomic(Operation operation) { - Id pointer; - if (const auto smem = std::get_if(&*operation[0])) { - pointer = GetSharedMemoryPointer(*smem); - } else if (const auto gmem = std::get_if(&*operation[0])) { - pointer = GetGlobalMemoryPointer(*gmem); - } else { - UNREACHABLE(); - return {v_float_zero, Type::Float}; - } - const Id scope = Constant(t_uint, 
static_cast(spv::Scope::Device)); - const Id semantics = v_uint_zero; - const Id value = AsUint(Visit(operation[1])); - - return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; - } - - template - Expression Reduce(Operation operation) { - Atomic(operation); - return {}; - } - - Expression Branch(Operation operation) { - const auto& target = std::get(*operation[0]); - OpStore(jmp_to, Constant(t_uint, target.GetValue())); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Expression BranchIndirect(Operation operation) { - const Id op_a = AsUint(Visit(operation[0])); - - OpStore(jmp_to, op_a); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto& target = std::get(*operation[0]); - const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); - const Id current = OpLoad(t_uint, flow_stack_top); - const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1)); - const Id access = OpAccessChain(t_func_uint, flow_stack, current); - - OpStore(access, Constant(t_uint, target.GetValue())); - OpStore(flow_stack_top, next); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); - const Id current = OpLoad(t_uint, flow_stack_top); - const Id previous = OpISub(t_uint, current, Constant(t_uint, 1)); - const Id access = OpAccessChain(t_func_uint, flow_stack, previous); - const Id target = OpLoad(t_uint, access); - - OpStore(flow_stack_top, previous); - OpStore(jmp_to, target); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) { - using Compare = Maxwell::ComparisonOp; - switch (compare_op) { - case 
Compare::NeverOld: - return v_false; // Never let the test pass - case Compare::LessOld: - return OpFOrdLessThan(t_bool, operand_1, operand_2); - case Compare::EqualOld: - return OpFOrdEqual(t_bool, operand_1, operand_2); - case Compare::LessEqualOld: - return OpFOrdLessThanEqual(t_bool, operand_1, operand_2); - case Compare::GreaterOld: - return OpFOrdGreaterThan(t_bool, operand_1, operand_2); - case Compare::NotEqualOld: - return OpFOrdNotEqual(t_bool, operand_1, operand_2); - case Compare::GreaterEqualOld: - return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); - default: - UNREACHABLE(); - return v_true; - } - } - - void AlphaTest(Id pointer) { - if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) { - return; - } - const Id true_label = OpLabel(); - const Id discard_label = OpLabel(); - const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); - const Id alpha_value = OpLoad(t_float, pointer); - const Id condition = - MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference); - - OpBranchConditional(condition, true_label, discard_label); - AddLabel(discard_label); - OpKill(); - AddLabel(true_label); - } - - void PreExit() { - if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) { - const u32 position_index = out_indices.position.value(); - const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U); - const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U); - Id depth = OpLoad(t_float, z_pointer); - depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer)); - depth = OpFMul(t_float, depth, Constant(t_float, 0.5f)); - OpStore(z_pointer, depth); - } - if (stage == ShaderType::Fragment) { - const auto SafeGetRegister = [this](u32 reg) { - if (const auto it = registers.find(reg); it != registers.end()) { - return OpLoad(t_float, it->second); - } - return v_float_zero; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 
0, - "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. - u32 current_reg = 0; - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); - OpStore(pointer, SafeGetRegister(current_reg)); - if (rt == 0 && component == 3) { - AlphaTest(pointer); - } - ++current_reg; - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and - // current_reg already contains one past the last color register. - OpStore(frag_depth, SafeGetRegister(current_reg + 1)); - } - } - } - - Expression Exit(Operation operation) { - PreExit(); - inside_branch = true; - if (conditional_branch_set) { - OpReturn(); - } else { - const Id dummy = OpLabel(); - OpBranch(dummy); - AddLabel(dummy); - OpReturn(); - AddLabel(); - } - return {}; - } - - Expression Discard(Operation operation) { - inside_branch = true; - if (conditional_branch_set) { - OpKill(); - } else { - const Id dummy = OpLabel(); - OpBranch(dummy); - AddLabel(dummy); - OpKill(); - AddLabel(); - } - return {}; - } - - Expression EmitVertex(Operation) { - OpEmitVertex(); - return {}; - } - - Expression EndPrimitive(Operation operation) { - OpEndPrimitive(); - return {}; - } - - Expression InvocationId(Operation) { - return {OpLoad(t_int, invocation_id), Type::Int}; - } - - Expression YNegate(Operation) { - LOG_WARNING(Render_Vulkan, "(STUBBED)"); - return {Constant(t_float, 1.0f), Type::Float}; - } - - template - Expression LocalInvocationId(Operation) { - const Id id = OpLoad(t_uint3, local_invocation_id); - return {OpCompositeExtract(t_uint, id, element), 
Type::Uint}; - } - - template - Expression WorkGroupId(Operation operation) { - const Id id = OpLoad(t_uint3, workgroup_id); - return {OpCompositeExtract(t_uint, id, element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const Id predicate = AsBool(Visit(operation[0])); - const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate); - - if (!device.IsWarpSizePotentiallyBiggerThanGuest()) { - // Guest-like devices can just return the first index. - return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint}; - } - - // The others will have to return what is local to the current thread. - // For instance a device with a warp size of 64 will return the upper uint when the current - // thread is 38. - const Id tid = OpLoad(t_uint, thread_id); - const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5)); - return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint}; - } - - template - Expression Vote(Operation operation) { - // TODO(Rodrigo): Handle devices with different warp sizes - const Id predicate = AsBool(Visit(operation[0])); - return {(this->*func)(t_bool, predicate), Type::Bool}; - } - - Expression ThreadId(Operation) { - return {OpLoad(t_uint, thread_id), Type::Uint}; - } - - template - Expression ThreadMask(Operation) { - // TODO(Rodrigo): Handle devices with different warp sizes - const Id mask = thread_masks[index]; - return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - const Id value = AsFloat(Visit(operation[0])); - const Id index = AsUint(Visit(operation[1])); - return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); - return {}; - } - - const auto scope = spv::Scope::Workgroup; - const auto memory = spv::Scope::Workgroup; - const auto semantics = - 
spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease; - OpControlBarrier(Constant(t_uint, static_cast(scope)), - Constant(t_uint, static_cast(memory)), - Constant(t_uint, static_cast(semantics))); - return {}; - } - - template - Expression MemoryBarrier(Operation) { - const auto semantics = - spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | - spv::MemorySemanticsMask::WorkgroupMemory | - spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory; - - OpMemoryBarrier(Constant(t_uint, static_cast(scope)), - Constant(t_uint, static_cast(semantics))); - return {}; - } - - Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) { - const Id id = OpVariable(type, storage); - Decorate(id, spv::Decoration::BuiltIn, static_cast(builtin)); - AddGlobalVariable(Name(id, std::move(name))); - interfaces.push_back(id); - return id; - } - - Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) { - return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name)); - } - - template - Id AccessElement(Id pointer_type, Id composite, Args... 
elements_) { - std::vector members; - auto elements = {elements_...}; - for (const auto element : elements) { - members.push_back(Constant(t_uint, element)); - } - - return OpAccessChain(pointer_type, composite, members); - } - - Id As(Expression expr, Type wanted_type) { - switch (wanted_type) { - case Type::Bool: - return AsBool(expr); - case Type::Bool2: - return AsBool2(expr); - case Type::Float: - return AsFloat(expr); - case Type::Int: - return AsInt(expr); - case Type::Uint: - return AsUint(expr); - case Type::HalfFloat: - return AsHalfFloat(expr); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsBool(Expression expr) { - ASSERT(expr.type == Type::Bool); - return expr.id; - } - - Id AsBool2(Expression expr) { - ASSERT(expr.type == Type::Bool2); - return expr.id; - } - - Id AsFloat(Expression expr) { - switch (expr.type) { - case Type::Float: - return expr.id; - case Type::Int: - case Type::Uint: - return OpBitcast(t_float, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_float, expr.id); - } - return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id)); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsInt(Expression expr) { - switch (expr.type) { - case Type::Int: - return expr.id; - case Type::Float: - case Type::Uint: - return OpBitcast(t_int, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_int, expr.id); - } - return OpPackHalf2x16(t_int, expr.id); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsUint(Expression expr) { - switch (expr.type) { - case Type::Uint: - return expr.id; - case Type::Float: - case Type::Int: - return OpBitcast(t_uint, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_uint, expr.id); - } - return OpPackHalf2x16(t_uint, expr.id); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsHalfFloat(Expression expr) { - switch (expr.type) { - case Type::HalfFloat: 
- return expr.id; - case Type::Float: - case Type::Int: - case Type::Uint: - if (device.IsFloat16Supported()) { - return OpBitcast(t_half, expr.id); - } - return OpUnpackHalf2x16(t_half, AsUint(expr)); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id GetHalfScalarFromFloat(Id value) { - if (device.IsFloat16Supported()) { - return OpFConvert(t_scalar_half, value); - } - return value; - } - - Id GetFloatFromHalfScalar(Id value) { - if (device.IsFloat16Supported()) { - return OpFConvert(t_float, value); - } - return value; - } - - AttributeType GetAttributeType(u32 location) const { - if (stage != ShaderType::Vertex) { - return {Type::Float, t_in_float, t_in_float4}; - } - switch (specialization.attribute_types.at(location)) { - case Maxwell::VertexAttribute::Type::SignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - case Maxwell::VertexAttribute::Type::Float: - return {Type::Float, t_in_float, t_in_float4}; - case Maxwell::VertexAttribute::Type::SignedInt: - return {Type::Int, t_in_int, t_in_int4}; - case Maxwell::VertexAttribute::Type::UnsignedInt: - return {Type::Uint, t_in_uint, t_in_uint4}; - default: - UNREACHABLE(); - return {Type::Float, t_in_float, t_in_float4}; - } - } - - Id GetTypeDefinition(Type type) const { - switch (type) { - case Type::Bool: - return t_bool; - case Type::Bool2: - return t_bool2; - case Type::Float: - return t_float; - case Type::Int: - return t_int; - case Type::Uint: - return t_uint; - case Type::HalfFloat: - return t_half; - default: - UNREACHABLE(); - return {}; - } - } - - std::array GetTypeVectorDefinitionLut(Type type) const { - switch (type) { - case Type::Float: - return {t_float, t_float2, t_float3, t_float4}; - case Type::Int: - return {t_int, t_int2, t_int3, t_int4}; - case Type::Uint: - return {t_uint, t_uint2, t_uint3, t_uint4}; - default: - UNIMPLEMENTED(); - return {}; - } - } - - 
std::tuple CreateFlowStack() { - // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely - // that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - constexpr auto storage_class = spv::StorageClass::Function; - - const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); - const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, - ConstantNull(flow_stack_type)); - const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0)); - AddLocalVariable(stack); - AddLocalVariable(top); - return std::tie(stack, top); - } - - std::pair GetFlowStack(Operation operation) { - const auto stack_class = std::get(operation.GetMeta()); - switch (stack_class) { - case MetaStackClass::Ssy: - return {ssy_flow_stack, ssy_flow_stack_top}; - case MetaStackClass::Pbk: - return {pbk_flow_stack, pbk_flow_stack_top}; - } - UNREACHABLE(); - return {}; - } - - Id GetGlobalMemoryPointer(const GmemNode& gmem) { - const Id real = AsUint(Visit(gmem.GetRealAddress())); - const Id base = AsUint(Visit(gmem.GetBaseAddress())); - const Id diff = OpISub(t_uint, real, base); - const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); - const Id buffer = global_buffers.at(gmem.GetDescriptor()); - return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); - } - - Id GetSharedMemoryPointer(const SmemNode& smem) { - ASSERT(stage == ShaderType::Compute); - Id address = AsUint(Visit(smem.GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - return OpAccessChain(t_smem_uint, shared_memory, address); - } - - static constexpr std::array operation_decompilers = { - &SPIRVDecompiler::Assign, - - &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, - Type::Float>, - - &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>, - 
&SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>, - &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, - &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, - &SPIRVDecompiler::FCastHalf<0>, - &SPIRVDecompiler::FCastHalf<1>, - &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, - &SPIRVDecompiler::FSwizzleAdd, - - &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>, - - &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>, - 
&SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>, - &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>, - &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>, - - &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>, - &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>, - &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>, - - &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>, - &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, - &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, - 
&SPIRVDecompiler::HNegate, - &SPIRVDecompiler::HClamp, - &SPIRVDecompiler::HCastFloat, - &SPIRVDecompiler::HUnpack, - &SPIRVDecompiler::HMergeF32, - &SPIRVDecompiler::HMergeHN<0>, - &SPIRVDecompiler::HMergeHN<1>, - &SPIRVDecompiler::HPack2, - - &SPIRVDecompiler::LogicalAssign, - &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, - &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2, - Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>, - - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::LogicalFOrdered, - &SPIRVDecompiler::LogicalFUnordered, - &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>, - - &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>, - 
&SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>, - - &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>, - - &SPIRVDecompiler::LogicalAddCarry, - - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, - // TODO(Rodrigo): Should these use the OpFUnord* variants? 
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, - - &SPIRVDecompiler::Texture, - &SPIRVDecompiler::TextureLod, - &SPIRVDecompiler::TextureGather, - &SPIRVDecompiler::TextureQueryDimensions, - &SPIRVDecompiler::TextureQueryLod, - &SPIRVDecompiler::TexelFetch, - &SPIRVDecompiler::TextureGradient, - - &SPIRVDecompiler::ImageLoad, - &SPIRVDecompiler::ImageStore, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>, - 
&SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Branch, - &SPIRVDecompiler::BranchIndirect, - &SPIRVDecompiler::PushFlowStack, - &SPIRVDecompiler::PopFlowStack, - &SPIRVDecompiler::Exit, - &SPIRVDecompiler::Discard, - - &SPIRVDecompiler::EmitVertex, - &SPIRVDecompiler::EndPrimitive, - - &SPIRVDecompiler::InvocationId, - &SPIRVDecompiler::YNegate, - &SPIRVDecompiler::LocalInvocationId<0>, - &SPIRVDecompiler::LocalInvocationId<1>, - &SPIRVDecompiler::LocalInvocationId<2>, - &SPIRVDecompiler::WorkGroupId<0>, - &SPIRVDecompiler::WorkGroupId<1>, - &SPIRVDecompiler::WorkGroupId<2>, - - &SPIRVDecompiler::BallotThread, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, - - &SPIRVDecompiler::ThreadId, - &SPIRVDecompiler::ThreadMask<0>, // Eq - &SPIRVDecompiler::ThreadMask<1>, // Ge - &SPIRVDecompiler::ThreadMask<2>, // Gt - &SPIRVDecompiler::ThreadMask<3>, // Le - &SPIRVDecompiler::ThreadMask<4>, // Lt - &SPIRVDecompiler::ShuffleIndexed, - - &SPIRVDecompiler::Barrier, - &SPIRVDecompiler::MemoryBarrier, - &SPIRVDecompiler::MemoryBarrier, - }; - static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); - - const Device& device; - const ShaderIR& ir; - const ShaderType stage; - const Tegra::Shader::Header header; - const Registry& registry; - const Specialization& specialization; - std::unordered_map transform_feedback; - - const Id t_void = Name(TypeVoid(), "void"); - - const Id t_bool = Name(TypeBool(), "bool"); - const Id 
t_bool2 = Name(TypeVector(t_bool, 2), "bool2"); - - const Id t_int = Name(TypeInt(32, true), "int"); - const Id t_int2 = Name(TypeVector(t_int, 2), "int2"); - const Id t_int3 = Name(TypeVector(t_int, 3), "int3"); - const Id t_int4 = Name(TypeVector(t_int, 4), "int4"); - - const Id t_uint = Name(TypeInt(32, false), "uint"); - const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2"); - const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3"); - const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4"); - - const Id t_float = Name(TypeFloat(32), "float"); - const Id t_float2 = Name(TypeVector(t_float, 2), "float2"); - const Id t_float3 = Name(TypeVector(t_float, 3), "float3"); - const Id t_float4 = Name(TypeVector(t_float, 4), "float4"); - - const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool"); - const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float"); - - const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint"); - - const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool"); - const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int"); - const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4"); - const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint"); - const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3"); - const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4"); - const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float"); - const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2"); - const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3"); - const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4"); - - const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), 
"out_int"); - - const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float"); - const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); - - const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); - const Id t_cbuf_std140 = Decorate( - Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"), - spv::Decoration::ArrayStride, 16U); - const Id t_cbuf_scalar = Decorate( - Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"), - spv::Decoration::ArrayStride, 4U); - const Id t_cbuf_std140_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_scalar_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); - const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); - - Id t_smem_uint{}; - - const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); - const Id t_gmem_array = - Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); - const Id t_gmem_struct = MemberDecorate( - Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); - - const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint); - - const Id v_float_zero = Constant(t_float, 0.0f); - const Id v_float_one = Constant(t_float, 1.0f); - const Id v_uint_zero = Constant(t_uint, 0); - - // Nvidia uses these defaults for varyings (e.g. 
position and generic attributes) - const Id v_varying_default = - ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one); - - const Id v_true = ConstantTrue(t_bool); - const Id v_false = ConstantFalse(t_bool); - - Id t_scalar_half{}; - Id t_half{}; - - Id out_vertex{}; - Id in_vertex{}; - std::map registers; - std::map custom_variables; - std::map predicates; - std::map flow_variables; - Id local_memory{}; - Id shared_memory{}; - std::array internal_flags{}; - std::map input_attributes; - std::unordered_map output_attributes; - std::map constant_buffers; - std::map global_buffers; - std::map uniform_texels; - std::map sampled_images; - std::map images; - - std::array frag_colors{}; - Id instance_index{}; - Id vertex_index{}; - Id base_instance{}; - Id base_vertex{}; - Id frag_depth{}; - Id frag_coord{}; - Id front_facing{}; - Id point_coord{}; - Id tess_level_outer{}; - Id tess_level_inner{}; - Id tess_coord{}; - Id invocation_id{}; - Id workgroup_id{}; - Id local_invocation_id{}; - Id thread_id{}; - std::array thread_masks{}; // eq, ge, gt, le, lt - - VertexIndices in_indices; - VertexIndices out_indices; - - std::vector interfaces; - - Id jmp_to{}; - Id ssy_flow_stack_top{}; - Id pbk_flow_stack_top{}; - Id ssy_flow_stack{}; - Id pbk_flow_stack{}; - Id continue_label{}; - std::map labels; - - bool conditional_branch_set{}; - bool inside_branch{}; -}; - -class ExprDecompiler { -public: - explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} - - Id operator()(const ExprAnd& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - const Id op2 = Visit(expr.operand2); - return decomp.OpLogicalAnd(type_def, op1, op2); - } - - Id operator()(const ExprOr& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - const Id op2 = Visit(expr.operand2); - return decomp.OpLogicalOr(type_def, op1, op2); - } - - Id operator()(const 
ExprNot& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - return decomp.OpLogicalNot(type_def, op1); - } - - Id operator()(const ExprPredicate& expr) { - const auto pred = static_cast(expr.predicate); - return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred)); - } - - Id operator()(const ExprCondCode& expr) { - return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc))); - } - - Id operator()(const ExprVar& expr) { - return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index)); - } - - Id operator()(const ExprBoolean& expr) { - return expr.value ? decomp.v_true : decomp.v_false; - } - - Id operator()(const ExprGprEqual& expr) { - const Id target = decomp.Constant(decomp.t_uint, expr.value); - Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)); - gpr = decomp.OpBitcast(decomp.t_uint, gpr); - return decomp.OpIEqual(decomp.t_bool, gpr, target); - } - - Id Visit(const Expr& node) { - return std::visit(*this, *node); - } - -private: - SPIRVDecompiler& decomp; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.OpBranch(endif_label); - decomp.AddLabel(endif_label); - } - - void operator()([[maybe_unused]] const ASTIfElse& ast) 
{ - UNREACHABLE(); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBasicBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - decomp.OpStore(decomp.flow_variables.at(ast.index), condition); - } - - void operator()([[maybe_unused]] const ASTLabel& ast) { - // Do nothing - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) { - const Id loop_label = decomp.OpLabel(); - const Id endloop_label = decomp.OpLabel(); - const Id loop_start_block = decomp.OpLabel(); - const Id loop_continue_block = decomp.OpLabel(); - current_loop_exit = endloop_label; - decomp.OpBranch(loop_label); - decomp.AddLabel(loop_label); - decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone); - decomp.OpBranch(loop_start_block); - decomp.AddLabel(loop_start_block); - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.OpBranch(loop_continue_block); - decomp.AddLabel(loop_continue_block); - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - decomp.OpBranchConditional(condition, loop_label, endloop_label); - decomp.AddLabel(endloop_label); - } - - void operator()(const ASTReturn& ast) { - if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - if (ast.kills) { - decomp.OpKill(); - } else { - decomp.PreExit(); - 
decomp.OpReturn(); - } - decomp.AddLabel(endif_label); - } else { - const Id next_block = decomp.OpLabel(); - decomp.OpBranch(next_block); - decomp.AddLabel(next_block); - if (ast.kills) { - decomp.OpKill(); - } else { - decomp.PreExit(); - decomp.OpReturn(); - } - decomp.AddLabel(decomp.OpLabel()); - } - } - - void operator()(const ASTBreak& ast) { - if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - decomp.OpBranch(current_loop_exit); - decomp.AddLabel(endif_label); - } else { - const Id next_block = decomp.OpLabel(); - decomp.OpBranch(next_block); - decomp.AddLabel(next_block); - decomp.OpBranch(current_loop_exit); - decomp.AddLabel(decomp.OpLabel()); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - SPIRVDecompiler& decomp; - Id current_loop_exit{}; -}; - -void SPIRVDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("flow_var_{}", i)); - flow_variables.emplace(i, AddGlobalVariable(id)); - } - - DefinePrologue(); - - const ASTNode program = ir.GetASTProgram(); - ASTDecompiler decompiler{*this}; - decompiler.Visit(program); - - const Id next_block = OpLabel(); - OpBranch(next_block); - AddLabel(next_block); -} - -} // Anonymous namespace - -ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second, cbuf.first); - } - for 
(const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_buffers.emplace_back(GlobalBufferEntry{ - .cbuf_index = base.cbuf_index, - .cbuf_offset = base.cbuf_offset, - .is_written = usage.is_written, - }); - } - for (const auto& sampler : ir.GetSamplers()) { - if (sampler.is_buffer) { - entries.uniform_texels.emplace_back(sampler); - } else { - entries.samplers.emplace_back(sampler); - } - } - for (const auto& image : ir.GetImages()) { - if (image.type == Tegra::Shader::ImageType::TextureBuffer) { - entries.storage_texels.emplace_back(image); - } else { - entries.images.emplace_back(image); - } - } - for (const auto& attribute : ir.GetInputAttributes()) { - if (IsGenericAttribute(attribute)) { - entries.attributes.insert(GetGenericAttributeLocation(attribute)); - } - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.clip_distances = ir.GetClipDistances(); - entries.shader_length = ir.GetLength(); - entries.uses_warps = ir.UsesWarps(); - return entries; -} - -std::vector Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - ShaderType stage, const VideoCommon::Shader::Registry& registry, - const Specialization& specialization) { - return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h deleted file mode 100644 index 5d94132a5..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace Vulkan { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using StorageTexelEntry = VideoCommon::Shader::ImageEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -constexpr u32 DESCRIPTOR_SET = 0; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_) - : ConstBuffer{entry_}, index{index_} {} - - constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -struct GlobalBufferEntry { - u32 cbuf_index{}; - u32 cbuf_offset{}; - bool is_written{}; -}; - -struct ShaderEntries { - u32 NumBindings() const { - return static_cast(const_buffers.size() + global_buffers.size() + - uniform_texels.size() + samplers.size() + storage_texels.size() + - images.size()); - } - - std::vector const_buffers; - std::vector global_buffers; - std::vector uniform_texels; - std::vector samplers; - std::vector storage_texels; - std::vector images; - std::set attributes; - std::array clip_distances{}; - std::size_t shader_length{}; - u32 enabled_uniform_buffers{}; - bool uses_warps{}; -}; - -struct Specialization final { - u32 base_binding{}; - - // Compute specific - std::array workgroup_size{}; - u32 shared_memory_size{}; - - // Graphics specific - std::optional point_size; - std::bitset enabled_attributes; - std::array attribute_types{}; - bool ndc_minus_one_to_one{}; - bool early_fragment_tests{}; - float alpha_test_ref{}; - Maxwell::ComparisonOp alpha_test_func{}; -}; -// Old gcc versions don't consider this trivially copyable. 
-// static_assert(std::is_trivially_copyable_v); - -struct SPIRVShader { - std::vector code; - ShaderEntries entries; -}; - -ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); - -std::vector Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage, - const VideoCommon::Shader::Registry& registry, - const Specialization& specialization); - -} // namespace Vulkan -- cgit v1.2.3 From c67d64365a712830fe140dd36e24e2efd9b8a812 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 20:52:12 -0300 Subject: shader: Remove old shader management --- src/video_core/CMakeLists.txt | 64 - .../engines/const_buffer_engine_interface.h | 103 - src/video_core/engines/kepler_compute.cpp | 44 +- src/video_core/engines/kepler_compute.h | 20 +- src/video_core/engines/maxwell_3d.cpp | 38 - src/video_core/engines/maxwell_3d.h | 20 +- src/video_core/guest_driver.cpp | 37 - src/video_core/guest_driver.h | 46 - src/video_core/rasterizer_interface.h | 16 +- .../renderer_opengl/gl_arb_decompiler.cpp | 2124 -------------- src/video_core/renderer_opengl/gl_arb_decompiler.h | 29 - src/video_core/renderer_opengl/gl_rasterizer.cpp | 314 +- src/video_core/renderer_opengl/gl_rasterizer.h | 33 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 564 +--- src/video_core/renderer_opengl/gl_shader_cache.h | 102 +- .../renderer_opengl/gl_shader_decompiler.cpp | 2986 -------------------- .../renderer_opengl/gl_shader_decompiler.h | 69 - .../renderer_opengl/gl_shader_disk_cache.cpp | 482 ---- .../renderer_opengl/gl_shader_disk_cache.h | 176 -- src/video_core/renderer_vulkan/blit_image.cpp | 1 - .../renderer_vulkan/vk_compute_pipeline.cpp | 136 +- .../renderer_vulkan/vk_compute_pipeline.h | 47 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 484 ---- .../renderer_vulkan/vk_graphics_pipeline.h | 103 - .../renderer_vulkan/vk_pipeline_cache.cpp | 375 +-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 91 +- 
src/video_core/renderer_vulkan/vk_rasterizer.cpp | 361 +-- src/video_core/renderer_vulkan/vk_rasterizer.h | 47 +- src/video_core/shader/ast.cpp | 752 ----- src/video_core/shader/ast.h | 398 --- src/video_core/shader/async_shaders.cpp | 234 -- src/video_core/shader/async_shaders.h | 138 - src/video_core/shader/compiler_settings.cpp | 26 - src/video_core/shader/compiler_settings.h | 26 - src/video_core/shader/control_flow.cpp | 751 ----- src/video_core/shader/control_flow.h | 117 - src/video_core/shader/decode.cpp | 368 --- src/video_core/shader/decode/arithmetic.cpp | 166 -- src/video_core/shader/decode/arithmetic_half.cpp | 101 - .../shader/decode/arithmetic_half_immediate.cpp | 54 - .../shader/decode/arithmetic_immediate.cpp | 53 - .../shader/decode/arithmetic_integer.cpp | 375 --- .../shader/decode/arithmetic_integer_immediate.cpp | 99 - src/video_core/shader/decode/bfe.cpp | 77 - src/video_core/shader/decode/bfi.cpp | 45 - src/video_core/shader/decode/conversion.cpp | 321 --- src/video_core/shader/decode/ffma.cpp | 62 - src/video_core/shader/decode/float_set.cpp | 58 - .../shader/decode/float_set_predicate.cpp | 57 - src/video_core/shader/decode/half_set.cpp | 115 - .../shader/decode/half_set_predicate.cpp | 80 - src/video_core/shader/decode/hfma2.cpp | 73 - src/video_core/shader/decode/image.cpp | 536 ---- src/video_core/shader/decode/integer_set.cpp | 49 - .../shader/decode/integer_set_predicate.cpp | 53 - src/video_core/shader/decode/memory.cpp | 493 ---- src/video_core/shader/decode/other.cpp | 322 --- .../shader/decode/predicate_set_predicate.cpp | 68 - .../shader/decode/predicate_set_register.cpp | 46 - .../shader/decode/register_set_predicate.cpp | 86 - src/video_core/shader/decode/shift.cpp | 153 - src/video_core/shader/decode/texture.cpp | 935 ------ src/video_core/shader/decode/video.cpp | 169 -- src/video_core/shader/decode/warp.cpp | 117 - src/video_core/shader/decode/xmad.cpp | 156 - src/video_core/shader/expr.cpp | 93 - src/video_core/shader/expr.h 
| 156 - src/video_core/shader/memory_util.cpp | 76 - src/video_core/shader/memory_util.h | 43 - src/video_core/shader/node.h | 701 ----- src/video_core/shader/node_helper.cpp | 115 - src/video_core/shader/node_helper.h | 71 - src/video_core/shader/registry.cpp | 181 -- src/video_core/shader/registry.h | 172 -- src/video_core/shader/shader_ir.cpp | 464 --- src/video_core/shader/shader_ir.h | 479 ---- src/video_core/shader/track.cpp | 236 -- src/video_core/shader/transform_feedback.cpp | 115 - src/video_core/shader/transform_feedback.h | 23 - 79 files changed, 53 insertions(+), 19513 deletions(-) delete mode 100644 src/video_core/engines/const_buffer_engine_interface.h delete mode 100644 src/video_core/guest_driver.cpp delete mode 100644 src/video_core/guest_driver.h delete mode 100644 src/video_core/renderer_opengl/gl_arb_decompiler.cpp delete mode 100644 src/video_core/renderer_opengl/gl_arb_decompiler.h delete mode 100644 src/video_core/renderer_opengl/gl_shader_decompiler.cpp delete mode 100644 src/video_core/renderer_opengl/gl_shader_decompiler.h delete mode 100644 src/video_core/renderer_opengl/gl_shader_disk_cache.cpp delete mode 100644 src/video_core/renderer_opengl/gl_shader_disk_cache.h delete mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h delete mode 100644 src/video_core/shader/ast.cpp delete mode 100644 src/video_core/shader/ast.h delete mode 100644 src/video_core/shader/async_shaders.cpp delete mode 100644 src/video_core/shader/async_shaders.h delete mode 100644 src/video_core/shader/compiler_settings.cpp delete mode 100644 src/video_core/shader/compiler_settings.h delete mode 100644 src/video_core/shader/control_flow.cpp delete mode 100644 src/video_core/shader/control_flow.h delete mode 100644 src/video_core/shader/decode.cpp delete mode 100644 src/video_core/shader/decode/arithmetic.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_half.cpp 
delete mode 100644 src/video_core/shader/decode/arithmetic_half_immediate.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_immediate.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_integer.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_integer_immediate.cpp delete mode 100644 src/video_core/shader/decode/bfe.cpp delete mode 100644 src/video_core/shader/decode/bfi.cpp delete mode 100644 src/video_core/shader/decode/conversion.cpp delete mode 100644 src/video_core/shader/decode/ffma.cpp delete mode 100644 src/video_core/shader/decode/float_set.cpp delete mode 100644 src/video_core/shader/decode/float_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/half_set.cpp delete mode 100644 src/video_core/shader/decode/half_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/hfma2.cpp delete mode 100644 src/video_core/shader/decode/image.cpp delete mode 100644 src/video_core/shader/decode/integer_set.cpp delete mode 100644 src/video_core/shader/decode/integer_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/memory.cpp delete mode 100644 src/video_core/shader/decode/other.cpp delete mode 100644 src/video_core/shader/decode/predicate_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/predicate_set_register.cpp delete mode 100644 src/video_core/shader/decode/register_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/shift.cpp delete mode 100644 src/video_core/shader/decode/texture.cpp delete mode 100644 src/video_core/shader/decode/video.cpp delete mode 100644 src/video_core/shader/decode/warp.cpp delete mode 100644 src/video_core/shader/decode/xmad.cpp delete mode 100644 src/video_core/shader/expr.cpp delete mode 100644 src/video_core/shader/expr.h delete mode 100644 src/video_core/shader/memory_util.cpp delete mode 100644 src/video_core/shader/memory_util.h delete mode 100644 src/video_core/shader/node.h delete mode 100644 
src/video_core/shader/node_helper.cpp delete mode 100644 src/video_core/shader/node_helper.h delete mode 100644 src/video_core/shader/registry.cpp delete mode 100644 src/video_core/shader/registry.h delete mode 100644 src/video_core/shader/shader_ir.cpp delete mode 100644 src/video_core/shader/shader_ir.h delete mode 100644 src/video_core/shader/track.cpp delete mode 100644 src/video_core/shader/transform_feedback.cpp delete mode 100644 src/video_core/shader/transform_feedback.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e4de55f4d..c5ce71706 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,7 +29,6 @@ add_library(video_core STATIC dirty_flags.h dma_pusher.cpp dma_pusher.h - engines/const_buffer_engine_interface.h engines/const_buffer_info.h engines/engine_interface.h engines/engine_upload.cpp @@ -61,8 +60,6 @@ add_library(video_core STATIC gpu.h gpu_thread.cpp gpu_thread.h - guest_driver.cpp - guest_driver.h memory_manager.cpp memory_manager.h query_cache.h @@ -71,8 +68,6 @@ add_library(video_core STATIC rasterizer_interface.h renderer_base.cpp renderer_base.h - renderer_opengl/gl_arb_decompiler.cpp - renderer_opengl/gl_arb_decompiler.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_device.cpp @@ -85,10 +80,6 @@ add_library(video_core STATIC renderer_opengl/gl_resource_manager.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h - renderer_opengl/gl_shader_decompiler.cpp - renderer_opengl/gl_shader_decompiler.h - renderer_opengl/gl_shader_disk_cache.cpp - renderer_opengl/gl_shader_disk_cache.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_util.cpp @@ -128,8 +119,6 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h - renderer_vulkan/vk_graphics_pipeline.cpp - 
renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp @@ -142,8 +131,6 @@ add_library(video_core STATIC renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_scheduler.cpp renderer_vulkan/vk_scheduler.h - renderer_vulkan/vk_shader_decompiler.cpp - renderer_vulkan/vk_shader_decompiler.h renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_staging_buffer_pool.cpp @@ -159,57 +146,6 @@ add_library(video_core STATIC shader_cache.h shader_notify.cpp shader_notify.h - shader/decode/arithmetic.cpp - shader/decode/arithmetic_immediate.cpp - shader/decode/bfe.cpp - shader/decode/bfi.cpp - shader/decode/shift.cpp - shader/decode/arithmetic_integer.cpp - shader/decode/arithmetic_integer_immediate.cpp - shader/decode/arithmetic_half.cpp - shader/decode/arithmetic_half_immediate.cpp - shader/decode/ffma.cpp - shader/decode/hfma2.cpp - shader/decode/conversion.cpp - shader/decode/memory.cpp - shader/decode/texture.cpp - shader/decode/image.cpp - shader/decode/float_set_predicate.cpp - shader/decode/integer_set_predicate.cpp - shader/decode/half_set_predicate.cpp - shader/decode/predicate_set_register.cpp - shader/decode/predicate_set_predicate.cpp - shader/decode/register_set_predicate.cpp - shader/decode/float_set.cpp - shader/decode/integer_set.cpp - shader/decode/half_set.cpp - shader/decode/video.cpp - shader/decode/warp.cpp - shader/decode/xmad.cpp - shader/decode/other.cpp - shader/ast.cpp - shader/ast.h - shader/async_shaders.cpp - shader/async_shaders.h - shader/compiler_settings.cpp - shader/compiler_settings.h - shader/control_flow.cpp - shader/control_flow.h - shader/decode.cpp - shader/expr.cpp - shader/expr.h - shader/memory_util.cpp - shader/memory_util.h - shader/node_helper.cpp - shader/node_helper.h - shader/node.h - shader/registry.cpp - shader/registry.h - shader/shader_ir.cpp - shader/shader_ir.h - shader/track.cpp - 
shader/transform_feedback.cpp - shader/transform_feedback.h surface.cpp surface.h texture_cache/accelerated_swizzle.cpp diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h deleted file mode 100644 index f46e81bb7..000000000 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include "common/bit_field.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" -#include "video_core/textures/texture.h" - -namespace Tegra::Engines { - -struct SamplerDescriptor { - union { - u32 raw = 0; - BitField<0, 2, Tegra::Shader::TextureType> texture_type; - BitField<2, 3, Tegra::Texture::ComponentType> r_type; - BitField<5, 1, u32> is_array; - BitField<6, 1, u32> is_buffer; - BitField<7, 1, u32> is_shadow; - BitField<8, 3, Tegra::Texture::ComponentType> g_type; - BitField<11, 3, Tegra::Texture::ComponentType> b_type; - BitField<14, 3, Tegra::Texture::ComponentType> a_type; - BitField<17, 7, Tegra::Texture::TextureFormat> format; - }; - - bool operator==(const SamplerDescriptor& rhs) const noexcept { - return raw == rhs.raw; - } - - bool operator!=(const SamplerDescriptor& rhs) const noexcept { - return !operator==(rhs); - } - - static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) { - using Tegra::Shader::TextureType; - SamplerDescriptor result; - - result.format.Assign(tic.format.Value()); - result.r_type.Assign(tic.r_type.Value()); - result.g_type.Assign(tic.g_type.Value()); - result.b_type.Assign(tic.b_type.Value()); - result.a_type.Assign(tic.a_type.Value()); - - switch (tic.texture_type.Value()) { - case Tegra::Texture::TextureType::Texture1D: - 
result.texture_type.Assign(TextureType::Texture1D); - return result; - case Tegra::Texture::TextureType::Texture2D: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::Texture3D: - result.texture_type.Assign(TextureType::Texture3D); - return result; - case Tegra::Texture::TextureType::TextureCubemap: - result.texture_type.Assign(TextureType::TextureCube); - return result; - case Tegra::Texture::TextureType::Texture1DArray: - result.texture_type.Assign(TextureType::Texture1D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DArray: - result.texture_type.Assign(TextureType::Texture2D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture1DBuffer: - result.texture_type.Assign(TextureType::Texture1D); - result.is_buffer.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DNoMipmap: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::TextureCubeArray: - result.texture_type.Assign(TextureType::TextureCube); - result.is_array.Assign(1); - return result; - default: - result.texture_type.Assign(TextureType::Texture2D); - return result; - } - } -}; -static_assert(std::is_trivially_copyable_v); - -class ConstBufferEngineInterface { -public: - virtual ~ConstBufferEngineInterface() = default; - virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; - virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; - virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const = 0; - virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; - virtual u32 GetBoundBuffer() const = 0; - - virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; - virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; -}; - -} // namespace 
Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a9b75091e..cae93c470 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -57,53 +57,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun } } -u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& buffer = launch_description.const_buffer_config[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); - return result; -} - -SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; - return AccessSampler(memory_manager.Read(tex_info_address)); -} - -SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc 
= regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); - - const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; - LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); - - rasterizer->DispatchCompute(code_addr); + rasterizer->DispatchCompute(); } Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7c40cba38..0d7683c2d 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,7 +10,6 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/engines/shader_type.h" @@ -40,7 +39,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { +class KeplerCompute final : public EngineInterface { public: explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); ~KeplerCompute(); @@ -209,23 +208,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - 
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - private: void ProcessLaunch(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index aab6b8f7a..103a51fd0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -670,42 +670,4 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer->Clear(); } -u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader_stage = state.shader_stages[static_cast(stage)]; - const auto& buffer = shader_stage.const_buffers[const_buffer]; - return memory_manager.Read(buffer.address + offset); -} - -SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader = state.shader_stages[static_cast(stage)]; - const auto& tex_info_buffer = shader.const_buffers[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.address + offset; - return AccessSampler(memory_manager.Read(tex_info_address)); -} - -SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& 
Maxwell3D::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 335383955..cbf94412b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -17,7 +17,6 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" @@ -49,7 +48,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { +class Maxwell3D final : public EngineInterface { public: explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); ~Maxwell3D(); @@ -1424,23 +1423,6 @@ public: void FlushMMEInlineDraw(); - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - bool ShouldExecute() const { return execute_on; } diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp deleted file mode 100644 index f058f2744..000000000 --- a/src/video_core/guest_driver.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under 
GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/guest_driver.h" - -namespace VideoCore { - -void GuestDriverProfile::DeduceTextureHandlerSize(std::vector bound_offsets) { - if (texture_handler_size) { - return; - } - const std::size_t size = bound_offsets.size(); - if (size < 2) { - return; - } - std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); - u32 min_val = std::numeric_limits::max(); - for (std::size_t i = 1; i < size; ++i) { - if (bound_offsets[i] == bound_offsets[i - 1]) { - continue; - } - const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; - min_val = std::min(min_val, new_min); - } - if (min_val > 2) { - return; - } - texture_handler_size = min_texture_handler_size * min_val; -} - -} // namespace VideoCore diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h deleted file mode 100644 index 21e569ba1..000000000 --- a/src/video_core/guest_driver.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/common_types.h" - -namespace VideoCore { - -/** - * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect - * information necessary for impossible to avoid HLE methods like shader tracks as they are - * Entscheidungsproblems. 
- */ -class GuestDriverProfile { -public: - explicit GuestDriverProfile() = default; - explicit GuestDriverProfile(std::optional texture_handler_size_) - : texture_handler_size{texture_handler_size_} {} - - void DeduceTextureHandlerSize(std::vector bound_offsets); - - u32 GetTextureHandlerSize() const { - return texture_handler_size.value_or(default_texture_handler_size); - } - - bool IsTextureHandlerSizeKnown() const { - return texture_handler_size.has_value(); - } - -private: - // Minimum size of texture handler any driver can use. - static constexpr u32 min_texture_handler_size = 4; - - // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead. - // Thus, certain drivers may squish the size. - static constexpr u32 default_texture_handler_size = 8; - - std::optional texture_handler_size = default_texture_handler_size; -}; - -} // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 58014c1c3..b094fc064 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -11,7 +11,6 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" -#include "video_core/guest_driver.h" namespace Tegra { class MemoryManager; @@ -45,7 +44,7 @@ public: virtual void Clear() = 0; /// Dispatches a compute shader invocation - virtual void DispatchCompute(GPUVAddr code_addr) = 0; + virtual void DispatchCompute() = 0; /// Resets the counter of a query virtual void ResetCounter(QueryType type) = 0; @@ -136,18 +135,5 @@ public: /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const DiskResourceLoadCallback& callback) {} - - /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. 
- [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() { - return guest_driver_profile; - } - - /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. - [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const { - return guest_driver_profile; - } - -private: - GuestDriverProfile guest_driver_profile{}; }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp deleted file mode 100644 index e8d8d2aa5..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ /dev/null @@ -1,2124 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -// Predicates in the decompiled code follow the convention that -1 means true and 0 means false. -// GLASM lacks booleans, so they have to be implemented as integers. -// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to -// select between two values, because -1 will be evaluated as true and 0 as false. 
- -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; -using Operation = const OperationNode&; - -constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; - -char Swizzle(std::size_t component) { - static constexpr std::string_view SWIZZLE{"xyzw"}; - return SWIZZLE.at(component); -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast(index) - static_cast(Attribute::Index::Attribute_0); -} - -std::string_view Modifiers(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - if (meta && meta->precise) { - return ".PREC"; - } - return ""; -} - -std::string_view GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return ""; - case PixelImap::Constant: - return "FLAT "; - case PixelImap::ScreenLinear: - return "NOPERSPECTIVE "; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; -} - -std::string_view ImageType(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "BUFFER"; - case Tegra::Shader::ImageType::Texture1DArray: - return "ARRAY1D"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "ARRAY2D"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - } - UNREACHABLE(); - return {}; -} - -std::string_view StackName(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "SSY"; - case MetaStackClass::Pbk: - return "PBK"; - 
} - UNREACHABLE(); - return ""; -}; - -std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { - switch (topology) { - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: - return "POINTS"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: - return "LINES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: - return "LINES_ADJACENCY"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: - return "TRIANGLES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - return "TRIANGLES_ADJACENCY"; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return "POINTS"; - } -} - -std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "POINTS"; - case Tegra::Shader::OutputTopology::LineStrip: - return "LINE_STRIP"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "TRIANGLE_STRIP"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -std::string_view StageInputName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - case ShaderType::Geometry: - return "vertex"; - case ShaderType::Fragment: - return "fragment"; - case ShaderType::Compute: - return "invocation"; - default: - UNREACHABLE(); - return ""; - } -} - -std::string TextureType(const MetaTexture& meta) { - if (meta.sampler.is_buffer) { - return "BUFFER"; - } - std::string type; - if (meta.sampler.is_shadow) { - type += "SHADOW"; - } - 
if (meta.sampler.is_array) { - type += "ARRAY"; - } - type += [&meta] { - switch (meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return "1D"; - case Tegra::Shader::TextureType::Texture2D: - return "2D"; - case Tegra::Shader::TextureType::Texture3D: - return "3D"; - case Tegra::Shader::TextureType::TextureCube: - return "CUBE"; - } - UNREACHABLE(); - return "2D"; - }(); - return type; -} - -class ARBDecompiler final { -public: - explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier); - - std::string Code() const { - return shader_source; - } - -private: - void DefineGlobalMemory(); - - void DeclareHeader(); - void DeclareVertex(); - void DeclareGeometry(); - void DeclareFragment(); - void DeclareCompute(); - void DeclareInputAttributes(); - void DeclareOutputAttributes(); - void DeclareLocalMemory(); - void DeclareGlobalMemory(); - void DeclareConstantBuffers(); - void DeclareRegisters(); - void DeclareTemporaries(); - void DeclarePredicates(); - void DeclareInternalFlags(); - - void InitializeVariables(); - - void DecompileAST(); - void DecompileBranchMode(); - - void VisitAST(const ASTNode& node); - std::string VisitExpression(const Expr& node); - - void VisitBlock(const NodeBlock& bb); - - std::string Visit(const Node& node); - - std::tuple BuildCoords(Operation); - std::string BuildAoffi(Operation); - std::string GlobalMemoryPointer(const GmemNode& gmem); - void Exit(); - - std::string Assign(Operation); - std::string Select(Operation); - std::string FClamp(Operation); - std::string FCastHalf0(Operation); - std::string FCastHalf1(Operation); - std::string FSqrt(Operation); - std::string FSwizzleAdd(Operation); - std::string HAdd2(Operation); - std::string HMul2(Operation); - std::string HFma2(Operation); - std::string HAbsolute(Operation); - std::string HNegate(Operation); - std::string HClamp(Operation); - std::string HCastFloat(Operation); - 
std::string HUnpack(Operation); - std::string HMergeF32(Operation); - std::string HMergeH0(Operation); - std::string HMergeH1(Operation); - std::string HPack2(Operation); - std::string LogicalAssign(Operation); - std::string LogicalPick2(Operation); - std::string LogicalAnd2(Operation); - std::string FloatOrdered(Operation); - std::string FloatUnordered(Operation); - std::string LogicalAddCarry(Operation); - std::string Texture(Operation); - std::string TextureGather(Operation); - std::string TextureQueryDimensions(Operation); - std::string TextureQueryLod(Operation); - std::string TexelFetch(Operation); - std::string TextureGradient(Operation); - std::string ImageLoad(Operation); - std::string ImageStore(Operation); - std::string Branch(Operation); - std::string BranchIndirect(Operation); - std::string PushFlowStack(Operation); - std::string PopFlowStack(Operation); - std::string Exit(Operation); - std::string Discard(Operation); - std::string EmitVertex(Operation); - std::string EndPrimitive(Operation); - std::string InvocationId(Operation); - std::string YNegate(Operation); - std::string ThreadId(Operation); - std::string ShuffleIndexed(Operation); - std::string Barrier(Operation); - std::string MemoryBarrierGroup(Operation); - std::string MemoryBarrierGlobal(Operation); - - template - std::string Unary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); - return temporary; - } - - template - std::string Binary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1])); - return temporary; - } - - template - std::string Trinary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1]), Visit(operation[2])); - return temporary; 
- } - - template - std::string FloatComparison(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("TRUNC.U.CC RC.x, {};", Binary(operation)); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NE.x), -1;", temporary); - - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - if constexpr (unordered) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - } else if (op == SNE_F) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - } - return temporary; - } - - template - std::string HalfComparison(Operation operation) { - std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - AddLine("UP2H.F {}, {};", tmp1, op_a); - AddLine("UP2H.F {}, {};", tmp2, op_b); - AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); - AddLine("TRUNC.U.CC RC.xy, {};", tmp1); - AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); - AddLine("MOV.S {}.x (NE.x), -1;", tmp1); - AddLine("MOV.S {}.y (NE.y), -1;", tmp1); - if constexpr (is_nan) { - AddLine("MOVC.F RC.x, {};", op_a); - AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); - AddLine("MOVC.F RC.x, {};", op_b); - AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); - } - return tmp1; - } - - template - std::string AtomicImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const 
std::size_t num_values = meta.values.size(); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - - AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, - image_id, ImageType(meta.image.type)); - return fmt::format("{}.x", coord); - } - - template - std::string Atomic(Operation operation) { - std::string temporary = AllocTemporary(); - std::string address; - std::string_view opname; - bool robust = false; - if (const auto gmem = std::get_if(&*operation[0])) { - address = GlobalMemoryPointer(*gmem); - opname = "ATOM"; - robust = true; - } else if (const auto smem = std::get_if(&*operation[0])) { - address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); - opname = "ATOMS"; - } else { - UNREACHABLE(); - return "{0, 0, 0, 0}"; - } - if (robust) { - AddLine("IF NE.x;"); - } - AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); - if (robust) { - AddLine("ELSE;"); - AddLine("MOV.S {}, 0;", temporary); - AddLine("ENDIF;"); - } - return temporary; - } - - template - std::string Negate(Operation operation) { - std::string temporary = AllocTemporary(); - if constexpr (type == 'F') { - AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); - } else { - AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); - } - return temporary; - } - - template - std::string Absolute(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); - return temporary; - } - - template - std::string BitfieldInsert(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, 
temporary, Visit(operation[3])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); - AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), - Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template - std::string BitfieldExtract(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); - AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template - std::string LocalInvocationId(Operation) { - return fmt::format("invocation.localid.{}", swizzle); - } - - template - std::string WorkGroupId(Operation) { - return fmt::format("invocation.groupid.{}", swizzle); - } - - template - std::string ThreadMask(Operation) { - return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); - } - - template - void AddExpression(std::string_view text, Args&&... args) { - shader_source += fmt::format(fmt::runtime(text), std::forward(args)...); - } - - template - void AddLine(std::string_view text, Args&&... 
args) { - AddExpression(text, std::forward(args)...); - shader_source += '\n'; - } - - std::string AllocLongVectorTemporary() { - max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); - return fmt::format("L{}", num_long_temporaries++); - } - - std::string AllocLongTemporary() { - return fmt::format("{}.x", AllocLongVectorTemporary()); - } - - std::string AllocVectorTemporary() { - max_temporaries = std::max(max_temporaries, num_temporaries + 1); - return fmt::format("T{}", num_temporaries++); - } - - std::string AllocTemporary() { - return fmt::format("{}.x", AllocVectorTemporary()); - } - - void ResetTemporaries() noexcept { - num_temporaries = 0; - num_long_temporaries = 0; - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - - std::size_t num_temporaries = 0; - std::size_t max_temporaries = 0; - - std::size_t num_long_temporaries = 0; - std::size_t max_long_temporaries = 0; - - std::map global_memory_names; - - std::string shader_source; - - static constexpr std::string_view ADD_F32 = "ADD.F32"; - static constexpr std::string_view ADD_S = "ADD.S"; - static constexpr std::string_view ADD_U = "ADD.U"; - static constexpr std::string_view MUL_F32 = "MUL.F32"; - static constexpr std::string_view MUL_S = "MUL.S"; - static constexpr std::string_view MUL_U = "MUL.U"; - static constexpr std::string_view DIV_F32 = "DIV.F32"; - static constexpr std::string_view DIV_S = "DIV.S"; - static constexpr std::string_view DIV_U = "DIV.U"; - static constexpr std::string_view MAD_F32 = "MAD.F32"; - static constexpr std::string_view RSQ_F32 = "RSQ.F32"; - static constexpr std::string_view COS_F32 = "COS.F32"; - static constexpr std::string_view SIN_F32 = "SIN.F32"; - static constexpr std::string_view EX2_F32 = "EX2.F32"; - static constexpr std::string_view LG2_F32 = "LG2.F32"; - static constexpr std::string_view SLT_F = "SLT.F32"; - static constexpr std::string_view SLT_S = "SLT.S"; - static constexpr 
std::string_view SLT_U = "SLT.U"; - static constexpr std::string_view SEQ_F = "SEQ.F32"; - static constexpr std::string_view SEQ_S = "SEQ.S"; - static constexpr std::string_view SEQ_U = "SEQ.U"; - static constexpr std::string_view SLE_F = "SLE.F32"; - static constexpr std::string_view SLE_S = "SLE.S"; - static constexpr std::string_view SLE_U = "SLE.U"; - static constexpr std::string_view SGT_F = "SGT.F32"; - static constexpr std::string_view SGT_S = "SGT.S"; - static constexpr std::string_view SGT_U = "SGT.U"; - static constexpr std::string_view SNE_F = "SNE.F32"; - static constexpr std::string_view SNE_S = "SNE.S"; - static constexpr std::string_view SNE_U = "SNE.U"; - static constexpr std::string_view SGE_F = "SGE.F32"; - static constexpr std::string_view SGE_S = "SGE.S"; - static constexpr std::string_view SGE_U = "SGE.U"; - static constexpr std::string_view AND_S = "AND.S"; - static constexpr std::string_view AND_U = "AND.U"; - static constexpr std::string_view TRUNC_F = "TRUNC.F"; - static constexpr std::string_view TRUNC_S = "TRUNC.S"; - static constexpr std::string_view TRUNC_U = "TRUNC.U"; - static constexpr std::string_view SHL_S = "SHL.S"; - static constexpr std::string_view SHL_U = "SHL.U"; - static constexpr std::string_view SHR_S = "SHR.S"; - static constexpr std::string_view SHR_U = "SHR.U"; - static constexpr std::string_view OR_S = "OR.S"; - static constexpr std::string_view OR_U = "OR.U"; - static constexpr std::string_view XOR_S = "XOR.S"; - static constexpr std::string_view XOR_U = "XOR.U"; - static constexpr std::string_view NOT_S = "NOT.S"; - static constexpr std::string_view NOT_U = "NOT.U"; - static constexpr std::string_view BTC_S = "BTC.S"; - static constexpr std::string_view BTC_U = "BTC.U"; - static constexpr std::string_view BTFM_S = "BTFM.S"; - static constexpr std::string_view BTFM_U = "BTFM.U"; - static constexpr std::string_view ROUND_F = "ROUND.F"; - static constexpr std::string_view CEIL_F = "CEIL.F"; - static constexpr 
std::string_view FLR_F = "FLR.F"; - static constexpr std::string_view I2F_S = "I2F.S"; - static constexpr std::string_view I2F_U = "I2F.U"; - static constexpr std::string_view MIN_F = "MIN.F"; - static constexpr std::string_view MIN_S = "MIN.S"; - static constexpr std::string_view MIN_U = "MIN.U"; - static constexpr std::string_view MAX_F = "MAX.F"; - static constexpr std::string_view MAX_S = "MAX.S"; - static constexpr std::string_view MAX_U = "MAX.U"; - static constexpr std::string_view MOV_U = "MOV.U"; - static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; - static constexpr std::string_view TGALL_U = "TGALL.U"; - static constexpr std::string_view TGANY_U = "TGANY.U"; - static constexpr std::string_view TGEQ_U = "TGEQ.U"; - static constexpr std::string_view EXCH = "EXCH"; - static constexpr std::string_view ADD = "ADD"; - static constexpr std::string_view MIN = "MIN"; - static constexpr std::string_view MAX = "MAX"; - static constexpr std::string_view AND = "AND"; - static constexpr std::string_view OR = "OR"; - static constexpr std::string_view XOR = "XOR"; - static constexpr std::string_view U32 = "U32"; - static constexpr std::string_view S32 = "S32"; - - static constexpr std::size_t NUM_ENTRIES = static_cast(OperationCode::Amount); - using DecompilerType = std::string (ARBDecompiler::*)(Operation); - static constexpr std::array OPERATION_DECOMPILERS = { - &ARBDecompiler::Assign, - - &ARBDecompiler::Select, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Trinary, - &ARBDecompiler::Negate<'F'>, - &ARBDecompiler::Absolute<'F'>, - &ARBDecompiler::FClamp, - &ARBDecompiler::FCastHalf0, - &ARBDecompiler::FCastHalf1, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::FSqrt, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - 
&ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::FSwizzleAdd, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Negate<'S'>, - &ARBDecompiler::Absolute<'S'>, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::BitfieldInsert<'S'>, - &ARBDecompiler::BitfieldExtract<'S'>, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::BitfieldInsert<'U'>, - &ARBDecompiler::BitfieldExtract<'U'>, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::HAdd2, - &ARBDecompiler::HMul2, - &ARBDecompiler::HFma2, - &ARBDecompiler::HAbsolute, - &ARBDecompiler::HNegate, - &ARBDecompiler::HClamp, - &ARBDecompiler::HCastFloat, - &ARBDecompiler::HUnpack, - &ARBDecompiler::HMergeF32, - &ARBDecompiler::HMergeH0, - &ARBDecompiler::HMergeH1, - &ARBDecompiler::HPack2, - - &ARBDecompiler::LogicalAssign, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::LogicalPick2, - &ARBDecompiler::LogicalAnd2, - - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatOrdered, - &ARBDecompiler::FloatUnordered, - &ARBDecompiler::FloatComparison, - 
&ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::LogicalAddCarry, - - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - - &ARBDecompiler::Texture, - &ARBDecompiler::Texture, - &ARBDecompiler::TextureGather, - &ARBDecompiler::TextureQueryDimensions, - &ARBDecompiler::TextureQueryLod, - &ARBDecompiler::TexelFetch, - &ARBDecompiler::TextureGradient, - - &ARBDecompiler::ImageLoad, - &ARBDecompiler::ImageStore, - - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - 
&ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Branch, - &ARBDecompiler::BranchIndirect, - &ARBDecompiler::PushFlowStack, - &ARBDecompiler::PopFlowStack, - &ARBDecompiler::Exit, - &ARBDecompiler::Discard, - - &ARBDecompiler::EmitVertex, - &ARBDecompiler::EndPrimitive, - - &ARBDecompiler::InvocationId, - &ARBDecompiler::YNegate, - &ARBDecompiler::LocalInvocationId<'x'>, - &ARBDecompiler::LocalInvocationId<'y'>, - &ARBDecompiler::LocalInvocationId<'z'>, - &ARBDecompiler::WorkGroupId<'x'>, - &ARBDecompiler::WorkGroupId<'y'>, - &ARBDecompiler::WorkGroupId<'z'>, - - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::ThreadId, - &ARBDecompiler::ThreadMask<'e', 'q'>, - &ARBDecompiler::ThreadMask<'g', 'e'>, - &ARBDecompiler::ThreadMask<'g', 't'>, - &ARBDecompiler::ThreadMask<'l', 'e'>, - &ARBDecompiler::ThreadMask<'l', 't'>, - &ARBDecompiler::ShuffleIndexed, - - &ARBDecompiler::Barrier, - &ARBDecompiler::MemoryBarrierGroup, - &ARBDecompiler::MemoryBarrierGlobal, - }; -}; - -ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { - DefineGlobalMemory(); - - AddLine("TEMP RC;"); - AddLine("TEMP FSWZA[4];"); - AddLine("TEMP FSWZB[4];"); - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - AddLine("END"); - - const std::string code = std::move(shader_source); - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareLocalMemory(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareRegisters(); - DeclareTemporaries(); - DeclarePredicates(); - DeclareInternalFlags(); - - shader_source += code; -} - -std::string_view HeaderStageName(ShaderType stage) { - 
switch (stage) { - case ShaderType::Vertex: - return "vp"; - case ShaderType::Geometry: - return "gp"; - case ShaderType::Fragment: - return "fp"; - case ShaderType::Compute: - return "cp"; - default: - UNREACHABLE(); - return ""; - } -} - -void ARBDecompiler::DefineGlobalMemory() { - u32 binding = 0; - for (const auto& pair : ir.GetGlobalMemory()) { - const GlobalMemoryBase base = pair.first; - global_memory_names.emplace(base, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareHeader() { - AddLine("!!NV{}5.0", HeaderStageName(stage)); - // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D - AddLine("OPTION NV_internal;"); - AddLine("OPTION NV_gpu_program_fp64;"); - AddLine("OPTION NV_shader_thread_group;"); - if (ir.UsesWarps() && device.HasWarpIntrinsics()) { - AddLine("OPTION NV_shader_thread_shuffle;"); - } - if (stage == ShaderType::Vertex) { - if (device.HasNvViewportArray2()) { - AddLine("OPTION NV_viewport_array2;"); - } - } - if (stage == ShaderType::Fragment) { - AddLine("OPTION ARB_draw_buffers;"); - } - if (device.HasImageLoadFormatted()) { - AddLine("OPTION EXT_shader_image_load_formatted;"); - } -} - -void ARBDecompiler::DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); -} - -void ARBDecompiler::DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const auto& header = ir.GetHeader(); - AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); - AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); - AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); - AddLine("ATTRIB vertex_position = vertex.position;"); -} - -void ARBDecompiler::DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - AddLine("OUTPUT result_color7 = result.color[7];"); - AddLine("OUTPUT result_color6 = 
result.color[6];"); - AddLine("OUTPUT result_color5 = result.color[5];"); - AddLine("OUTPUT result_color4 = result.color[4];"); - AddLine("OUTPUT result_color3 = result.color[3];"); - AddLine("OUTPUT result_color2 = result.color[2];"); - AddLine("OUTPUT result_color1 = result.color[1];"); - AddLine("OUTPUT result_color0 = result.color;"); -} - -void ARBDecompiler::DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const ComputeInfo& info = registry.GetComputeInfo(); - AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], - info.workgroup_size[2]); - if (info.shared_memory_size_in_words == 0) { - return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - u32 size_in_bytes = info.shared_memory_size_in_words * 4; - if (size_in_bytes > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size_in_bytes, limit); - size_in_bytes = limit; - } - - AddLine("SHARED_MEMORY {};", size_in_bytes); - AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); -} - -void ARBDecompiler::DeclareInputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - const std::string_view stage_name = StageInputName(stage); - for (const auto attribute : ir.GetInputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - - std::string_view suffix; - if (stage == ShaderType::Fragment) { - const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix = GetInputFlags(input_mode); - } - AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, - index); - } -} - -void ARBDecompiler::DeclareOutputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = 
GetGenericAttributeIndex(attribute); - AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); - } -} - -void ARBDecompiler::DeclareLocalMemory() { - u64 size = 0; - if (stage == ShaderType::Compute) { - size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - size = ir.GetHeader().GetLocalMemorySize(); - } - if (size == 0) { - return; - } - const u64 element_count = Common::AlignUp(size, 4) / 4; - AddLine("TEMP lmem[{}];", element_count); -} - -void ARBDecompiler::DeclareGlobalMemory() { - const size_t num_entries = ir.GetGlobalMemory().size(); - if (num_entries > 0) { - AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); - } -} - -void ARBDecompiler::DeclareConstantBuffers() { - u32 binding = 0; - for (const auto& cbuf : ir.GetConstantBuffers()) { - AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - AddLine("TEMP R{};", gpr); - } -} - -void ARBDecompiler::DeclareTemporaries() { - for (std::size_t i = 0; i < max_temporaries; ++i) { - AddLine("TEMP T{};", i); - } - for (std::size_t i = 0; i < max_long_temporaries; ++i) { - AddLine("LONG TEMP L{};", i); - } -} - -void ARBDecompiler::DeclarePredicates() { - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("TEMP P{};", static_cast(pred)); - } -} - -void ARBDecompiler::DeclareInternalFlags() { - for (const char* name : INTERNAL_FLAG_NAMES) { - AddLine("TEMP {};", name); - } -} - -void ARBDecompiler::InitializeVariables() { - AddLine("MOV.F32 FSWZA[0], -1;"); - AddLine("MOV.F32 FSWZA[1], 1;"); - AddLine("MOV.F32 FSWZA[2], -1;"); - AddLine("MOV.F32 FSWZA[3], 0;"); - AddLine("MOV.F32 FSWZB[0], -1;"); - AddLine("MOV.F32 FSWZB[1], -1;"); - AddLine("MOV.F32 FSWZB[2], 1;"); - AddLine("MOV.F32 FSWZB[3], -1;"); - - if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { - 
AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); - } - for (const u32 gpr : ir.GetRegisters()) { - AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); - } - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast(pred)); - } -} - -void ARBDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("TEMP F{};", i); - } - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); - } - - InitializeVariables(); - - VisitAST(ir.GetASTProgram()); -} - -void ARBDecompiler::DecompileBranchMode() { - static constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); - AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); - AddLine("TEMP SSY_TOP;"); - AddLine("TEMP PBK_TOP;"); - } - - AddLine("TEMP PC;"); - - if (!ir.IsFlowStackDisabled()) { - AddLine("MOV.U SSY_TOP.x, 0;"); - AddLine("MOV.U PBK_TOP.x, 0;"); - } - - InitializeVariables(); - - const auto basic_block_end = ir.GetBasicBlocks().end(); - auto basic_block_it = ir.GetBasicBlocks().begin(); - const u32 first_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", first_address); - - AddLine("REP;"); - - std::size_t num_blocks = 0; - while (basic_block_it != basic_block_end) { - const auto& [address, bb] = *basic_block_it; - ++num_blocks; - - AddLine("SEQ.S.CC RC.x, PC.x, {};", address); - AddLine("IF NE.x;"); - - VisitBlock(bb); - - ++basic_block_it; - - if (basic_block_it != basic_block_end) { - const auto op = std::get_if(&*bb[bb.size() - 1]); - if (!op || op->GetCode() != OperationCode::Branch) { - const u32 next_address = basic_block_it->first; - AddLine("MOV.U 
PC.x, {};", next_address); - AddLine("CONT;"); - } - } - - AddLine("ELSE;"); - } - AddLine("RET;"); - while (num_blocks--) { - AddLine("ENDIF;"); - } - - AddLine("ENDREP;"); -} - -void ARBDecompiler::VisitAST(const ASTNode& node) { - if (const auto ast = std::get_if(&*node->GetInnerData())) { - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto if_then = std::get_if(&*node->GetInnerData())) { - const std::string condition = VisitExpression(if_then->condition); - ResetTemporaries(); - - AddLine("MOVC.U RC.x, {};", condition); - AddLine("IF NE.x;"); - for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("ENDIF;"); - } else if (const auto if_else = std::get_if(&*node->GetInnerData())) { - AddLine("ELSE;"); - for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto decoded = std::get_if(&*node->GetInnerData())) { - VisitBlock(decoded->nodes); - } else if (const auto var_set = std::get_if(&*node->GetInnerData())) { - AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); - ResetTemporaries(); - } else if (const auto do_while = std::get_if(&*node->GetInnerData())) { - const std::string condition = VisitExpression(do_while->condition); - ResetTemporaries(); - AddLine("REP;"); - for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("MOVC.U RC.x, {};", condition); - AddLine("BRK (NE.x);"); - AddLine("ENDREP;"); - } else if (const auto ast_return = std::get_if(&*node->GetInnerData())) { - const bool is_true = ExprIsTrue(ast_return->condition); - if (!is_true) { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); - AddLine("IF NE.x;"); - ResetTemporaries(); - } - if (ast_return->kills) { - AddLine("KIL TR;"); - } else { - 
Exit(); - } - if (!is_true) { - AddLine("ENDIF;"); - } - } else if (const auto ast_break = std::get_if(&*node->GetInnerData())) { - if (ExprIsTrue(ast_break->condition)) { - AddLine("BRK;"); - } else { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); - AddLine("BRK (NE.x);"); - ResetTemporaries(); - } - } else if (std::holds_alternative(*node->GetInnerData())) { - // Nothing to do - } else { - UNREACHABLE(); - } -} - -std::string ARBDecompiler::VisitExpression(const Expr& node) { - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - return fmt::format("P{}.x", static_cast(expr->predicate)); - } - if (const auto expr = std::get_if(&*node)) { - return Visit(ir.GetConditionCode(expr->cc)); - } - if (const auto expr = std::get_if(&*node)) { - return fmt::format("F{}.x", expr->var_index); - } - if (const auto expr = std::get_if(&*node)) { - return expr->value ? 
"0xffffffff" : "0"; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); - return result; - } - UNREACHABLE(); - return "0"; -} - -void ARBDecompiler::VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } -} - -std::string ARBDecompiler::Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - const std::size_t index = static_cast(operation->GetCode()); - if (index >= OPERATION_DECOMPILERS.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", index); - return {}; - } - const auto decompiler = OPERATION_DECOMPILERS[index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return "{0, 0, 0, 0}.x"; - } - return fmt::format("R{}.x", index); - } - - if (const auto cv = std::get_if(&*node)) { - return fmt::format("CV{}.x", cv->GetIndex()); - } - - if (const auto immediate = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); - return temporary; - } - - if (const auto predicate = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - AddLine("MOV.S {}, -1;", temporary); - break; - case Tegra::Shader::Pred::NeverExecute: - AddLine("MOV.S {}, 0;", temporary); - break; - default: - AddLine("MOV.S {}, P{}.x;", temporary, static_cast(index)); - break; - } - if (predicate->IsNegated()) { - AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); - } - return temporary; - } - - if (const auto abuf = 
std::get_if(&*node)) { - if (abuf->IsPhysicalBuffer()) { - UNIMPLEMENTED_MSG("Physical buffers are not implemented"); - return "{0, 0, 0, 0}.x"; - } - - const Attribute::Index index = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (index) { - case Attribute::Index::Position: { - if (stage == ShaderType::Geometry) { - return fmt::format("{}_position[{}].{}", StageInputName(stage), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.position.{}", StageInputName(stage), swizzle); - } - } - case Attribute::Index::TessCoordInstanceIDVertexID: - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - return "vertex.instance"; - case 3: - return "vertex.id"; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - break; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return "fragment.pointcoord.x"; - case 1: - return "fragment.pointcoord.y"; - } - UNIMPLEMENTED(); - break; - case Attribute::Index::FrontFacing: { - ASSERT(stage == ShaderType::Fragment); - ASSERT(element == 3); - const std::string temporary = AllocVectorTemporary(); - AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); - AddLine("MOV.U.CC RC.x, -RC;"); - AddLine("MOV.S {}.x, 0;", temporary); - AddLine("MOV.S {}.x (NE.x), -1;", temporary); - return fmt::format("{}.x", temporary); - } - default: - if (IsGenericAttribute(index)) { - if (stage == ShaderType::Geometry) { - return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.attrib[{}].{}", StageInputName(stage), - GetGenericAttributeIndex(index), swizzle); - } - } - UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); - break; - } - return "{0, 0, 0, 0}.x"; - } - - if (const auto cbuf = std::get_if(&*node)) { - std::string offset_string; - const auto& offset = cbuf->GetOffset(); - if (const auto imm = 
std::get_if(&*offset)) { - offset_string = std::to_string(imm->GetValue()); - } else { - offset_string = Visit(offset); - } - std::string temporary = AllocTemporary(); - AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); - return temporary; - } - - if (const auto gmem = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV {}, 0;", temporary); - AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); - return temporary; - } - - if (const auto lmem = std::get_if(&*node)) { - std::string temporary = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", temporary, temporary); - AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); - return temporary; - } - - if (const auto smem = std::get_if(&*node)) { - std::string temporary = Visit(smem->GetAddress()); - AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); - return temporary; - } - - if (const auto internal_flag = std::get_if(&*node)) { - const std::size_t index = static_cast(internal_flag->GetFlag()); - return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); - AddLine("IF NE.x;"); - VisitBlock(conditional->GetCode()); - AddLine("ENDIF;"); - return {}; - } - - if ([[maybe_unused]] const auto cmt = std::get_if(&*node)) { - // Uncommenting this will generate invalid code. GLASM lacks comments. 
- // AddLine("// {}", cmt->GetText()); - return {}; - } - - UNIMPLEMENTED(); - return {}; -} - -std::tuple ARBDecompiler::BuildCoords(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.sampler.is_indexed); - - const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array && - meta.sampler.type == Tegra::Shader::TextureType::TextureCube; - const std::size_t count = operation.GetOperandsCount(); - std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - if (meta.sampler.is_array) { - AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array)); - ++i; - } - if (meta.sampler.is_shadow) { - std::string compare = Visit(meta.depth_compare); - if (is_extended) { - ASSERT(i == 4); - std::string extra_coord = AllocVectorTemporary(); - AddLine("MOV.F {}.x, {};", extra_coord, compare); - return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0}; - } - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare); - ++i; - } - return {temporary, temporary, i}; -} - -std::string ARBDecompiler::BuildAoffi(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - if (meta.aoffi.empty()) { - return {}; - } - const std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (auto& node : meta.aoffi) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); - } - return fmt::format(", offset({})", temporary); -} - -std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { - // Read a bindless SSBO, return its address and set CC accordingly - // address = c[binding].xy - // length = c[binding].z - const u32 binding = global_memory_names.at(gmem.GetDescriptor()); - - const std::string pointer = AllocLongVectorTemporary(); - std::string temporary = AllocTemporary(); - - AddLine("PK64.U {}, c[{}];", pointer, binding); - 
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), - Visit(gmem.GetBaseAddress())); - AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); - AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); - // Compare offset to length and set CC - AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); - return fmt::format("{}.x", pointer); -} - -void ARBDecompiler::Exit() { - if (stage != ShaderType::Fragment) { - AddLine("RET;"); - return; - } - - const auto safe_get_register = [this](u32 reg) -> std::string { - if (ir.GetRegisters().contains(reg)) { - return fmt::format("R{}.x", reg); - } - return "{0, 0, 0, 0}.x"; - }; - - const auto& header = ir.GetHeader(); - u32 current_reg = 0; - for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), - safe_get_register(current_reg)); - ++current_reg; - } - } - if (header.ps.omap.depth) { - AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); - } - - AddLine("RET;"); -} - -std::string ARBDecompiler::Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string dest_name; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op - return {}; - } - dest_name = fmt::format("R{}.x", gpr->GetIndex()); - } else if (const auto abuf = std::get_if(&*dest)) { - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (const Attribute::Index index = abuf->GetIndex()) { - case Attribute::Index::Position: - dest_name = fmt::format("result.position.{}", swizzle); - break; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return {}; - case 1: - case 2: 
- if (!device.HasNvViewportArray2()) { - LOG_ERROR( - Render_OpenGL, - "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); - return {}; - } - dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; - break; - case 3: - dest_name = "result.pointsize.x"; - break; - } - break; - case Attribute::Index::ClipDistances0123: - dest_name = fmt::format("result.clip[{}].x", element); - break; - case Attribute::Index::ClipDistances4567: - dest_name = fmt::format("result.clip[{}].x", element + 4); - break; - default: - if (!IsGenericAttribute(index)) { - UNREACHABLE(); - return {}; - } - dest_name = - fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); - break; - } - } else if (const auto lmem = std::get_if(&*dest)) { - const std::string address = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", address, address); - dest_name = fmt::format("lmem[{}].x", address); - } else if (const auto smem = std::get_if(&*dest)) { - AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); - ResetTemporaries(); - return {}; - } else if (const auto gmem = std::get_if(&*dest)) { - AddLine("IF NE.x;"); - AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); - AddLine("ENDIF;"); - ResetTemporaries(); - return {}; - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", dest_name, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::Select(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), - Visit(operation[2])); - return temporary; -} - -std::string ARBDecompiler::FClamp(Operation operation) { - // 1.0f in hex, replace with std::bit_cast on C++20 - static constexpr u32 POSITIVE_ONE = 0x3f800000; - - std::string temporary = AllocTemporary(); - const Node& value = operation[0]; - const Node& low = operation[1]; - const Node& high 
= operation[2]; - const auto* const imm_low = std::get_if(&*low); - const auto* const imm_high = std::get_if(&*high); - if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { - AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); - } else { - AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); - AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); - } - return temporary; -} - -std::string ARBDecompiler::FCastHalf0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FCastHalf1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); - AddLine("MOV {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FSqrt(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); - AddLine("RCP.F32 {}, {};", temporary, temporary); - return temporary; -} - -std::string ARBDecompiler::FSwizzleAdd(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. 
Kepler or better is required."); - AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); - return fmt::format("{}.x", temporary); - } - - AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); - AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); - AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); - AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); - AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); - AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); - AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HAdd2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HMul2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HFma2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string tmp3 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); - AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, 
tmp3); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HAbsolute(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HNegate(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("MOVC.S RC.x, {};", Visit(operation[1])); - AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); - AddLine("MOVC.S RC.x, {};", Visit(operation[2])); - AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HClamp(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HCastFloat(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); - AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HUnpack(Operation operation) { - std::string operand = Visit(operation[0]); - switch (std::get(operation.GetMeta())) { - case 
Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H0_H0: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H1_H1: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - } - UNREACHABLE(); - return "{0, 0, 0, 0}.x"; -} - -std::string ARBDecompiler::HMergeF32(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.x, {}.z;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.w;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return 
fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HPack2(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); - AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const Tegra::Shader::Pred index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = fmt::format("P{}.x", static_cast(index)); - } else if (const auto internal_flag = std::get_if(&*dest)) { - const std::size_t index = static_cast(internal_flag->GetFlag()); - target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", target, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::LogicalPick2(Operation operation) { - std::string temporary = AllocTemporary(); - const u32 index = std::get(*operation[1]).GetValue(); - AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); - return temporary; -} - -std::string ARBDecompiler::LogicalAnd2(Operation operation) { - std::string temporary = AllocTemporary(); - const std::string op = Visit(operation[0]); - AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); - return temporary; -} - -std::string ARBDecompiler::FloatOrdered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S 
{}, -1;", temporary); - AddLine("MOV.S {} (NAN.x), 0;", temporary); - AddLine("MOV.S {} (NAN.y), 0;", temporary); - return temporary; -} - -std::string ARBDecompiler::FloatUnordered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NAN.x), -1;", temporary); - AddLine("MOV.S {} (NAN.y), -1;", temporary); - return temporary; -} - -std::string ARBDecompiler::LogicalAddCarry(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("IF CF.x;"); - AddLine("MOV.S {}, -1;", temporary); - AddLine("ENDIF;"); - return temporary; -} - -std::string ARBDecompiler::Texture(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string_view opcode = "TEX"; - std::string extra; - if (meta.bias) { - ASSERT(!meta.lod); - opcode = "TXB"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); - } else { - const std::string bias = AllocTemporary(); - AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); - extra = fmt::format(" {},", bias); - } - } - if (meta.lod) { - ASSERT(!meta.bias); - opcode = "TXL"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } else { - const std::string lod = AllocTemporary(); - AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); - extra = fmt::format(" {},", lod); - } - } - - AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", 
temporary); -} - -std::string ARBDecompiler::TextureGather(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string comp; - if (!meta.sampler.is_shadow) { - const auto& immediate = std::get(*meta.component); - comp = fmt::format(".{}", Swizzle(immediate.GetValue())); - } - - AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::string lod = operation.GetOperandsCount() > 0 ? 
Visit(operation[0]) : "0"; - AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::size_t count = operation.GetOperandsCount(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); - AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); - AddLine("TRUNC.S {}, {};", temporary, temporary); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TexelFetch(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - if (!meta.sampler.is_buffer) { - ASSERT(swizzle < 4); - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } - AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), - BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const std::string ddx = AllocVectorTemporary(); - const std::string ddy = 
AllocVectorTemporary(); - const std::string coord = std::get<1>(BuildCoords(operation)); - - const std::size_t num_components = meta.derivates.size() / 2; - for (std::size_t index = 0; index < num_components; ++index) { - const char swizzle = Swizzle(index); - AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); - AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); - } - - const std::string_view result = coord; - AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); - return fmt::format("{}.x", result); -} - -std::string ARBDecompiler::ImageLoad(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t count = operation.GetOperandsCount(); - const std::string_view type = ImageType(meta.image.type); - - const std::string temporary = AllocVectorTemporary(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); - AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::ImageStore(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - const std::string_view type = ImageType(meta.image.type); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), 
Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); - return {}; -} - -std::string ARBDecompiler::Branch(Operation operation) { - const auto target = std::get(*operation[0]); - AddLine("MOV.U PC.x, {};", target.GetValue()); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::BranchIndirect(Operation operation) { - AddLine("MOV.U PC.x, {};", Visit(operation[0])); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::PushFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const u32 target = std::get(*operation[0]).GetValue(); - const std::string_view stack_name = StackName(stack); - AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); - AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - return {}; -} - -std::string ARBDecompiler::PopFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const std::string_view stack_name = StackName(stack); - AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::Exit(Operation) { - Exit(); - return {}; -} - -std::string ARBDecompiler::Discard(Operation) { - AddLine("KIL TR;"); - return {}; -} - -std::string ARBDecompiler::EmitVertex(Operation) { - AddLine("EMIT;"); - return {}; -} - -std::string ARBDecompiler::EndPrimitive(Operation) { - AddLine("ENDPRIM;"); - return {}; -} - -std::string ARBDecompiler::InvocationId(Operation) { - return "primitive.invocation"; -} - -std::string ARBDecompiler::YNegate(Operation) { - LOG_WARNING(Render_OpenGL, "(STUBBED)"); - std::string temporary = AllocTemporary(); - AddLine("MOV.F {}, 1;", temporary); - return temporary; -} - -std::string 
ARBDecompiler::ThreadId(Operation) { - return fmt::format("{}.threadid", StageInputName(stage)); -} - -std::string ARBDecompiler::ShuffleIndexed(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. Kepler or better is required."); - return Visit(operation[0]); - } - const std::string temporary = AllocVectorTemporary(); - AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), - Visit(operation[1])); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::Barrier(Operation) { - AddLine("BAR;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGroup(Operation) { - AddLine("MEMBAR.CTA;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { - AddLine("MEMBAR;"); - return {}; -} - -} // Anonymous namespace - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier) { - return ARBDecompiler(device, ir, registry, stage, identifier).Code(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h deleted file mode 100644 index 6afc87220..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "common/common_types.h" - -namespace Tegra::Engines { -enum class ShaderType : u32; -} - -namespace VideoCommon::Shader { -class ShaderIR; -class Registry; -} // namespace VideoCommon::Shader - -namespace OpenGL { - -class Device; - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ceb3abcb2..3551dbdcc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -54,40 +54,6 @@ namespace { constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? 
image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -template -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - ShaderType shader_type, size_t index = 0) { - if constexpr (std::is_same_v) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -119,44 +85,6 @@ std::pair TransformFeedbackEnum(u8 location) { void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::Buffer; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? 
ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), - fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), - async_shaders(emu_window_) { - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } -} + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() { } } -void RasterizerOpenGL::SetupShaders(bool is_indexed) { - u32 clip_distances = 0; - - std::array shaders{}; - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeGraphicsDescriptors(); - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto& shader_config = maxwell3d.regs.shader_config[index]; - const auto program{static_cast(index)}; - - // Skip stages that are 
not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - switch (program) { - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(0); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(0); - break; - default: - break; - } - continue; - } - // Currently this stages are not supported in the OpenGL backend. - // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL - if (program == Maxwell::ShaderProgram::TesselationControl || - program == Maxwell::ShaderProgram::TesselationEval) { - continue; - } - - Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; - switch (program) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - program_manager.UseVertexShader(program_handle); - break; - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(program_handle); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(program_handle); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, - shader_config.enable.Value(), shader_config.offset); - break; - } - - // Stage indices are 0 - 5 - const size_t stage = index == 0 ? 0 : index - 1; - shaders[stage] = shader; - - SetupDrawTextures(shader, stage); - SetupDrawImages(shader, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); - - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : shader->GetEntries().global_memory_entries) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - - // Workaround for Intel drivers. 
- // When a clip distance is enabled but not set in the shader it crops parts of the screen - // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the - // clip distances only when it's written by a shader stage. - clip_distances |= shader->GetEntries().clip_distances; - - // When VertexA is enabled, we have dual vertex shaders - if (program == Maxwell::ShaderProgram::VertexA) { - // VertexB was combined with VertexA, so we skip the VertexB iteration - ++index; - } - } - SyncClipEnabled(clip_distances); - maxwell3d.dirty.flags[Dirty::Shaders] = false; - - buffer_cache.UpdateGraphicsBuffers(is_indexed); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - const Shader* const shader = shaders[stage]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - const auto& base = device.GetBaseBindings(stage); - BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, - texture_index, image_index); - } -} - void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - shader_cache.LoadDiskCache(title_id, stop_loading, callback); -} + const VideoCore::DiskResourceLoadCallback& callback) {} void RasterizerOpenGL::Clear() { MICROPROFILE_SCOPE(OpenGL_Clears); @@ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Setup shaders and their used resources. 
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - SetupShaders(is_indexed); texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); @@ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { gpu.TickWork(); } -void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - BindComputeTextures(kernel); - - const auto& entries = kernel->GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_memory_entries) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); - - const auto& launch_desc = kepler_compute.launch_description; - glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); - ++num_queued_commands; +void RasterizerOpenGL::DispatchCompute() { + UNREACHABLE_MSG("Not implemented"); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeTextures(kernel); - SetupComputeImages(kernel); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - program_manager.BindCompute(kernel->GetHandle()); - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - 
BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); -} - -void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, - GLuint base_image, size_t& image_view_index, - size_t& texture_index, size_t& image_index) { - const GLuint* const samplers = sampler_handles.data() + texture_index; - const GLuint* const textures = texture_handles.data() + texture_index; - const GLuint* const images = image_handles.data() + image_index; - - const size_t num_samplers = entries.samplers.size(); - for (const auto& sampler : entries.samplers) { - for (size_t i = 0; i < sampler.size; ++i) { - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); - texture_handles[texture_index++] = handle; - } - } - const size_t num_images = entries.images.size(); - for (size_t unit = 0; unit < num_images; ++unit) { - // TODO: Mark as modified - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); - image_handles[image_index] = handle; - ++image_index; - } - if (num_samplers > 0) { - glBindSamplers(base_texture, static_cast(num_samplers), samplers); - glBindTextures(base_texture, static_cast(num_samplers), textures); - } - if (num_images > 0) { - glBindImageTextures(base_image, static_cast(num_images), images); - } -} - -void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().samplers) { - const auto shader_type = static_cast(stage_index); - for (size_t index = 0; index < entry.size; ++index) { - const auto handle = - 
GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); - const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : kernel->GetEntries().samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); - const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().images) { - const auto shader_type = static_cast(stage_index); - const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : shader->GetEntries().images) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); - image_view_indices.push_back(handle.image); - } -} - void RasterizerOpenGL::SyncState() { SyncViewport(); SyncRasterizeEnable(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d30ad698f..1f58f8791 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -28,11 +28,9 @@ #include 
"video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" namespace Core::Memory { @@ -81,7 +79,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -118,36 +116,11 @@ public: return num_queued_commands > 0; } - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; - void BindComputeTextures(Shader* kernel); - - void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, - size_t& image_view_index, size_t& texture_index, size_t& image_index); - - /// Configures the current textures to use for the draw command. - void SetupDrawTextures(const Shader* shader, size_t stage_index); - - /// Configures the textures used in a compute shader. - void SetupComputeTextures(const Shader* kernel); - - /// Configures images in a graphics shader. - void SetupDrawImages(const Shader* shader, size_t stage_index); - - /// Configures images in a compute shader. 
- void SetupComputeImages(const Shader* shader); - /// Syncs state to match guest's void SyncState(); @@ -230,8 +203,6 @@ private: /// End a transform feedback void EndTransformFeedback(); - void SetupShaders(bool is_indexed); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -251,8 +222,6 @@ private: AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; - VideoCommon::Shader::AsyncShaders async_shaders; - boost::container::static_vector image_view_indices; std::array image_view_ids; boost::container::static_vector sampler_handles; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5a01c59ec..4dd166156 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -20,307 +20,19 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" namespace OpenGL { -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::GetUniqueIdentifier; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::Registry; -using VideoCommon::Shader::ShaderIR; -using 
VideoCommon::Shader::STAGE_MAIN_OFFSET; - -namespace { - -constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; - -/// Gets the shader type from a Maxwell program type -constexpr GLenum GetGLShaderType(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_SHADER; - case ShaderType::Geometry: - return GL_GEOMETRY_SHADER; - case ShaderType::Fragment: - return GL_FRAGMENT_SHADER; - case ShaderType::Compute: - return GL_COMPUTE_SHADER; - default: - return GL_NONE; - } -} - -constexpr const char* GetShaderTypeName(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return "VS"; - case ShaderType::TesselationControl: - return "HS"; - case ShaderType::TesselationEval: - return "DS"; - case ShaderType::Geometry: - return "GS"; - case ShaderType::Fragment: - return "FS"; - case ShaderType::Compute: - return "CS"; - } - return "UNK"; -} - -constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { - switch (program_type) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - } - return {}; -} - -constexpr GLenum AssemblyEnum(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_PROGRAM_NV; - case ShaderType::TesselationControl: - return GL_TESS_CONTROL_PROGRAM_NV; - case ShaderType::TesselationEval: - return GL_TESS_EVALUATION_PROGRAM_NV; - case ShaderType::Geometry: - return GL_GEOMETRY_PROGRAM_NV; - case ShaderType::Fragment: - return GL_FRAGMENT_PROGRAM_NV; - case ShaderType::Compute: - return GL_COMPUTE_PROGRAM_NV; - } - return {}; -} - -std::string 
MakeShaderID(u64 unique_identifier, ShaderType shader_type) { - return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); -} - -std::shared_ptr MakeRegistry(const ShaderDiskCacheEntry& entry) { - const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; - const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, - entry.graphics_info, entry.compute_info}; - auto registry = std::make_shared(entry.type, info); - for (const auto& [address, value] : entry.keys) { - const auto [buffer, offset] = address; - registry->InsertKey(buffer, offset, value); - } - for (const auto& [offset, sampler] : entry.bound_samplers) { - registry->InsertBoundSampler(offset, sampler); - } - for (const auto& [key, sampler] : entry.bindless_samplers) { - const auto [buffer, offset] = key; - registry->InsertBindlessSampler(buffer, offset, sampler); - } - return registry; -} - -std::unordered_set GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); - - std::vector formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - - std::unordered_set supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast(format)); - } - return supported_formats; -} - -} // Anonymous namespace - -ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { - if (device.UseDriverCache()) { - // Ignore hint retrievable if we are using the driver cache - hint_retrievable = false; - } - const std::string shader_id = MakeShaderID(unique_identifier, shader_type); - LOG_INFO(Render_OpenGL, "{}", shader_id); - - auto program = std::make_shared(); - - if (device.UseAssemblyShaders()) { - const std::string arb = - DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); - - GLuint& arb_prog = 
program->assembly_program.handle; - -// Commented out functions signal OpenGL errors but are compatible with apitrace. -// Use them only to capture and replay on apitrace. -#if 0 - glGenProgramsNV(1, &arb_prog); - glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast(arb.size()), - reinterpret_cast(arb.data())); -#else - glGenProgramsARB(1, &arb_prog); - glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, - static_cast(arb.size()), arb.data()); -#endif - const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - LOG_CRITICAL(Render_OpenGL, "{}", err); - LOG_INFO(Render_OpenGL, "\n{}", arb); - } - } else { - const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); - OGLShader shader; - shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); - - program->source_program.Create(true, hint_retrievable, shader.handle); - } - - return program; -} - -Shader::Shader(std::shared_ptr registry_, ShaderEntries entries_, - ProgramSharedPtr program_, bool is_built_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, - is_built{is_built_} { - handle = program->assembly_program.handle; - if (handle == 0) { - handle = program->source_program.handle; - } - if (is_built) { - ASSERT(handle != 0); - } -} +Shader::Shader() = default; Shader::~Shader() = default; -GLuint Shader::GetHandle() const { - DEBUG_ASSERT(registry->IsConsistent()); - return handle; -} - -bool Shader::IsBuilt() const { - return is_built; -} - -void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { - program->source_program = std::move(new_program); - handle = program->source_program.handle; - is_built = true; -} - -void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { - program->assembly_program = std::move(new_program); - handle = program->assembly_program.handle; - is_built = true; -} - -std::unique_ptr Shader::CreateStageFromMemory( - 
const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, - ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { - const auto shader_type = GetShaderType(program_type); - - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared(shader_type, gpu.Maxwell3D()); - if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = - BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr(new Shader(std::move(registry), - MakeEntries(params.device, ir, shader_type), - std::move(program), true)); - } else { - // Required for entries - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - auto entries = MakeEntries(params.device, ir, shader_type); - - async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, - std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, - COMPILER_SETTINGS, *registry, cpu_addr); - - auto program = std::make_shared(); - return std::unique_ptr( - new Shader(std::move(registry), std::move(entries), std::move(program), false)); - } -} - -std::unique_ptr 
Shader::CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code) { - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared(ShaderType::Compute, params.engine); - const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - const u64 uid = params.unique_identifier; - auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); - - ShaderDiskCacheEntry entry; - entry.type = ShaderType::Compute; - entry.code = std::move(code); - entry.unique_identifier = uid; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.compute_info = registry->GetComputeInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr(new Shader(std::move(registry), - MakeEntries(params.device, ir, ShaderType::Compute), - std::move(program))); -} - -std::unique_ptr Shader::CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader) { - return std::unique_ptr(new Shader( - precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); -} - ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; -void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - disk_cache.BindTitleID(title_id); - const std::optional transferable = disk_cache.LoadTransferable(); - - LOG_INFO(Render_OpenGL, "Total Shader Count: {}", - transferable.has_value() ? 
transferable->size() : 0); - - if (!transferable) { - return; - } - - std::vector gl_cache; - if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { - // Only load precompiled cache when we are not using assembly shaders - gl_cache = disk_cache.LoadPrecompiled(); - } - const auto supported_formats = GetSupportedFormats(); - - // Track if precompiled cache was altered during loading to know if we have to - // serialize the virtual precompiled cache file back to the hard drive - bool precompiled_cache_altered = false; - - // Inform the frontend about shader build initialization - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); - } - - std::mutex mutex; - std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex - std::atomic_bool gl_cache_failed = false; - - const auto find_precompiled = [&gl_cache](u64 id) { - return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier); - }; - - const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, - std::size_t end) { - const auto scope = context->Acquire(); - - for (std::size_t i = begin; i < end; ++i) { - if (stop_loading.stop_requested()) { - return; - } - const auto& entry = (*transferable)[i]; - const u64 uid = entry.unique_identifier; - const auto it = find_precompiled(uid); - const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; - - const bool is_compute = entry.type == ShaderType::Compute; - const u32 main_offset = is_compute ? 
KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - auto registry = MakeRegistry(entry); - const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); - - ProgramSharedPtr program; - if (precompiled_entry) { - // If the shader is precompiled, attempt to load it with - program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); - if (!program) { - gl_cache_failed = true; - } - } - if (!program) { - // Otherwise compile it from GLSL - program = BuildShader(device, entry.type, uid, ir, *registry, true); - } - - PrecompiledShader shader; - shader.program = std::move(program); - shader.registry = std::move(registry); - shader.entries = MakeEntries(device, ir, entry.type); - - std::scoped_lock lock{mutex}; - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, - transferable->size()); - } - runtime_cache.emplace(entry.unique_identifier, std::move(shader)); - } - }; - - const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; - const std::size_t bucket_size{transferable->size() / num_workers}; - std::vector> contexts(num_workers); - std::vector threads(num_workers); - for (std::size_t i = 0; i < num_workers; ++i) { - const bool is_last_worker = i + 1 == num_workers; - const std::size_t start{bucket_size * i}; - const std::size_t end{is_last_worker ? 
transferable->size() : start + bucket_size}; - - // On some platforms the shared context has to be created from the GUI thread - contexts[i] = emu_window.CreateSharedContext(); - threads[i] = std::thread(worker, contexts[i].get(), start, end); - } - for (auto& thread : threads) { - thread.join(); - } - - if (gl_cache_failed) { - // Invalidate the precompiled cache if a shader dumped shader was rejected - disk_cache.InvalidatePrecompiled(); - precompiled_cache_altered = true; - return; - } - if (stop_loading.stop_requested()) { - return; - } - - if (device.UseAssemblyShaders() || device.UseDriverCache()) { - // Don't store precompiled binaries for assembly shaders or when using the driver cache - return; - } - - // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw - // before precompiling them - - for (std::size_t i = 0; i < transferable->size(); ++i) { - const u64 id = (*transferable)[i].unique_identifier; - const auto it = find_precompiled(id); - if (it == gl_cache.end()) { - const GLuint program = runtime_cache.at(id).program->source_program.handle; - disk_cache.SavePrecompiled(id, program); - precompiled_cache_altered = true; - } - } - - if (precompiled_cache_altered) { - disk_cache.SaveVirtualPrecompiledFile(); - } -} - -ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set& supported_formats) { - if (!supported_formats.contains(precompiled_entry.binary_format)) { - LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); - return {}; - } - - auto program = std::make_shared(); - GLuint& handle = program->source_program.handle; - handle = glCreateProgram(); - glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); - glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), - static_cast(precompiled_entry.binary.size())); - - GLint link_status; - 
glGetProgramiv(handle, GL_LINK_STATUS, &link_status); - if (link_status == GL_FALSE) { - LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); - return {}; - } - - return program; -} - -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders) { - if (!maxwell3d.dirty.flags[Dirty::Shaders]) { - auto* last_shader = last_shaders[static_cast(program)]; - if (last_shader->IsBuilt()) { - return last_shader; - } - } - - const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; - - if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { - auto completed_work = async_shaders.GetCompletedWork(); - for (auto& work : completed_work) { - Shader* shader = TryGet(work.cpu_address); - gpu.ShaderNotify().MarkShaderComplete(); - if (shader == nullptr) { - continue; - } - using namespace VideoCommon::Shader; - if (work.backend == AsyncShaders::Backend::OpenGL) { - shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); - } else if (work.backend == AsyncShaders::Backend::GLASM) { - shader->AsyncGLASMBuilt(std::move(work.program.glasm)); - } - - auto& registry = shader->GetRegistry(); - - ShaderDiskCacheEntry entry; - entry.type = work.shader_type; - entry.code = std::move(work.code); - entry.code_b = std::move(work.code_b); - entry.unique_identifier = work.uid; - entry.bound_buffer = registry.GetBoundBuffer(); - entry.graphics_info = registry.GetGraphicsInfo(); - entry.keys = registry.GetKeys(); - entry.bound_samplers = registry.GetBoundSamplers(); - entry.bindless_samplers = registry.GetBindlessSamplers(); - disk_cache.SaveEntry(std::move(entry)); - } - } - - // Look up shader in the cache based on address - const std::optional cpu_addr{gpu_memory.GpuToCpuAddress(address)}; - if (Shader* const shader{cpu_addr ? 
TryGet(*cpu_addr) : null_shader.get()}) { - return last_shaders[static_cast(program)] = shader; - } - - const u8* const host_ptr{gpu_memory.GetPointer(address)}; - - // No shader found - create a new one - ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; - ProgramCode code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; - const u8* host_ptr_b = gpu_memory.GetPointer(address_b); - code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); - } - const std::size_t code_size = code.size() * sizeof(u64); - - const u64 unique_identifier = GetUniqueIdentifier( - GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - - const ShaderParameters params{gpu, maxwell3d, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr shader; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), - async_shaders, cpu_addr.value_or(0)); - } else { - shader = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = shader.get(); - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, code_size); - } else { - null_shader = std::move(shader); - } - - return last_shaders[static_cast(program)] = result; -} - -Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { - const std::optional cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; - - if (Shader* const kernel = cpu_addr ? 
TryGet(*cpu_addr) : null_kernel.get()) { - return kernel; - } - - // No kernel found, create a new one - const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; - ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; - const std::size_t code_size{code.size() * sizeof(u64)}; - const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - - const ShaderParameters params{gpu, kepler_compute, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr kernel; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - kernel = Shader::CreateKernelFromMemory(params, std::move(code)); - } else { - kernel = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = kernel.get(); - if (cpu_addr) { - Register(std::move(kernel), *cpu_addr, code_size); - } else { - null_kernel = std::move(kernel); - } - return result; -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b30308b6f..ad3d15a76 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -19,10 +19,6 @@ #include "common/common_types.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -33,10 +29,6 @@ namespace Core::Frontend { class EmuWindow; } -namespace VideoCommon::Shader { -class AsyncShaders; -} - namespace OpenGL { class Device; @@ -44,77 +36,10 @@ class RasterizerOpenGL; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct ProgramHandle { - OGLProgram source_program; - OGLAssemblyProgram assembly_program; -}; -using 
ProgramSharedPtr = std::shared_ptr; - -struct PrecompiledShader { - ProgramSharedPtr program; - std::shared_ptr registry; - ShaderEntries entries; -}; - -struct ShaderParameters { - Tegra::GPU& gpu; - Tegra::Engines::ConstBufferEngineInterface& engine; - ShaderDiskCacheOpenGL& disk_cache; - const Device& device; - VAddr cpu_addr; - const u8* host_ptr; - u64 unique_identifier; -}; - -ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, - u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - bool hint_retrievable = false); - -class Shader final { +class Shader { public: + explicit Shader(); ~Shader(); - - /// Gets the GL program handle for the shader - GLuint GetHandle() const; - - bool IsBuilt() const; - - /// Gets the shader entries for the shader - const ShaderEntries& GetEntries() const { - return entries; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return *registry; - } - - /// Mark a OpenGL shader as built - void AsyncOpenGLBuilt(OGLProgram new_program); - - /// Mark a GLASM shader as built - void AsyncGLASMBuilt(OGLAssemblyProgram new_program); - - static std::unique_ptr CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode program_code, ProgramCode program_code_b, - VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); - - static std::unique_ptr CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code); - - static std::unique_ptr CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader); - -private: - explicit Shader(std::shared_ptr registry, ShaderEntries entries, - ProgramSharedPtr program, bool is_built_ = true); - - std::shared_ptr registry; - ShaderEntries entries; - ProgramSharedPtr program; - GLuint handle = 0; - bool is_built{}; }; class ShaderCacheOpenGL final : public VideoCommon::ShaderCache { @@ -126,36 
+51,13 @@ public: Tegra::MemoryManager& gpu_memory_, const Device& device_); ~ShaderCacheOpenGL() override; - /// Loads disk cache for the current game - void LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback); - - /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders); - - /// Gets a compute kernel in the passed address - Shader* GetComputeKernel(GPUVAddr code_addr); - private: - ProgramSharedPtr GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set& supported_formats); - Core::Frontend::EmuWindow& emu_window; Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; - - ShaderDiskCacheOpenGL disk_cache; - std::unordered_map runtime_cache; - - std::unique_ptr null_shader; - std::unique_ptr null_kernel; - - std::array last_shaders{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp deleted file mode 100644 index 9c28498e8..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ /dev/null @@ -1,2986 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/div_ceil.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/node.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/shader/transform_feedback.h" - -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::Header; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::IpaMode; -using Tegra::Shader::IpaSampleMode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::TextureType; - -using namespace VideoCommon::Shader; -using namespace std::string_literals; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using Operation = const OperationNode&; - -class ASTDecompiler; -class ExprDecompiler; - -enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; - -constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; - -constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; -constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; - -struct TextureOffset {}; -struct TextureDerivates {}; -using TextureArgument = std::pair; -using TextureIR = std::variant; - -constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast(Maxwell::MaxConstBufferSize) / sizeof(u32); -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); - -constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt -#define ftou floatBitsToUint -#define itof intBitsToFloat -#define utof uintBitsToFloat - -bvec2 
HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ - bvec2 is_nan1 = isnan(pair1); - bvec2 is_nan2 = isnan(pair2); - return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); -}} - -const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); -const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); -)"; - -class ShaderWriter final { -public: - void AddExpression(std::string_view text) { - DEBUG_ASSERT(scope >= 0); - if (!text.empty()) { - AppendIndentation(); - } - shader_source += text; - } - - // Forwards all arguments directly to libfmt. - // Note that all formatting requirements for fmt must be - // obeyed when using this function. (e.g. {{ must be used - // printing the character '{' is desirable. Ditto for }} and '}', - // etc). - template - void AddLine(std::string_view text, Args&&... args) { - AddExpression(fmt::format(fmt::runtime(text), std::forward(args)...)); - AddNewLine(); - } - - void AddNewLine() { - DEBUG_ASSERT(scope >= 0); - shader_source += '\n'; - } - - std::string GenerateTemporary() { - return fmt::format("tmp{}", temporary_index++); - } - - std::string GetResult() { - return std::move(shader_source); - } - - s32 scope = 0; - -private: - void AppendIndentation() { - shader_source.append(static_cast(scope) * 4, ' '); - } - - std::string shader_source; - u32 temporary_index = 1; -}; - -class Expression final { -public: - Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { - ASSERT(type != Type::Void); - } - Expression() : type{Type::Void} {} - - Type GetType() const { - return type; - } - - std::string GetCode() const { - return code; - } - - void CheckVoid() const { - ASSERT(type == Type::Void); - } - - std::string As(Type type_) const { - switch (type_) { - case Type::Bool: - return AsBool(); - case Type::Bool2: - return AsBool2(); - case Type::Float: - return AsFloat(); - case Type::Int: - return AsInt(); - case Type::Uint: - 
return AsUint(); - case Type::HalfFloat: - return AsHalfFloat(); - default: - UNREACHABLE_MSG("Invalid type"); - return code; - } - } - - std::string AsBool() const { - switch (type) { - case Type::Bool: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsBool2() const { - switch (type) { - case Type::Bool2: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsFloat() const { - switch (type) { - case Type::Float: - return code; - case Type::Uint: - return fmt::format("utof({})", code); - case Type::Int: - return fmt::format("itof({})", code); - case Type::HalfFloat: - return fmt::format("utof(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsInt() const { - switch (type) { - case Type::Float: - return fmt::format("ftoi({})", code); - case Type::Uint: - return fmt::format("int({})", code); - case Type::Int: - return code; - case Type::HalfFloat: - return fmt::format("int(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsUint() const { - switch (type) { - case Type::Float: - return fmt::format("ftou({})", code); - case Type::Uint: - return code; - case Type::Int: - return fmt::format("uint({})", code); - case Type::HalfFloat: - return fmt::format("packHalf2x16({})", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsHalfFloat() const { - switch (type) { - case Type::Float: - return fmt::format("unpackHalf2x16(ftou({}))", code); - case Type::Uint: - return fmt::format("unpackHalf2x16({})", code); - case Type::Int: - return fmt::format("unpackHalf2x16(int({}))", code); - case Type::HalfFloat: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - -private: - std::string code; - Type type{}; -}; - -const char* GetTypeString(Type type) { - 
switch (type) { - case Type::Bool: - return "bool"; - case Type::Bool2: - return "bvec2"; - case Type::Float: - return "float"; - case Type::Int: - return "int"; - case Type::Uint: - return "uint"; - case Type::HalfFloat: - return "vec2"; - default: - UNREACHABLE_MSG("Invalid type"); - return ""; - } -} - -const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "Buffer"; - case Tegra::Shader::ImageType::Texture1DArray: - return "1DArray"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "2DArray"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - default: - UNREACHABLE(); - return "1D"; - } -} - -/// Describes primitive behavior on geometry shaders -std::pair GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { - switch (topology) { - case Maxwell::PrimitiveTopology::Points: - return {"points", 1}; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineStrip: - return {"lines", 2}; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return {"lines_adjacency", 4}; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return {"triangles", 3}; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return {"triangles_adjacency", 6}; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return {"points", 1}; - } -} - -/// Generates code to use for a swizzle operation. 
-constexpr const char* GetSwizzle(std::size_t element) { - constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; - return swizzle.at(element); -} - -constexpr const char* GetColorSwizzle(std::size_t element) { - constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; - return swizzle.at(element); -} - -/// Translate topology -std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "points"; - case Tegra::Shader::OutputTopology::LineStrip: - return "line_strip"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "triangle_strip"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (const auto arithmetic = std::get_if(&meta)) { - return arithmetic->precise; - } - return false; -} - -bool IsPrecise(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - return IsPrecise(*operation); - } - return false; -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -constexpr bool IsLegacyTexCoord(Attribute::Index index) { - return static_cast(index) >= static_cast(Attribute::Index::TexCoord_0) && - static_cast(index) <= static_cast(Attribute::Index::TexCoord_7); -} - -constexpr Attribute::Index ToGenericAttribute(u64 value) { - return static_cast(value + static_cast(Attribute::Index::Attribute_0)); -} - -constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { - return static_cast(index) - static_cast(Attribute::Index::TexCoord_0); -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast(index) - static_cast(Attribute::Index::Attribute_0); -} - -constexpr const char* 
GetFlowStackPrefix(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "ssy"; - case MetaStackClass::Pbk: - return "pbk"; - } - return {}; -} - -std::string FlowStackName(MetaStackClass stack) { - return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); -} - -std::string FlowStackTopName(MetaStackClass stack) { - return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); -} - -struct GenericVaryingDescription { - std::string name; - u8 first_element = 0; - bool is_scalar = false; -}; - -class GLSLDecompiler final { -public: - explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier_, - std::string_view suffix_) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, - identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { - if (stage != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); - } - } - - void Decompile() { - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareImages(); - DeclareSamplers(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareLocalMemory(); - DeclareRegisters(); - DeclarePredicates(); - DeclareInternalFlags(); - DeclareCustomVariables(); - DeclarePhysicalAttributeReader(); - - code.AddLine("void main() {{"); - ++code.scope; - - if (stage == ShaderType::Vertex) { - code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - } - - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - - --code.scope; - code.AddLine("}}"); - } - - std::string GetResult() { - return code.GetResult(); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - void DecompileBranchMode() { - // VM's program counter - const auto first_address = ir.GetBasicBlocks().begin()->first; - 
code.AddLine("uint jmp_to = {}U;", first_address); - - // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems - // unlikely that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { - code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); - code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); - } - } - - code.AddLine("while (true) {{"); - ++code.scope; - - code.AddLine("switch (jmp_to) {{"); - - for (const auto& pair : ir.GetBasicBlocks()) { - const auto& [address, bb] = pair; - code.AddLine("case 0x{:X}U: {{", address); - ++code.scope; - - VisitBlock(bb); - - --code.scope; - code.AddLine("}}"); - } - - code.AddLine("default: return;"); - code.AddLine("}}"); - - --code.scope; - code.AddLine("}}"); - } - - void DecompileAST(); - - void DeclareHeader() { - if (!identifier.empty()) { - code.AddLine("// {}", identifier); - } - const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); - code.AddLine("#version 440 {}", use_compatibility ? 
"compatibility" : "core"); - code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); - if (device.HasShaderBallot()) { - code.AddLine("#extension GL_ARB_shader_ballot : require"); - } - if (device.HasVertexViewportLayer()) { - code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); - } - if (device.HasImageLoadFormatted()) { - code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); - } - if (device.HasTextureShadowLod()) { - code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); - } - if (device.HasWarpIntrinsics()) { - code.AddLine("#extension GL_NV_gpu_shader5 : require"); - code.AddLine("#extension GL_NV_shader_thread_group : require"); - code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); - } - // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 - // operations) on places where we don't want to. - // Thanks to Ryujinx for finding this workaround. - code.AddLine("#pragma optionNV(fastmath off)"); - - code.AddNewLine(); - - code.AddLine(COMMON_DECLARATIONS); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - - DeclareVertexRedeclarations(); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - - const auto& info = registry.GetGraphicsInfo(); - const auto input_topology = info.primitive_topology; - const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); - max_input_vertices = max_vertices; - code.AddLine("layout ({}) in;", glsl_topology); - - const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_output_vertices = header.common4.max_output_vertices.Value(); - code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); - code.AddNewLine(); - - code.AddLine("in gl_PerVertex {{"); - ++code.scope; - code.AddLine("vec4 gl_Position;"); - --code.scope; - code.AddLine("}} gl_in[];"); - - DeclareVertexRedeclarations(); - } - 
- void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - if (ir.UsesLegacyVaryings()) { - code.AddLine("in gl_PerFragment {{"); - ++code.scope; - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_Color;"); - code.AddLine("vec4 gl_SecondaryColor;"); - --code.scope; - code.AddLine("}};"); - } - - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); - } - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const auto& info = registry.GetComputeInfo(); - if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (size > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size, limit); - size = limit; - } - - code.AddLine("shared uint smem[{}];", size / 4); - code.AddNewLine(); - } - code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", - info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); - code.AddNewLine(); - } - - void DeclareVertexRedeclarations() { - code.AddLine("out gl_PerVertex {{"); - ++code.scope; - - auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); - if (!pos_xfb.empty()) { - pos_xfb = fmt::format("layout ({}) ", pos_xfb); - } - const char* pos_type = - FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); - code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); - - for (const auto attribute : ir.GetOutputAttributes()) { - if (attribute == Attribute::Index::ClipDistances0123 || - attribute == Attribute::Index::ClipDistances4567) { - code.AddLine("float gl_ClipDistance[];"); - break; - } - } - - if (stage != ShaderType::Geometry && - (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { - if (ir.UsesLayer()) { - code.AddLine("int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - 
code.AddLine("int gl_ViewportIndex;"); - } - } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && - !device.HasVertexViewportLayer()) { - LOG_ERROR( - Render_OpenGL, - "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); - } - - if (ir.UsesPointSize()) { - code.AddLine("float gl_PointSize;"); - } - - if (ir.UsesLegacyVaryings()) { - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_FrontColor;"); - code.AddLine("vec4 gl_FrontSecondaryColor;"); - code.AddLine("vec4 gl_BackColor;"); - code.AddLine("vec4 gl_BackSecondaryColor;"); - } - - --code.scope; - code.AddLine("}};"); - code.AddNewLine(); - - if (stage == ShaderType::Geometry) { - if (ir.UsesLayer()) { - code.AddLine("out int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("out int gl_ViewportIndex;"); - } - } - code.AddNewLine(); - } - - void DeclareRegisters() { - const auto& registers = ir.GetRegisters(); - for (const u32 gpr : registers) { - code.AddLine("float {} = 0.0f;", GetRegister(gpr)); - } - if (!registers.empty()) { - code.AddNewLine(); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); - } - if (num_custom_variables > 0) { - code.AddNewLine(); - } - } - - void DeclarePredicates() { - const auto& predicates = ir.GetPredicates(); - for (const auto pred : predicates) { - code.AddLine("bool {} = false;", GetPredicate(pred)); - } - if (!predicates.empty()) { - code.AddNewLine(); - } - } - - void DeclareLocalMemory() { - u64 local_memory_size = 0; - if (stage == ShaderType::Compute) { - local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - local_memory_size = header.GetLocalMemorySize(); - } - if (local_memory_size == 0) { - return; - } - const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; - 
code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); - code.AddNewLine(); - } - - void DeclareInternalFlags() { - for (u32 flag = 0; flag < static_cast(InternalFlag::Amount); flag++) { - const auto flag_code = static_cast(flag); - code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); - } - code.AddNewLine(); - } - - const char* GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return "smooth"; - case PixelImap::Constant: - return "flat"; - case PixelImap::ScreenLinear: - return "noperspective"; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; - } - - void DeclareInputAttributes() { - if (ir.HasPhysicalAttributes()) { - const u32 num_inputs{GetNumPhysicalInputAttributes()}; - for (u32 i = 0; i < num_inputs; ++i) { - DeclareInputAttribute(ToGenericAttribute(i), true); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetInputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareInputAttribute(index, false); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 location{GetGenericAttributeIndex(index)}; - - std::string name{GetGenericInputAttribute(index)}; - if (stage == ShaderType::Geometry) { - name = "gs_" + name + "[]"; - } - - std::string suffix_; - if (stage == ShaderType::Fragment) { - const auto input_mode{header.ps.GetPixelImap(location)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix_ = GetInputFlags(input_mode); - } - - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); - } - - void DeclareOutputAttributes() { - if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { - for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { - DeclareOutputAttribute(ToGenericAttribute(i)); - } - code.AddNewLine(); - return; - } - 
- const auto& attributes = ir.GetOutputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareOutputAttribute(index); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - std::optional GetNumComponents(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return std::nullopt; - } - return it->second.components; - } - - std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - - const VaryingTFB& tfb = it->second; - return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, - tfb.offset, tfb.stride); - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - u8 element = 0; - while (element < 4) { - auto xfb = GetTransformFeedbackDecoration(index, element); - if (!xfb.empty()) { - xfb = fmt::format(", {}", xfb); - } - const std::size_t remainder = 4 - element; - const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); - const char* const type = FLOAT_TYPES.at(num_components - 1); - - const u32 location = GetGenericAttributeIndex(index); - - GenericVaryingDescription description; - description.first_element = static_cast(element); - description.is_scalar = num_components == 1; - description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); - if (element != 0 || num_components != 4) { - const std::string_view name_swizzle = swizzle.substr(element, num_components); - description.name = fmt::format("{}_{}", description.name, name_swizzle); - } - for (std::size_t i = 0; i < num_components; ++i) { - const u8 offset = 
static_cast(location * 4 + element + i); - varying_description.insert({offset, description}); - } - - code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, - xfb, type, description.name); - - element = static_cast(static_cast(element) + num_components); - } - } - - void DeclareConstantBuffers() { - u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& [index, info] : ir.GetConstantBuffers()) { - const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); - const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; - code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, - GetConstBufferBlock(index)); - code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareGlobalMemory() { - u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - // Since we don't know how the shader will use the shader, hint the driver to disable as - // much optimizations as possible - std::string qualifier = "coherent volatile"; - if (usage.is_read && !usage.is_written) { - qualifier += " readonly"; - } else if (usage.is_written && !usage.is_read) { - qualifier += " writeonly"; - } - - code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, - GetGlobalMemoryBlock(base)); - code.AddLine(" uint {}[];", GetGlobalMemory(base)); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareSamplers() { - u32 binding = device.GetBaseBindings(stage).sampler; - for (const auto& sampler : ir.GetSamplers()) { - const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding); - binding += sampler.is_indexed ? 
sampler.size : 1; - - std::string sampler_type = [&]() { - if (sampler.is_buffer) { - return "samplerBuffer"; - } - switch (sampler.type) { - case TextureType::Texture1D: - return "sampler1D"; - case TextureType::Texture2D: - return "sampler2D"; - case TextureType::Texture3D: - return "sampler3D"; - case TextureType::TextureCube: - return "samplerCube"; - default: - UNREACHABLE(); - return "sampler2D"; - } - }(); - if (sampler.is_array) { - sampler_type += "Array"; - } - if (sampler.is_shadow) { - sampler_type += "Shadow"; - } - - if (!sampler.is_indexed) { - code.AddLine("{} {} {};", description, sampler_type, name); - } else { - code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size); - } - } - if (!ir.GetSamplers().empty()) { - code.AddNewLine(); - } - } - - void DeclarePhysicalAttributeReader() { - if (!ir.HasPhysicalAttributes()) { - return; - } - code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{"); - ++code.scope; - code.AddLine("switch (physical_address) {{"); - - // Just declare generic attributes for now. - const auto num_attributes{static_cast(GetNumPhysicalInputAttributes())}; - for (u32 index = 0; index < num_attributes; ++index) { - const auto attribute{ToGenericAttribute(index)}; - for (u32 element = 0; element < 4; ++element) { - constexpr u32 generic_base = 0x80; - constexpr u32 generic_stride = 16; - constexpr u32 element_stride = 4; - const u32 address{generic_base + index * generic_stride + element * element_stride}; - - const bool declared = stage != ShaderType::Fragment || - header.ps.GetPixelImap(index) != PixelImap::Unused; - const std::string value = - declared ? 
ReadAttribute(attribute, element).AsFloat() : "0.0f"; - code.AddLine("case 0x{:X}U: return {};", address, value); - } - } - - code.AddLine("default: return 0;"); - - code.AddLine("}}"); - --code.scope; - code.AddLine("}}"); - code.AddNewLine(); - } - - void DeclareImages() { - u32 binding = device.GetBaseBindings(stage).image; - for (const auto& image : ir.GetImages()) { - std::string qualifier = "coherent volatile"; - if (image.is_read && !image.is_written) { - qualifier += " readonly"; - } else if (image.is_written && !image.is_read) { - qualifier += " writeonly"; - } - - const char* format = image.is_atomic ? "r32ui, " : ""; - const char* type_declaration = GetImageTypeDeclaration(image.type); - code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, - qualifier, type_declaration, GetImage(image)); - } - if (!ir.GetImages().empty()) { - code.AddNewLine(); - } - } - - void VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node).CheckVoid(); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - const auto operation_index = static_cast(operation->GetCode()); - if (operation_index >= operation_decompilers.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); - return {}; - } - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", operation_index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {"0U", Type::Uint}; - } - return {GetRegister(index), Type::Float}; - } - - if (const auto cv = std::get_if(&*node)) { - const u32 index = cv->GetIndex(); - return {GetCustomVariable(index), Type::Float}; - } - 
- if (const auto immediate = std::get_if(&*node)) { - const u32 value = immediate->GetValue(); - if (value < 10) { - // For eyecandy avoid using hex numbers on single digits - return {fmt::format("{}U", immediate->GetValue()), Type::Uint}; - } - return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if(&*node)) { - const auto value = [&]() -> std::string { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return "true"; - case Tegra::Shader::Pred::NeverExecute: - return "false"; - default: - return GetPredicate(index); - } - }(); - if (predicate->IsNegated()) { - return {fmt::format("!({})", value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if(&*node)) { - UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry, - "Physical attributes in geometry shaders are not implemented"); - if (abuf->IsPhysicalBuffer()) { - return {fmt::format("ReadPhysicalAttribute({})", - Visit(abuf->GetPhysicalAddress()).AsUint()), - Type::Float}; - } - return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); - } - - if (const auto cbuf = std::get_if(&*node)) { - const Node offset = cbuf->GetOffset(); - - if (const auto immediate = std::get_if(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); - return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), - offset_imm / (4 * 4), (offset_imm / 4) % 4), - Type::Uint}; - } - - // Indirect access - const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); - - if (!device.HasComponentIndexingBug()) { - return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset), - Type::Uint}; - } - - // AMD's proprietary GLSL compiler emits 
ill code for variable component access. - // To bypass this driver bug generate 4 ifs, one per each component. - const std::string pack = code.GenerateTemporary(); - code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), - final_offset); - - const std::string result = code.GenerateTemporary(); - code.AddLine("uint {};", result); - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, - GetSwizzle(swizzle)); - } - return {result, Type::Uint}; - } - - if (const auto gmem = std::get_if(&*node)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } - - if (const auto lmem = std::get_if(&*node)) { - return { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } - - if (const auto smem = std::get_if(&*node)) { - return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } - - if (const auto internal_flag = std::get_if(&*node)) { - return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - // It's invalid to call conditional on nested nodes, use an operation instead - code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); - ++code.scope; - - VisitBlock(conditional->GetCode()); - - --code.scope; - code.AddLine("}}"); - return {}; - } - - if (const auto comment = std::get_if(&*node)) { - code.AddLine("// " + comment->GetText()); - return {}; - } - - UNREACHABLE(); - return {}; - } - - Expression ReadAttribute(Attribute::Index 
attribute, u32 element, const Node& buffer = {}) { - const auto GeometryPass = [&](std::string_view name) { - if (stage == ShaderType::Geometry && buffer) { - // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games - // set an 0x80000000 index for those and the shader fails to build. Find out why - // this happens and what's its intent. - return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), - max_input_vertices.value()); - } - return std::string(name); - }; - - switch (attribute) { - case Attribute::Index::Position: - switch (stage) { - case ShaderType::Geometry: - return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), - GetSwizzle(element)), - Type::Float}; - case ShaderType::Fragment: - return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; - default: - UNREACHABLE(); - return {"0", Type::Int}; - } - case Attribute::Index::FrontColor: - return {"gl_Color"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::FrontSecondaryColor: - return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return {"gl_PointCoord.x", Type::Float}; - case 1: - return {"gl_PointCoord.y", Type::Float}; - case 2: - case 3: - return {"0.0f", Type::Float}; - } - UNREACHABLE(); - return {"0", Type::Int}; - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - // Config pack's first value is instance_id. 
- return {"gl_InstanceID", Type::Int}; - case 3: - return {"gl_VertexID", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {"0", Type::Int}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - switch (element) { - case 3: - return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {"0", Type::Int}; - default: - if (IsGenericAttribute(attribute)) { - return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), - Type::Float}; - } - if (IsLegacyTexCoord(attribute)) { - UNIMPLEMENTED_IF(stage == ShaderType::Geometry); - return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}; - } - break; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {"0", Type::Int}; - } - - Expression ApplyPrecise(Operation operation, std::string value, Type type) { - if (!IsPrecise(operation)) { - return {std::move(value), type}; - } - // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to - // be found in fragment shaders, so we disable precise there. There are vertex shaders that - // also fail to build but nobody seems to care about those. - // Note: Only bugged drivers will skip precise. - const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; - - std::string temporary = code.GenerateTemporary(); - code.AddLine("{}{} {} = {};", disable_precise ? 
"" : "precise ", GetTypeString(type), - temporary, value); - return {std::move(temporary), type}; - } - - Expression VisitOperand(Operation operation, std::size_t operand_index) { - const auto& operand = operation[operand_index]; - const bool parent_precise = IsPrecise(operation); - const bool child_precise = IsPrecise(operand); - const bool child_trivial = !std::holds_alternative(*operand); - if (!parent_precise || child_precise || child_trivial) { - return Visit(operand); - } - - Expression value = Visit(operand); - std::string temporary = code.GenerateTemporary(); - code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); - return {std::move(temporary), value.GetType()}; - } - - std::optional GetOutputAttribute(const AbufNode* abuf) { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex()) { - case Attribute::Index::Position: - return {{"gl_Position"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return std::nullopt; - case 1: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_Layer", Type::Int}}; - case 2: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_ViewportIndex", Type::Int}}; - case 3: - return {{"gl_PointSize", Type::Float}}; - } - return std::nullopt; - case Attribute::Index::FrontColor: - return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::FrontSecondaryColor: - return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackColor: - return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackSecondaryColor: - return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::ClipDistances0123: - return 
{{fmt::format("gl_ClipDistance[{}]", element), Type::Float}}; - case Attribute::Index::ClipDistances4567: - return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}}; - default: - if (IsGenericAttribute(attribute)) { - return {{GetGenericOutputAttribute(attribute, element), Type::Float}}; - } - if (IsLegacyTexCoord(attribute)) { - return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute); - return std::nullopt; - } - } - - Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, - Type type_a) { - std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b, Type type_c) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - std::string op_str = 
fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, - Type type_a, Type type_b, Type type_c, Type type_d) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - const std::string op_d = VisitOperand(operation, 3).As(type_d); - std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - std::string GenerateTexture(Operation operation, const std::string& function_suffix, - const std::vector& extras, bool separate_dc = false) { - constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; - - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - const std::size_t count = operation.GetOperandsCount(); - const bool has_array = meta->sampler.is_array; - const bool has_shadow = meta->sampler.is_shadow; - const bool workaround_lod_array_shadow_as_grad = - !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube); - - std::string expr = "texture"; - - if (workaround_lod_array_shadow_as_grad) { - expr += "Grad"; - } else { - expr += function_suffix; - } - - if (!meta->aoffi.empty()) { - expr += "Offset"; - } else if (!meta->ptp.empty()) { - expr += "Offsets"; - } - if (!meta->sampler.is_indexed) { - expr += '(' + GetSampler(meta->sampler) + ", "; - } else { - expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; - } - expr += coord_constructors.at(count + (has_array ? 1 : 0) + - (has_shadow && !separate_dc ? 
1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - expr += Visit(operation[i]).AsFloat(); - - const std::size_t next = i + 1; - if (next < count) - expr += ", "; - } - if (has_array) { - expr += ", float(" + Visit(meta->array).AsInt() + ')'; - } - if (has_shadow) { - if (separate_dc) { - expr += "), " + Visit(meta->depth_compare).AsFloat(); - } else { - expr += ", " + Visit(meta->depth_compare).AsFloat() + ')'; - } - } else { - expr += ')'; - } - - if (workaround_lod_array_shadow_as_grad) { - switch (meta->sampler.type) { - case TextureType::Texture2D: - return expr + ", vec2(0.0), vec2(0.0))"; - case TextureType::TextureCube: - return expr + ", vec3(0.0), vec3(0.0))"; - default: - UNREACHABLE(); - break; - } - } - - for (const auto& variant : extras) { - if (const auto argument = std::get_if(&variant)) { - expr += GenerateTextureArgument(*argument); - } else if (std::holds_alternative(variant)) { - if (!meta->aoffi.empty()) { - expr += GenerateTextureAoffi(meta->aoffi); - } else if (!meta->ptp.empty()) { - expr += GenerateTexturePtp(meta->ptp); - } - } else if (std::holds_alternative(variant)) { - expr += GenerateTextureDerivates(meta->derivates); - } else { - UNREACHABLE(); - } - } - - return expr + ')'; - } - - std::string GenerateTextureArgument(const TextureArgument& argument) { - const auto& [type, operand] = argument; - if (operand == nullptr) { - return {}; - } - - std::string expr = ", "; - switch (type) { - case Type::Int: - if (const auto immediate = std::get_if(&*operand)) { - // Inline the string as an immediate integer in GLSL (some extra arguments are - // required to be constant) - expr += std::to_string(static_cast(immediate->GetValue())); - } else { - expr += Visit(operand).AsInt(); - } - break; - case Type::Float: - expr += Visit(operand).AsFloat(); - break; - default: { - const auto type_int = static_cast(type); - UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); - expr += '0'; - break; - } - } - return expr; - } 
- - std::string ReadTextureOffset(const Node& value) { - if (const auto immediate = std::get_if(&*value)) { - // Inline the string as an immediate integer in GLSL (AOFFI arguments are required - // to be constant by the standard). - return std::to_string(static_cast(immediate->GetValue())); - } else if (device.HasVariableAoffi()) { - // Avoid using variable AOFFI on unsupported devices. - return Visit(value).AsInt(); - } else { - // Insert 0 on devices not supporting variable AOFFI. - return "0"; - } - } - - std::string GenerateTextureAoffi(const std::vector& aoffi) { - if (aoffi.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; - std::string expr = ", "; - expr += coord_constructors.at(aoffi.size() - 1); - expr += '('; - - for (std::size_t index = 0; index < aoffi.size(); ++index) { - expr += ReadTextureOffset(aoffi.at(index)); - if (index + 1 < aoffi.size()) { - expr += ", "; - } - } - expr += ')'; - - return expr; - } - - std::string GenerateTexturePtp(const std::vector& ptp) { - static constexpr std::size_t num_vectors = 4; - ASSERT(ptp.size() == num_vectors * 2); - - std::string expr = ", ivec2[]("; - for (std::size_t vector = 0; vector < num_vectors; ++vector) { - const bool has_next = vector + 1 < num_vectors; - expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), - ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? 
", " : ""); - } - expr += ')'; - return expr; - } - - std::string GenerateTextureDerivates(const std::vector& derivates) { - if (derivates.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"float", "vec2", "vec3"}; - std::string expr = ", "; - const std::size_t components = derivates.size() / 2; - std::string dx = coord_constructors.at(components - 1); - std::string dy = coord_constructors.at(components - 1); - dx += '('; - dy += '('; - - for (std::size_t index = 0; index < components; ++index) { - const auto& operand_x{derivates.at(index * 2)}; - const auto& operand_y{derivates.at(index * 2 + 1)}; - dx += Visit(operand_x).AsFloat(); - dy += Visit(operand_y).AsFloat(); - - if (index + 1 < components) { - dx += ", "; - dy += ", "; - } - } - dx += ')'; - dy += ')'; - expr += dx + ", " + dy; - - return expr; - } - - std::string BuildIntegerCoordinates(Operation operation) { - constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; - const std::size_t coords_count{operation.GetOperandsCount()}; - std::string expr = constructors.at(coords_count - 1); - for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - if (i + 1 < coords_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - std::string BuildImageValues(Operation operation) { - constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; - const auto& meta{std::get(operation.GetMeta())}; - - const std::size_t values_count{meta.values.size()}; - std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); - for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)).AsUint(); - if (i + 1 < values_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == 
Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit the source - // as it might have side effects. - code.AddLine("{};", Visit(src).GetCode()); - return {}; - } - target = {GetRegister(gpr->GetIndex()), Type::Float}; - } else if (const auto abuf = std::get_if(&*dest)) { - UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); - auto output = GetOutputAttribute(abuf); - if (!output) { - return {}; - } - target = std::move(*output); - } else if (const auto lmem = std::get_if(&*dest)) { - target = { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } else if (const auto smem = std::get_if(&*dest)) { - ASSERT(stage == ShaderType::Compute); - target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } else if (const auto gmem = std::get_if(&*dest)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } else if (const auto cv = std::get_if(&*dest)) { - target = {GetCustomVariable(cv->GetIndex()), Type::Float}; - } else { - UNREACHABLE_MSG("Assign called without a proper target"); - } - - code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType())); - return {}; - } - - template - Expression Add(Operation operation) { - return GenerateBinaryInfix(operation, "+", type, type, type); - } - - template - Expression Mul(Operation operation) { - return GenerateBinaryInfix(operation, "*", type, type, type); - } - - template - Expression Div(Operation operation) { - return GenerateBinaryInfix(operation, "/", type, type, type); - } - - template - Expression Fma(Operation operation) { - return GenerateTernary(operation, "fma", type, type, type, type); - } - - template - Expression 
Negate(Operation operation) { - return GenerateUnary(operation, "-", type, type); - } - - template - Expression Absolute(Operation operation) { - return GenerateUnary(operation, "abs", type, type); - } - - Expression FClamp(Operation operation) { - return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, - Type::Float); - } - - Expression FCastHalf0(Operation operation) { - return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression FCastHalf1(Operation operation) { - return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - template - Expression Min(Operation operation) { - return GenerateBinaryCall(operation, "min", type, type, type); - } - - template - Expression Max(Operation operation) { - return GenerateBinaryCall(operation, "max", type, type, type); - } - - Expression Select(Operation operation) { - const std::string condition = Visit(operation[0]).AsBool(); - const std::string true_case = Visit(operation[1]).AsUint(); - const std::string false_case = Visit(operation[2]).AsUint(); - std::string op_str = fmt::format("({} ? 
{} : {})", condition, true_case, false_case); - - return ApplyPrecise(operation, std::move(op_str), Type::Uint); - } - - Expression FCos(Operation operation) { - return GenerateUnary(operation, "cos", Type::Float, Type::Float); - } - - Expression FSin(Operation operation) { - return GenerateUnary(operation, "sin", Type::Float, Type::Float); - } - - Expression FExp2(Operation operation) { - return GenerateUnary(operation, "exp2", Type::Float, Type::Float); - } - - Expression FLog2(Operation operation) { - return GenerateUnary(operation, "log2", Type::Float, Type::Float); - } - - Expression FInverseSqrt(Operation operation) { - return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float); - } - - Expression FSqrt(Operation operation) { - return GenerateUnary(operation, "sqrt", Type::Float, Type::Float); - } - - Expression FRoundEven(Operation operation) { - return GenerateUnary(operation, "roundEven", Type::Float, Type::Float); - } - - Expression FFloor(Operation operation) { - return GenerateUnary(operation, "floor", Type::Float, Type::Float); - } - - Expression FCeil(Operation operation) { - return GenerateUnary(operation, "ceil", Type::Float, Type::Float); - } - - Expression FTrunc(Operation operation) { - return GenerateUnary(operation, "trunc", Type::Float, Type::Float); - } - - template - Expression FCastInteger(Operation operation) { - return GenerateUnary(operation, "float", Type::Float, type); - } - - Expression FSwizzleAdd(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsFloat(); - const std::string op_b = VisitOperand(operation, 1).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {fmt::format("{} + {}", op_a, op_b), Type::Float}; - } - - const std::string instr_mask = VisitOperand(operation, 2).AsUint(); - const std::string mask = code.GenerateTemporary(); - code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) 
<< 1)) & 3;", mask, - instr_mask); - - const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); - const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); - return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), - Type::Float}; - } - - Expression ICastFloat(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Float); - } - - Expression ICastUnsigned(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Uint); - } - - template - Expression LogicalShiftLeft(Operation operation) { - return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); - } - - Expression ILogicalShiftRight(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - const std::string op_b = VisitOperand(operation, 1).AsUint(); - std::string op_str = fmt::format("int({} >> {})", op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), Type::Int); - } - - Expression IArithmeticShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); - } - - template - Expression BitwiseAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&", type, type, type); - } - - template - Expression BitwiseOr(Operation operation) { - return GenerateBinaryInfix(operation, "|", type, type, type); - } - - template - Expression BitwiseXor(Operation operation) { - return GenerateBinaryInfix(operation, "^", type, type, type); - } - - template - Expression BitwiseNot(Operation operation) { - return GenerateUnary(operation, "~", type, type); - } - - Expression UCastFloat(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Float); - } - - Expression UCastSigned(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Int); - } - - Expression UShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Uint, 
Type::Uint, Type::Uint); - } - - template - Expression BitfieldInsert(Operation operation) { - return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, - Type::Int); - } - - template - Expression BitfieldExtract(Operation operation) { - return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); - } - - template - Expression BitCount(Operation operation) { - return GenerateUnary(operation, "bitCount", type, type); - } - - template - Expression BitMSB(Operation operation) { - return GenerateUnary(operation, "findMSB", type, type); - } - - Expression HNegate(Operation operation) { - const auto GetNegate = [&](std::size_t index) { - return VisitOperand(operation, index).AsBool() + " ? -1 : 1"; - }; - return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(), - GetNegate(1), GetNegate(2)), - Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsHalfFloat(); - const std::string min = VisitOperand(operation, 1).AsFloat(); - const std::string max = VisitOperand(operation, 2).AsFloat(); - std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); - - return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat); - } - - Expression HCastFloat(Operation operation) { - return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()), - Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = VisitOperand(operation, 0); - switch (std::get(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: - return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H0_H0: - return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H1_H1: - return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), 
Type::HalfFloat}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression HMergeF32(Operation operation) { - return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression HMergeH0(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), - Type::HalfFloat}; - } - - Expression HMergeH1(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), - Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::HalfFloat}; - } - - template - Expression Comparison(Operation operation) { - static_assert(!unordered || type == Type::Float); - - Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); - - if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { - // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's - // and Nvidia's proprietary stacks. Manually force an ordered comparison. - return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - if constexpr (!unordered) { - return expr; - } - // Unordered comparisons are always true for NaN operands. 
- return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FOrdered(Operation operation) { - return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FUnordered(Operation operation) { - return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression LogicalAddCarry(Operation operation) { - const std::string carry = code.GenerateTemporary(); - code.AddLine("uint {};", carry); - code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), - VisitOperand(operation, 1).AsUint(), carry); - return {fmt::format("({} != 0)", carry), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = GetPredicate(index); - } else if (const auto flag = std::get_if(&*dest)) { - target = GetInternalFlag(flag->GetFlag()); - } - - code.AddLine("{} = {};", target, Visit(src).AsBool()); - return {}; - } - - Expression LogicalAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalOr(Operation operation) { - return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalXor(Operation operation) { - return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); - } - - Expression 
LogicalNegate(Operation operation) { - return GenerateUnary(operation, "!", Type::Bool, Type::Bool); - } - - Expression LogicalPick2(Operation operation) { - return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(), - VisitOperand(operation, 1).AsUint()), - Type::Bool}; - } - - Expression LogicalAnd2(Operation operation) { - return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); - } - - template - Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) { - Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2, - Type::HalfFloat, Type::HalfFloat); - if constexpr (!with_nan) { - return comparison; - } - return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(), - VisitOperand(operation, 0).AsHalfFloat(), - VisitOperand(operation, 1).AsHalfFloat()), - Type::Bool2}; - } - - template - Expression Logical2HLessThan(Operation operation) { - return GenerateHalfComparison(operation, "lessThan"); - } - - template - Expression Logical2HEqual(Operation operation) { - return GenerateHalfComparison(operation, "equal"); - } - - template - Expression Logical2HLessEqual(Operation operation) { - return GenerateHalfComparison(operation, "lessThanEqual"); - } - - template - Expression Logical2HGreaterThan(Operation operation) { - return GenerateHalfComparison(operation, "greaterThan"); - } - - template - Expression Logical2HNotEqual(Operation operation) { - return GenerateHalfComparison(operation, "notEqual"); - } - - template - Expression Logical2HGreaterEqual(Operation operation) { - return GenerateHalfComparison(operation, "greaterThanEqual"); - } - - Expression Texture(Operation operation) { - const auto meta = std::get(operation.GetMeta()); - const bool separate_dc = meta.sampler.type == TextureType::TextureCube && - meta.sampler.is_array && meta.sampler.is_shadow; - // TODO: Replace this with an array and make GenerateTexture use C++20 std::span - const std::vector extras{ - 
TextureOffset{}, - TextureArgument{Type::Float, meta.bias}, - }; - std::string expr = GenerateTexture(operation, "", extras, separate_dc); - if (meta.sampler.is_shadow) { - expr = fmt::format("vec4({})", expr); - } - return {expr + GetSwizzle(meta.element), Type::Float}; - } - - Expression TextureLod(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - std::string expr{}; - - if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube)) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); - expr = GenerateTexture(operation, "Lod", {}); - } else { - expr = GenerateTexture(operation, "Lod", - {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); - } - - if (meta->sampler.is_shadow) { - expr = "vec4(" + expr + ')'; - } - return {expr + GetSwizzle(meta->element), Type::Float}; - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const auto type = meta.sampler.is_shadow ? 
Type::Float : Type::Int; - const bool separate_dc = meta.sampler.is_shadow; - - std::vector ir_; - if (meta.sampler.is_shadow) { - ir_ = {TextureOffset{}}; - } else { - ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; - } - return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), - Type::Float}; - } - - Expression TextureQueryDimensions(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - const std::string sampler = GetSampler(meta->sampler); - const std::string lod = VisitOperand(operation, 0).AsInt(); - - switch (meta->element) { - case 0: - case 1: - return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)), - Type::Int}; - case 3: - return {fmt::format("textureQueryLevels({})", sampler), Type::Int}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - if (meta->element < 2) { - return {fmt::format("int(({} * vec2(256)){})", - GenerateTexture(operation, "QueryLod", {}), - GetSwizzle(meta->element)), - Type::Int}; - } - return {"0", Type::Int}; - } - - Expression TexelFetch(Operation operation) { - constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"}; - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - UNIMPLEMENTED_IF(meta->sampler.is_array); - const std::size_t count = operation.GetOperandsCount(); - - std::string expr = "texelFetch("; - expr += GetSampler(meta->sampler); - expr += ", "; - - expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 
1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - if (i > 0) { - expr += ", "; - } - expr += VisitOperand(operation, i).AsInt(); - } - if (meta->array) { - expr += ", "; - expr += Visit(meta->array).AsInt(); - } - expr += ')'; - - if (meta->lod && !meta->sampler.is_buffer) { - expr += ", "; - expr += Visit(meta->lod).AsInt(); - } - expr += ')'; - expr += GetSwizzle(meta->element); - - return {std::move(expr), Type::Float}; - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - std::string expr = - GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}}); - return {std::move(expr) + GetSwizzle(meta.element), Type::Float}; - } - - Expression ImageLoad(Operation operation) { - if (!device.HasImageLoadFormatted()) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); - return {"0", Type::Int}; - } - - const auto& meta{std::get(operation.GetMeta())}; - return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), - BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), - Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), - BuildIntegerCoordinates(operation), BuildImageValues(operation)); - return {}; - } - - template - Expression AtomicImage(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), - BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()), - Type::Uint}; - } - - template - Expression Atomic(Operation operation) { - if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { - UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); - return {}; - } - return {fmt::format("atomic{}({}, {})", opname, 
Visit(operation[0]).GetCode(), - Visit(operation[1]).AsUint()), - Type::Uint}; - } - - template - Expression Reduce(Operation operation) { - code.AddLine("{};", Atomic(operation).GetCode()); - return {}; - } - - Expression Branch(Operation operation) { - const auto target = std::get_if(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("jmp_to = 0x{:X}U;", target->GetValue()); - code.AddLine("break;"); - return {}; - } - - Expression BranchIndirect(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - - code.AddLine("jmp_to = {};", op_a); - code.AddLine("break;"); - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const auto target = std::get_if(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack), - target->GetValue()); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); - code.AddLine("break;"); - return {}; - } - - void PreExit() { - if (stage != ShaderType::Fragment) { - return; - } - const auto& used_registers = ir.GetRegisters(); - const auto SafeGetRegister = [&](u32 reg) -> Expression { - // TODO(Rodrigo): Replace with contains once C++20 releases - if (used_registers.find(reg) != used_registers.end()) { - return {GetRegister(reg), Type::Float}; - } - return {"0.0f", Type::Float}; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. 
- u32 current_reg = 0; - for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), - SafeGetRegister(current_reg).AsFloat()); - ++current_reg; - } - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and current_reg - // already contains one past the last color register. - code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); - } - } - - Expression Exit(Operation operation) { - PreExit(); - code.AddLine("return;"); - return {}; - } - - Expression Discard(Operation operation) { - // Enclose "discard" in a conditional, so that GLSL compilation does not complain - // about unexecuted instructions that may follow this. 
- code.AddLine("if (true) {{"); - ++code.scope; - code.AddLine("discard;"); - --code.scope; - code.AddLine("}}"); - return {}; - } - - Expression EmitVertex(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EmitVertex is expected to be used in a geometry shader."); - code.AddLine("EmitVertex();"); - return {}; - } - - Expression EndPrimitive(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EndPrimitive is expected to be used in a geometry shader."); - code.AddLine("EndPrimitive();"); - return {}; - } - - Expression InvocationId(Operation operation) { - return {"gl_InvocationID", Type::Int}; - } - - Expression YNegate(Operation operation) { - // Y_NEGATE is mapped to this uniform value - return {"gl_FrontMaterial.ambient.a", Type::Float}; - } - - template - Expression LocalInvocationId(Operation) { - return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint}; - } - - template - Expression WorkGroupId(Operation) { - return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub on non-Nvidia devices by simulating all threads voting the same as the active - // one. - return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; - } - return {fmt::format("ballotThreadNV({})", value), Type::Uint}; - } - - Expression Vote(Operation operation, const char* func) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub with a warp size of one. 
- return {value, Type::Bool}; - } - return {fmt::format("{}({})", func, value), Type::Bool}; - } - - Expression VoteAll(Operation operation) { - return Vote(operation, "allThreadsNV"); - } - - Expression VoteAny(Operation operation) { - return Vote(operation, "anyThreadNV"); - } - - Expression VoteEqual(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // We must return true here since a stub for a theoretical warp size of 1. - // This will always return an equal result across all votes. - return {"true", Type::Bool}; - } - return Vote(operation, "allThreadsEqualNV"); - } - - Expression ThreadId(Operation operation) { - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {"0U", Type::Uint}; - } - return {"gl_SubGroupInvocationARB", Type::Uint}; - } - - template - Expression ThreadMask(Operation) { - if (device.HasWarpIntrinsics()) { - return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; - } - if (device.HasShaderBallot()) { - return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; - } - LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); - return {"0U", Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - std::string value = VisitOperand(operation, 0).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {std::move(value), Type::Float}; - } - - const std::string index = VisitOperand(operation, 1).AsUint(); - return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); - return {}; - } - code.AddLine("barrier();"); - return {}; - } - - Expression 
MemoryBarrierGroup(Operation) { - code.AddLine("groupMemoryBarrier();"); - return {}; - } - - Expression MemoryBarrierGlobal(Operation) { - code.AddLine("memoryBarrier();"); - return {}; - } - - struct Func final { - Func() = delete; - ~Func() = delete; - - static constexpr std::string_view LessThan = "<"; - static constexpr std::string_view Equal = "=="; - static constexpr std::string_view LessEqual = "<="; - static constexpr std::string_view GreaterThan = ">"; - static constexpr std::string_view NotEqual = "!="; - static constexpr std::string_view GreaterEqual = ">="; - - static constexpr std::string_view Eq = "Eq"; - static constexpr std::string_view Ge = "Ge"; - static constexpr std::string_view Gt = "Gt"; - static constexpr std::string_view Le = "Le"; - static constexpr std::string_view Lt = "Lt"; - - static constexpr std::string_view Add = "Add"; - static constexpr std::string_view Min = "Min"; - static constexpr std::string_view Max = "Max"; - static constexpr std::string_view And = "And"; - static constexpr std::string_view Or = "Or"; - static constexpr std::string_view Xor = "Xor"; - static constexpr std::string_view Exchange = "Exchange"; - }; - - static constexpr std::array operation_decompilers = { - &GLSLDecompiler::Assign, - - &GLSLDecompiler::Select, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Div, - &GLSLDecompiler::Fma, - &GLSLDecompiler::Negate, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::FClamp, - &GLSLDecompiler::FCastHalf0, - &GLSLDecompiler::FCastHalf1, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - &GLSLDecompiler::FCos, - &GLSLDecompiler::FSin, - &GLSLDecompiler::FExp2, - &GLSLDecompiler::FLog2, - &GLSLDecompiler::FInverseSqrt, - &GLSLDecompiler::FSqrt, - &GLSLDecompiler::FRoundEven, - &GLSLDecompiler::FFloor, - &GLSLDecompiler::FCeil, - &GLSLDecompiler::FTrunc, - &GLSLDecompiler::FCastInteger, - &GLSLDecompiler::FCastInteger, - &GLSLDecompiler::FSwizzleAdd, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - 
&GLSLDecompiler::Div, - &GLSLDecompiler::Negate, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - - &GLSLDecompiler::ICastFloat, - &GLSLDecompiler::ICastUnsigned, - &GLSLDecompiler::LogicalShiftLeft, - &GLSLDecompiler::ILogicalShiftRight, - &GLSLDecompiler::IArithmeticShiftRight, - &GLSLDecompiler::BitwiseAnd, - &GLSLDecompiler::BitwiseOr, - &GLSLDecompiler::BitwiseXor, - &GLSLDecompiler::BitwiseNot, - &GLSLDecompiler::BitfieldInsert, - &GLSLDecompiler::BitfieldExtract, - &GLSLDecompiler::BitCount, - &GLSLDecompiler::BitMSB, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Div, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - &GLSLDecompiler::UCastFloat, - &GLSLDecompiler::UCastSigned, - &GLSLDecompiler::LogicalShiftLeft, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::BitwiseAnd, - &GLSLDecompiler::BitwiseOr, - &GLSLDecompiler::BitwiseXor, - &GLSLDecompiler::BitwiseNot, - &GLSLDecompiler::BitfieldInsert, - &GLSLDecompiler::BitfieldExtract, - &GLSLDecompiler::BitCount, - &GLSLDecompiler::BitMSB, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Fma, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::HNegate, - &GLSLDecompiler::HClamp, - &GLSLDecompiler::HCastFloat, - &GLSLDecompiler::HUnpack, - &GLSLDecompiler::HMergeF32, - &GLSLDecompiler::HMergeH0, - &GLSLDecompiler::HMergeH1, - &GLSLDecompiler::HPack2, - - &GLSLDecompiler::LogicalAssign, - &GLSLDecompiler::LogicalAnd, - &GLSLDecompiler::LogicalOr, - &GLSLDecompiler::LogicalXor, - &GLSLDecompiler::LogicalNegate, - &GLSLDecompiler::LogicalPick2, - &GLSLDecompiler::LogicalAnd2, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::FOrdered, - &GLSLDecompiler::FUnordered, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - 
&GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::LogicalAddCarry, - - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, - - &GLSLDecompiler::Texture, - &GLSLDecompiler::TextureLod, - &GLSLDecompiler::TextureGather, - &GLSLDecompiler::TextureQueryDimensions, - &GLSLDecompiler::TextureQueryLod, - &GLSLDecompiler::TexelFetch, - &GLSLDecompiler::TextureGradient, - - &GLSLDecompiler::ImageLoad, - &GLSLDecompiler::ImageStore, - - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - - 
&GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - - &GLSLDecompiler::Branch, - &GLSLDecompiler::BranchIndirect, - &GLSLDecompiler::PushFlowStack, - &GLSLDecompiler::PopFlowStack, - &GLSLDecompiler::Exit, - &GLSLDecompiler::Discard, - - &GLSLDecompiler::EmitVertex, - &GLSLDecompiler::EndPrimitive, - - &GLSLDecompiler::InvocationId, - &GLSLDecompiler::YNegate, - &GLSLDecompiler::LocalInvocationId<0>, - &GLSLDecompiler::LocalInvocationId<1>, - &GLSLDecompiler::LocalInvocationId<2>, - &GLSLDecompiler::WorkGroupId<0>, - &GLSLDecompiler::WorkGroupId<1>, - &GLSLDecompiler::WorkGroupId<2>, - - &GLSLDecompiler::BallotThread, - &GLSLDecompiler::VoteAll, - &GLSLDecompiler::VoteAny, - &GLSLDecompiler::VoteEqual, - - &GLSLDecompiler::ThreadId, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ShuffleIndexed, - - &GLSLDecompiler::Barrier, - &GLSLDecompiler::MemoryBarrierGroup, - &GLSLDecompiler::MemoryBarrierGlobal, - }; - static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); - - std::string GetRegister(u32 index) const { - return AppendSuffix(index, "gpr"); - } - - std::string GetCustomVariable(u32 index) const { - return AppendSuffix(index, "custom_var"); - } - - std::string GetPredicate(Tegra::Shader::Pred pred) const { - return AppendSuffix(static_cast(pred), "pred"); - } - - std::string GetGenericInputAttribute(Attribute::Index attribute) const { - return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); - } - - std::unordered_map varying_description; - - std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { - const u8 offset = static_cast(GetGenericAttributeIndex(attribute) * 4 + element); - const auto& description = varying_description.at(offset); - 
if (description.is_scalar) { - return description.name; - } - return fmt::format("{}[{}]", description.name, element - description.first_element); - } - - std::string GetConstBuffer(u32 index) const { - return AppendSuffix(index, "cbuf"); - } - - std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); - } - - std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, - suffix); - } - - std::string GetConstBufferBlock(u32 index) const { - return AppendSuffix(index, "cbuf_block"); - } - - std::string GetLocalMemory() const { - if (suffix.empty()) { - return "lmem"; - } else { - return "lmem_" + std::string{suffix}; - } - } - - std::string GetInternalFlag(InternalFlag flag) const { - constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", - "overflow_flag"}; - const auto index = static_cast(flag); - ASSERT(index < static_cast(InternalFlag::Amount)); - - if (suffix.empty()) { - return InternalFlagNames[index]; - } else { - return fmt::format("{}_{}", InternalFlagNames[index], suffix); - } - } - - std::string GetSampler(const SamplerEntry& sampler) const { - return AppendSuffix(sampler.index, "sampler"); - } - - std::string GetImage(const ImageEntry& image) const { - return AppendSuffix(image.index, "image"); - } - - std::string AppendSuffix(u32 index, std::string_view name) const { - if (suffix.empty()) { - return fmt::format("{}{}", name, index); - } else { - return fmt::format("{}{}_{}", name, index, suffix); - } - } - - u32 GetNumPhysicalInputAttributes() const { - return stage == ShaderType::Vertex ? 
GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); - } - - u32 GetNumPhysicalAttributes() const { - return std::min(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); - } - - u32 GetNumPhysicalVaryings() const { - return std::min(device.GetMaxVaryings(), Maxwell::NumVaryings); - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - const std::string_view identifier; - const std::string_view suffix; - const Header header; - std::unordered_map transform_feedback; - - ShaderWriter code; - - std::optional max_input_vertices; -}; - -std::string GetFlowVariable(u32 index) { - return fmt::format("flow_var{}", index); -} - -class ExprDecompiler { -public: - explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ExprAnd& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += '!'; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - const auto pred = static_cast(expr.predicate); - inner += decomp.GetPredicate(pred); - } - - void operator()(const ExprCondCode& expr) { - inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool(); - } - - void operator()(const ExprVar& expr) { - inner += GetFlowVariable(expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? 
"true" : "false"; - } - - void operator()(VideoCommon::Shader::ExprGprEqual& expr) { - inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - GLSLDecompiler& decomp; - std::string inner; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()(const ASTIfElse& ast) { - decomp.code.AddLine("else {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - decomp.code.AddLine("// Label_{}:", ast.index); - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("do {{"); - decomp.code.scope++; - ASTNode current = 
ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}} while({});", expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - if (ast.kills) { - decomp.code.AddLine("discard;"); - } else { - decomp.PreExit(); - decomp.code.AddLine("return;"); - } - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void operator()(const ASTBreak& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - decomp.code.AddLine("break;"); - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - GLSLDecompiler& decomp; -}; - -void GLSLDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - code.AddLine("bool {} = false;", GetFlowVariable(i)); - } - - ASTDecompiler decompiler{*this}; - decompiler.Visit(ir.GetASTProgram()); -} - -} // Anonymous namespace - -ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), - cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, - 
usage.is_written); - } - for (const auto& sampler : ir.GetSamplers()) { - entries.samplers.emplace_back(sampler); - } - for (const auto& image : ir.GetImages()) { - entries.images.emplace_back(image); - } - const auto clip_distances = ir.GetClipDistances(); - for (std::size_t i = 0; i < std::size(clip_distances); ++i) { - entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.shader_length = ir.GetLength(); - return entries; -} - -std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier, - std::string_view suffix) { - GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); - decompiler.Decompile(); - return decompiler.GetResult(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index 0397a000c..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace OpenGL { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) - : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} - - u32 GetIndex() const { - return index; - } - -private: - u32 index = 0; -}; - -struct GlobalMemoryEntry { - constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, - bool is_written_) - : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ - is_written_} {} - - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - bool is_read = false; - bool is_written = false; -}; - -struct ShaderEntries { - std::vector const_buffers; - std::vector global_memory_entries; - std::vector samplers; - std::vector images; - std::size_t shader_length{}; - u32 clip_distances{}; - u32 enabled_uniform_buffers{}; -}; - -ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage); - -std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier, - std::string_view suffix = {}); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp deleted file mode 100644 index 0deb86517..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ /dev/null @@ 
-1,482 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/fs/file.h" -#include "common/fs/fs.h" -#include "common/fs/path_util.h" -#include "common/logging/log.h" -#include "common/scm_rev.h" -#include "common/settings.h" -#include "common/zstd_compression.h" -#include "core/core.h" -#include "core/hle/kernel/k_process.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" - -namespace OpenGL { - -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::BindlessSamplerMap; -using VideoCommon::Shader::BoundSamplerMap; -using VideoCommon::Shader::KeyMap; -using VideoCommon::Shader::SeparateSamplerKey; -using ShaderCacheVersionHash = std::array; - -struct ConstBufferKey { - u32 cbuf = 0; - u32 offset = 0; - u32 value = 0; -}; - -struct BoundSamplerEntry { - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct SeparateSamplerEntry { - u32 cbuf1 = 0; - u32 cbuf2 = 0; - u32 offset1 = 0; - u32 offset2 = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct BindlessSamplerEntry { - u32 cbuf = 0; - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -namespace { - -constexpr u32 NativeVersion = 21; - -ShaderCacheVersionHash GetShaderCacheVersionHash() { - ShaderCacheVersionHash hash{}; - const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); - std::memcpy(hash.data(), Common::g_shader_cache_version, length); - return hash; -} - -} // Anonymous namespace - -ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; - -ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; - -bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { - if (!file.ReadObject(type)) { - return false; - } - 
u32 code_size; - u32 code_size_b; - if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) { - return false; - } - code.resize(code_size); - code_b.resize(code_size_b); - if (file.Read(code) != code_size) { - return false; - } - if (HasProgramA() && file.Read(code_b) != code_size_b) { - return false; - } - - u8 is_texture_handler_size_known; - u32 texture_handler_size_value; - u32 num_keys; - u32 num_bound_samplers; - u32 num_separate_samplers; - u32 num_bindless_samplers; - if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) || - !file.ReadObject(is_texture_handler_size_known) || - !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) || - !file.ReadObject(compute_info) || !file.ReadObject(num_keys) || - !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) || - !file.ReadObject(num_bindless_samplers)) { - return false; - } - if (is_texture_handler_size_known) { - texture_handler_size = texture_handler_size_value; - } - - std::vector flat_keys(num_keys); - std::vector flat_bound_samplers(num_bound_samplers); - std::vector flat_separate_samplers(num_separate_samplers); - std::vector flat_bindless_samplers(num_bindless_samplers); - if (file.Read(flat_keys) != flat_keys.size() || - file.Read(flat_bound_samplers) != flat_bound_samplers.size() || - file.Read(flat_separate_samplers) != flat_separate_samplers.size() || - file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) { - return false; - } - for (const auto& entry : flat_keys) { - keys.insert({{entry.cbuf, entry.offset}, entry.value}); - } - for (const auto& entry : flat_bound_samplers) { - bound_samplers.emplace(entry.offset, entry.sampler); - } - for (const auto& entry : flat_separate_samplers) { - SeparateSamplerKey key; - key.buffers = {entry.cbuf1, entry.cbuf2}; - key.offsets = {entry.offset1, entry.offset2}; - separate_samplers.emplace(key, entry.sampler); - } - for (const auto& entry : flat_bindless_samplers) { 
- bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); - } - - return true; -} - -bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { - if (!file.WriteObject(static_cast(type)) || - !file.WriteObject(static_cast(code.size())) || - !file.WriteObject(static_cast(code_b.size()))) { - return false; - } - if (file.Write(code) != code.size()) { - return false; - } - if (HasProgramA() && file.Write(code_b) != code_b.size()) { - return false; - } - - if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) || - !file.WriteObject(static_cast(texture_handler_size.has_value())) || - !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) || - !file.WriteObject(compute_info) || !file.WriteObject(static_cast(keys.size())) || - !file.WriteObject(static_cast(bound_samplers.size())) || - !file.WriteObject(static_cast(separate_samplers.size())) || - !file.WriteObject(static_cast(bindless_samplers.size()))) { - return false; - } - - std::vector flat_keys; - flat_keys.reserve(keys.size()); - for (const auto& [address, value] : keys) { - flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); - } - - std::vector flat_bound_samplers; - flat_bound_samplers.reserve(bound_samplers.size()); - for (const auto& [address, sampler] : bound_samplers) { - flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); - } - - std::vector flat_separate_samplers; - flat_separate_samplers.reserve(separate_samplers.size()); - for (const auto& [key, sampler] : separate_samplers) { - SeparateSamplerEntry entry; - std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; - std::tie(entry.offset1, entry.offset2) = key.offsets; - entry.sampler = sampler; - flat_separate_samplers.push_back(entry); - } - - std::vector flat_bindless_samplers; - flat_bindless_samplers.reserve(bindless_samplers.size()); - for (const auto& [address, sampler] : bindless_samplers) { - flat_bindless_samplers.push_back( - 
BindlessSamplerEntry{address.first, address.second, sampler}); - } - - return file.Write(flat_keys) == flat_keys.size() && - file.Write(flat_bound_samplers) == flat_bound_samplers.size() && - file.Write(flat_separate_samplers) == flat_separate_samplers.size() && - file.Write(flat_bindless_samplers) == flat_bindless_samplers.size(); -} - -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; - -ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; - -void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { - title_id = title_id_; -} - -std::optional> ShaderDiskCacheOpenGL::LoadTransferable() { - // Skip games without title id - const bool has_title_id = title_id != 0; - if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { - return std::nullopt; - } - - Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No transferable shader cache found"); - is_usable = true; - return std::nullopt; - } - - u32 version{}; - if (!file.ReadObject(version)) { - LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); - return std::nullopt; - } - - if (version < NativeVersion) { - LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); - file.Close(); - InvalidateTransferable(); - is_usable = true; - return std::nullopt; - } - if (version > NativeVersion) { - LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " - "of the emulator, skipping"); - return std::nullopt; - } - - // Version is valid, load the shaders - std::vector entries; - while (static_cast(file.Tell()) < file.GetSize()) { - ShaderDiskCacheEntry& entry = entries.emplace_back(); - if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); - return std::nullopt; - } - } - - is_usable = true; - return {std::move(entries)}; -} - -std::vector 
ShaderDiskCacheOpenGL::LoadPrecompiled() { - if (!is_usable) { - return {}; - } - - Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); - return {}; - } - - if (const auto result = LoadPrecompiledFile(file)) { - return *result; - } - - LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); - file.Close(); - InvalidatePrecompiled(); - return {}; -} - -std::optional> ShaderDiskCacheOpenGL::LoadPrecompiledFile( - Common::FS::IOFile& file) { - // Read compressed file from disk and decompress to virtual precompiled cache file - std::vector compressed(file.GetSize()); - if (file.Read(compressed) != file.GetSize()) { - return std::nullopt; - } - const std::vector decompressed = Common::Compression::DecompressDataZSTD(compressed); - SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); - precompiled_cache_virtual_file_offset = 0; - - ShaderCacheVersionHash file_hash{}; - if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - if (GetShaderCacheVersionHash() != file_hash) { - LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - - std::vector entries; - while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - u32 binary_size; - auto& entry = entries.emplace_back(); - if (!LoadObjectFromPrecompiled(entry.unique_identifier) || - !LoadObjectFromPrecompiled(entry.binary_format) || - !LoadObjectFromPrecompiled(binary_size)) { - return std::nullopt; - } - - entry.binary.resize(binary_size); - if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { - return std::nullopt; - } - } - return entries; -} - -void ShaderDiskCacheOpenGL::InvalidateTransferable() { - if 
(!Common::FS::RemoveFile(GetTransferablePath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", - Common::FS::PathToUTF8String(GetTransferablePath())); - } - InvalidatePrecompiled(); -} - -void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { - // Clear virtaul precompiled cache file - precompiled_cache_virtual_file.Resize(0); - - if (!Common::FS::RemoveFile(GetPrecompiledPath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", - Common::FS::PathToUTF8String(GetPrecompiledPath())); - } -} - -void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { - if (!is_usable) { - return; - } - - const u64 id = entry.unique_identifier; - if (stored_transferable.contains(id)) { - // The shader already exists - return; - } - - Common::FS::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) { - return; - } - if (!entry.Save(file)) { - LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); - file.Close(); - InvalidateTransferable(); - return; - } - - stored_transferable.insert(id); -} - -void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { - if (!is_usable) { - return; - } - - // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header - // when writing the dump. This should be done the moment I get access to write to the virtual - // file. 
- if (precompiled_cache_virtual_file.GetSize() == 0) { - SavePrecompiledHeaderToVirtualPrecompiledCache(); - } - - GLint binary_length; - glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); - - GLenum binary_format; - std::vector binary(binary_length); - glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - - if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || - !SaveObjectToPrecompiled(static_cast(binary.size())) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", - unique_identifier); - InvalidatePrecompiled(); - } -} - -Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { - if (!EnsureDirectories()) { - return {}; - } - - const auto transferable_path{GetTransferablePath()}; - const bool existed = Common::FS::Exists(transferable_path); - - Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - if (!existed || file.GetSize() == 0) { - // If the file didn't exist, write its version - if (!file.WriteObject(NativeVersion)) { - LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - } - return file; -} - -void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { - const auto hash{GetShaderCacheVersionHash()}; - if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { - LOG_ERROR( - Render_OpenGL, - "Failed to write precompiled cache version hash to virtual precompiled cache file"); - } -} - -void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { - precompiled_cache_virtual_file_offset = 0; - const std::vector 
uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); - const std::vector compressed = - Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); - - const auto precompiled_path = GetPrecompiledPath(); - Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write, - Common::FS::FileType::BinaryFile}; - - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - return; - } - if (file.Write(compressed) != compressed.size()) { - LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - } -} - -bool ShaderDiskCacheOpenGL::EnsureDirectories() const { - const auto CreateDir = [](const std::filesystem::path& dir) { - if (!Common::FS::CreateDir(dir)) { - LOG_ERROR(Render_OpenGL, "Failed to create directory={}", - Common::FS::PathToUTF8String(dir)); - return false; - } - return true; - }; - - return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) && - CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && - CreateDir(GetPrecompiledDir()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const { - return GetTransferableDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const { - return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const { - return GetBaseDir() / "transferable"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const { - return GetBaseDir() / "precompiled"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const { - return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl"; -} - -std::string ShaderDiskCacheOpenGL::GetTitleID() const { - return fmt::format("{:016X}", title_id); -} - -} // 
namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h deleted file mode 100644 index f8bc23868..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "core/file_sys/vfs_vector.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace Common::FS { -class IOFile; -} - -namespace OpenGL { - -using ProgramCode = std::vector; - -/// Describes a shader and how it's used by the guest GPU -struct ShaderDiskCacheEntry { - ShaderDiskCacheEntry(); - ~ShaderDiskCacheEntry(); - - bool Load(Common::FS::IOFile& file); - - bool Save(Common::FS::IOFile& file) const; - - bool HasProgramA() const { - return !code.empty() && !code_b.empty(); - } - - Tegra::Engines::ShaderType type{}; - ProgramCode code; - ProgramCode code_b; - - u64 unique_identifier = 0; - std::optional texture_handler_size; - u32 bound_buffer = 0; - VideoCommon::Shader::GraphicsInfo graphics_info; - VideoCommon::Shader::ComputeInfo compute_info; - VideoCommon::Shader::KeyMap keys; - VideoCommon::Shader::BoundSamplerMap bound_samplers; - VideoCommon::Shader::SeparateSamplerMap separate_samplers; - VideoCommon::Shader::BindlessSamplerMap bindless_samplers; -}; - -/// Contains an OpenGL dumped binary program -struct ShaderDiskCachePrecompiled { - u64 unique_identifier = 0; - GLenum binary_format = 0; - std::vector binary; -}; - -class ShaderDiskCacheOpenGL { -public: - explicit ShaderDiskCacheOpenGL(); - ~ShaderDiskCacheOpenGL(); - - /// Binds a title ID for all future operations. 
- void BindTitleID(u64 title_id); - - /// Loads transferable cache. If file has a old version or on failure, it deletes the file. - std::optional> LoadTransferable(); - - /// Loads current game's precompiled cache. Invalidates on failure. - std::vector LoadPrecompiled(); - - /// Removes the transferable (and precompiled) cache file. - void InvalidateTransferable(); - - /// Removes the precompiled cache file and clears virtual precompiled cache file. - void InvalidatePrecompiled(); - - /// Saves a raw dump to the transferable file. Checks for collisions. - void SaveEntry(const ShaderDiskCacheEntry& entry); - - /// Saves a dump entry to the precompiled file. Does not check for collisions. - void SavePrecompiled(u64 unique_identifier, GLuint program); - - /// Serializes virtual precompiled shader cache file to real file - void SaveVirtualPrecompiledFile(); - -private: - /// Loads the transferable cache. Returns empty on failure. - std::optional> LoadPrecompiledFile( - Common::FS::IOFile& file); - - /// Opens current game's transferable file and write it's header if it doesn't exist - Common::FS::IOFile AppendTransferableFile() const; - - /// Save precompiled header to precompiled_cache_in_memory - void SavePrecompiledHeaderToVirtualPrecompiledCache(); - - /// Create shader disk cache directories. Returns true on success. 
- bool EnsureDirectories() const; - - /// Gets current game's transferable file path - std::filesystem::path GetTransferablePath() const; - - /// Gets current game's precompiled file path - std::filesystem::path GetPrecompiledPath() const; - - /// Get user's transferable directory path - std::filesystem::path GetTransferableDir() const; - - /// Get user's precompiled directory path - std::filesystem::path GetPrecompiledDir() const; - - /// Get user's shader directory path - std::filesystem::path GetBaseDir() const; - - /// Get current game's title id - std::string GetTitleID() const; - - template - bool SaveArrayToPrecompiled(const T* data, std::size_t length) { - const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += write_length; - return write_length == sizeof(T) * length; - } - - template - bool LoadArrayFromPrecompiled(T* data, std::size_t length) { - const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += read_length; - return read_length == sizeof(T) * length; - } - - template - bool SaveObjectToPrecompiled(const T& object) { - return SaveArrayToPrecompiled(&object, 1); - } - - bool SaveObjectToPrecompiled(bool object) { - const auto value = static_cast(object); - return SaveArrayToPrecompiled(&value, 1); - } - - template - bool LoadObjectFromPrecompiled(T& object) { - return LoadArrayFromPrecompiled(&object, 1); - } - - // Stores whole precompiled cache which will be read from or saved to the precompiled chache - // file - FileSys::VectorVfsFile precompiled_cache_virtual_file; - // Stores the current offset of the precompiled cache file for IO purposes - std::size_t precompiled_cache_virtual_file_offset = 0; - - // Stored transferable shaders - std::unordered_set stored_transferable; - - /// Title ID to operate on - u64 
title_id = 0; - - // The cache has been loaded at boot - bool is_usable = false; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b7f5b8bc2..6c0d5c7f4 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -323,7 +323,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } - } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3a48219b7..7a3660496 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -8,146 +8,14 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_) - : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, - descriptor_set_layout{CreateDescriptorSetLayout()}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate()}, - shader_module{CreateShaderModule(shader_.code)}, 
pipeline{CreatePipeline()} {} +ComputePipeline::ComputePipeline() = default; -VKComputePipeline::~VKComputePipeline() = default; - -VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { - std::vector bindings; - u32 binding = 0; - const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { - // TODO(Rodrigo): Maybe make individual bindings here? - for (u32 bindpoint = 0; bindpoint < static_cast(num_entries); ++bindpoint) { - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - } - }; - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); - - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { - return device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - 
.pPushConstantRanges = nullptr, - }); -} - -vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { - std::vector template_entries; - u32 binding = 0; - u32 offset = 0; - FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. - return {}; - } - - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }); -} - -vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector& code) const { - device.SaveShader(code); - - return device.GetLogical().CreateShaderModule({ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = code.size() * sizeof(u32), - .pCode = code.data(), - }); -} - -vk::Pipeline VKComputePipeline::CreatePipeline() const { - - VkComputePipelineCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *shader_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *layout, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - 
.requiredSubgroupSize = GuestWarpSize, - }; - - if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { - ci.stage.pNext = &subgroup_size_ci; - } - - return device.GetLogical().CreateComputePipeline(ci); -} +ComputePipeline::~ComputePipeline() = default; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 7e16575ac..433d8bb3d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -6,7 +6,6 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -15,50 +14,10 @@ class Device; class VKScheduler; class VKUpdateDescriptorQueue; -class VKComputePipeline final { +class ComputePipeline { public: - explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_); - ~VKComputePipeline(); - - VkDescriptorSet CommitDescriptorSet(); - - VkPipeline GetHandle() const { - return *pipeline; - } - - VkPipelineLayout GetLayout() const { - return *layout; - } - - const ShaderEntries& GetEntries() const { - return entries; - } - -private: - vk::DescriptorSetLayout CreateDescriptorSetLayout() const; - - vk::PipelineLayout CreatePipelineLayout() const; - - vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; - - vk::ShaderModule CreateShaderModule(const std::vector& code) const; - - vk::Pipeline CreatePipeline() const; - - const Device& device; - VKScheduler& scheduler; - ShaderEntries entries; - - vk::DescriptorSetLayout descriptor_set_layout; - DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; 
- vk::DescriptorUpdateTemplateKHR descriptor_template; - vk::ShaderModule shader_module; - vk::Pipeline pipeline; + explicit ComputePipeline(); + ~ComputePipeline(); }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp deleted file mode 100644 index fc6dd83eb..000000000 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ /dev/null @@ -1,484 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include - -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -MICROPROFILE_DECLARE(Vulkan_PipelineCache); - -namespace { - -template -VkStencilOpState GetStencilFaceState(const StencilFace& face) { - return { - .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), - .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), - .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), - .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), - .compareMask = 0, - .writeMask = 0, - .reference = 0, - }; -} - -bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { - static constexpr std::array unsupported_topologies = { - VK_PRIMITIVE_TOPOLOGY_POINT_LIST, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, - 
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, - VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; - return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), - topology) == std::end(unsupported_topologies); -} - -VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { - union Swizzle { - u32 raw; - BitField<0, 3, Maxwell::ViewportSwizzle> x; - BitField<4, 3, Maxwell::ViewportSwizzle> y; - BitField<8, 3, Maxwell::ViewportSwizzle> z; - BitField<12, 3, Maxwell::ViewportSwizzle> w; - }; - const Swizzle unpacked{swizzle}; - - return { - .x = MaxwellToVK::ViewportSwizzle(unpacked.x), - .y = MaxwellToVK::ViewportSwizzle(unpacked.y), - .z = MaxwellToVK::ViewportSwizzle(unpacked.z), - .w = MaxwellToVK::ViewportSwizzle(unpacked.w), - }; -} - -VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { - switch (msaa_mode) { - case Tegra::Texture::MsaaMode::Msaa1x1: - return VK_SAMPLE_COUNT_1_BIT; - case Tegra::Texture::MsaaMode::Msaa2x1: - case Tegra::Texture::MsaaMode::Msaa2x1_D3D: - return VK_SAMPLE_COUNT_2_BIT; - case Tegra::Texture::MsaaMode::Msaa2x2: - case Tegra::Texture::MsaaMode::Msaa2x2_VC4: - case Tegra::Texture::MsaaMode::Msaa2x2_VC12: - return VK_SAMPLE_COUNT_4_BIT; - case Tegra::Texture::MsaaMode::Msaa4x2: - case Tegra::Texture::MsaaMode::Msaa4x2_D3D: - case Tegra::Texture::MsaaMode::Msaa4x2_VC8: - case Tegra::Texture::MsaaMode::Msaa4x2_VC24: - return VK_SAMPLE_COUNT_8_BIT; - case Tegra::Texture::MsaaMode::Msaa4x4: - return VK_SAMPLE_COUNT_16_BIT; - default: - UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast(msaa_mode)); - return VK_SAMPLE_COUNT_1_BIT; - } -} - -} // Anonymous namespace - -VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span bindings, - const SPIRVProgram& program, u32 num_color_buffers) - : device{device_}, scheduler{scheduler_}, 
cache_key{key}, hash{cache_key.Hash()}, - descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate(program)}, - modules(CreateShaderModules(program)), - pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} - -VKGraphicsPipeline::~VKGraphicsPipeline() = default; - -VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( - vk::Span bindings) const { - const VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = bindings.size(), - .pBindings = bindings.data(), - }; - return device.GetLogical().CreateDescriptorSetLayout(ci); -} - -vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { - const VkPipelineLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - return device.GetLogical().CreatePipelineLayout(ci); -} - -vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const { - std::vector template_entries; - u32 binding = 0; - u32 offset = 0; - for (const auto& stage : program) { - if (stage) { - FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); - } - } - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. 
- return {}; - } - - const VkDescriptorUpdateTemplateCreateInfoKHR ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); -} - -std::vector VKGraphicsPipeline::CreateShaderModules( - const SPIRVProgram& program) const { - VkShaderModuleCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = 0, - .pCode = nullptr, - }; - - std::vector shader_modules; - shader_modules.reserve(Maxwell::MaxShaderStage); - for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { - const auto& stage = program[i]; - if (!stage) { - continue; - } - - device.SaveShader(stage->code); - - ci.codeSize = stage->code.size() * sizeof(u32); - ci.pCode = stage->code.data(); - shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); - } - return shader_modules; -} - -vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, - VkRenderPass renderpass, - u32 num_color_buffers) const { - const auto& state = cache_key.fixed_state; - const auto& viewport_swizzles = state.viewport_swizzles; - - FixedPipelineState::DynamicState dynamic; - if (device.IsExtExtendedDynamicStateSupported()) { - // Insert dummy values, as long as they are valid they don't matter as extended dynamic - // state is ignored - dynamic.raw1 = 0; - dynamic.raw2 = 0; - dynamic.vertex_strides.fill(0); - } else { - dynamic = state.dynamic_state; - } - - std::vector vertex_bindings; - std::vector vertex_binding_divisors; - for (std::size_t index = 0; index < 
Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; - const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, - }); - if (instanced) { - vertex_binding_divisors.push_back({ - .binding = static_cast(index), - .divisor = state.binding_divisors[index], - }); - } - } - - std::vector vertex_attributes; - const auto& input_attributes = program[0]->entries.attributes; - for (std::size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; - if (!attribute.enabled) { - continue; - } - if (!input_attributes.contains(static_cast(index))) { - // Skip attributes not used by the vertex shaders. - continue; - } - vertex_attributes.push_back({ - .location = static_cast(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); - } - - VkPipelineVertexInputStateCreateInfo vertex_input_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), - .pVertexBindingDescriptions = vertex_bindings.data(), - .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), - .pVertexAttributeDescriptions = vertex_attributes.data(), - }; - - const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, - .pNext = nullptr, - .vertexBindingDivisorCount = static_cast(vertex_binding_divisors.size()), - .pVertexBindingDivisors = vertex_binding_divisors.data(), - }; - if (!vertex_binding_divisors.empty()) { - vertex_input_ci.pNext = &input_divisor_ci; - } - - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, 
state.topology); - const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), - .primitiveRestartEnable = state.primitive_restart_enable != 0 && - SupportsPrimitiveRestart(input_assembly_topology), - }; - - const VkPipelineTessellationStateCreateInfo tessellation_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, - }; - - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, - }; - - std::array swizzles; - std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewportSwizzles = swizzles.data(), - }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } - - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthClampEnable = - static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), - .rasterizerDiscardEnable = - static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = static_cast( - dynamic.cull_enable ? 
MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), - .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - }; - - const VkPipelineMultisampleStateCreateInfo multisample_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), - .sampleShadingEnable = VK_FALSE, - .minSampleShading = 0.0f, - .pSampleMask = nullptr, - .alphaToCoverageEnable = VK_FALSE, - .alphaToOneEnable = VK_FALSE, - }; - - const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthTestEnable = dynamic.depth_test_enable, - .depthWriteEnable = dynamic.depth_write_enable, - .depthCompareOp = dynamic.depth_test_enable - ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) - : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, - .stencilTestEnable = dynamic.stencil_enable, - .front = GetStencilFaceState(dynamic.front), - .back = GetStencilFaceState(dynamic.back), - .minDepthBounds = 0.0f, - .maxDepthBounds = 0.0f, - }; - - std::array cb_attachments; - for (std::size_t index = 0; index < num_color_buffers; ++index) { - static constexpr std::array COMPONENT_TABLE{ - VK_COLOR_COMPONENT_R_BIT, - VK_COLOR_COMPONENT_G_BIT, - VK_COLOR_COMPONENT_B_BIT, - VK_COLOR_COMPONENT_A_BIT, - }; - const auto& blend = state.attachments[index]; - - VkColorComponentFlags color_components = 0; - for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { - if (blend.Mask()[i]) { - color_components |= COMPONENT_TABLE[i]; - } - } - - cb_attachments[index] = { - .blendEnable = blend.enable != 0, - .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), - .dstColorBlendFactor = 
MaxwellToVK::BlendFactor(blend.DestRGBFactor()), - .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), - .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), - .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), - .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), - .colorWriteMask = color_components, - }; - } - - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .logicOpEnable = VK_FALSE, - .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = num_color_buffers, - .pAttachments = cb_attachments.data(), - .blendConstants = {}, - }; - - std::vector dynamic_states{ - VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, - VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }; - if (device.IsExtExtendedDynamicStateSupported()) { - static constexpr std::array extended{ - VK_DYNAMIC_STATE_CULL_MODE_EXT, - VK_DYNAMIC_STATE_FRONT_FACE_EXT, - VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, - VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, - VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, - VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_STENCIL_OP_EXT, - }; - dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); - } - - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .dynamicStateCount = static_cast(dynamic_states.size()), - .pDynamicStates = dynamic_states.data(), - }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - - std::vector shader_stages; - std::size_t module_index = 0; - for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - if (!program[stage]) { - continue; - } - - VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = MaxwellToVK::ShaderStage(static_cast(stage)); - stage_ci.module = *modules[module_index++]; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { - stage_ci.pNext = &subgroup_size_ci; - } - } - return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(shader_stages.size()), - .pStages = shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = &tessellation_ci, - .pViewportState = &viewport_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisample_ci, - .pDepthStencilState = &depth_stencil_ci, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *layout, - .renderPass = renderpass, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h deleted file mode 100644 index 8b6a98fe0..000000000 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to 
the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsPipelineCacheKey { - VkRenderPass renderpass; - std::array shaders; - FixedPipelineState fixed_state; - - std::size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - std::size_t Size() const noexcept { - return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - -class Device; -class VKDescriptorPool; -class VKScheduler; -class VKUpdateDescriptorQueue; - -using SPIRVProgram = std::array, Maxwell::MaxShaderStage>; - -class VKGraphicsPipeline final { -public: - explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span bindings, - const SPIRVProgram& program, u32 num_color_buffers); - ~VKGraphicsPipeline(); - - VkDescriptorSet CommitDescriptorSet(); - - VkPipeline GetHandle() const { - return *pipeline; - } - - VkPipelineLayout GetLayout() const { - return *layout; - } - - GraphicsPipelineCacheKey GetCacheKey() const { - return cache_key; - } - -private: - vk::DescriptorSetLayout CreateDescriptorSetLayout( - vk::Span bindings) const; - - vk::PipelineLayout CreatePipelineLayout() const; - - 
vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const; - - std::vector CreateShaderModules(const SPIRVProgram& program) const; - - vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, - u32 num_color_buffers) const; - - const Device& device; - VKScheduler& scheduler; - const GraphicsPipelineCacheKey cache_key; - const u64 hash; - - vk::DescriptorSetLayout descriptor_set_layout; - DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; - vk::DescriptorUpdateTemplateKHR descriptor_template; - std::vector modules; - - vk::Pipeline pipeline; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8991505ca..7d0ba1180 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -19,49 +19,27 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { - MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using 
VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; namespace { - -constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; -constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; -constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - -constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ - .depth = VideoCommon::Shader::CompileDepth::FullDecompile, - .disable_else_derivation = true, -}; - -constexpr std::size_t GetStageFromProgram(std::size_t program) { +size_t StageFromProgram(size_t program) { return program == 0 ? 0 : program - 1; } -constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { - return static_cast(GetStageFromProgram(static_cast(program))); +ShaderType StageFromProgram(Maxwell::ShaderProgram program) { + return static_cast(StageFromProgram(static_cast(program))); } ShaderType GetShaderType(Maxwell::ShaderProgram program) { @@ -81,165 +59,35 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { return ShaderType::Vertex; } } - -template -void AddBindings(std::vector& bindings, u32& binding, - VkShaderStageFlags stage_flags, const Container& container) { - const u32 num_entries = static_cast(std::size(container)); - for (std::size_t i = 0; i < num_entries; ++i) { - u32 count = 1; - if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - // Combined image samplers can be arrayed. 
- count = container[i].size; - } - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = count, - .stageFlags = stage_flags, - .pImmutableSamplers = nullptr, - }); - } -} - -u32 FillDescriptorLayout(const ShaderEntries& entries, - std::vector& bindings, - Maxwell::ShaderProgram program_type, u32 base_binding) { - const ShaderType stage = GetStageFromProgram(program_type); - const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); - - u32 binding = base_binding; - AddBindings(bindings, binding, flags, entries.const_buffers); - AddBindings(bindings, binding, flags, entries.global_buffers); - AddBindings(bindings, binding, flags, entries.uniform_texels); - AddBindings(bindings, binding, flags, entries.samplers); - AddBindings(bindings, binding, flags, entries.storage_texels); - AddBindings(bindings, binding, flags, entries.images); - return binding; -} - } // Anonymous namespace -std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast(this), Size()); - return static_cast(hash); -} - -bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { - return std::memcmp(&rhs, this, Size()) == 0; -} - -std::size_t ComputePipelineCacheKey::Hash() const noexcept { +size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); - return static_cast(hash); + return static_cast(hash); } bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, - GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) - : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), - shader_ir(program_code, main_offset_, compiler_settings, registry), - 
entries(GenerateShaderEntries(shader_ir)) {} +Shader::Shader() = default; Shader::~Shader() = default; -VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, + VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_) : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ update_descriptor_queue_} {} -VKPipelineCache::~VKPipelineCache() = default; - -std::array VKPipelineCache::GetShaders() { - std::array shaders{}; - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto program{static_cast(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - - const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - if (!result) { - const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; - - // No shader found - create a new one - static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; - const auto stage = static_cast(index == 0 ? 
0 : index - 1); - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader = std::make_unique(maxwell3d, stage, gpu_addr, *cpu_addr, - std::move(code), stage_offset); - result = shader.get(); - - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, size_in_bytes); - } else { - null_shader = std::move(shader); - } - } - shaders[index] = result; - } - return last_shaders = shaders; -} - -VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( - const GraphicsPipelineCacheKey& key, u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders) { - MICROPROFILE_SCOPE(Vulkan_PipelineCache); - - if (last_graphics_pipeline && last_graphics_key == key) { - return last_graphics_pipeline; - } - last_graphics_key = key; - - if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { - std::unique_lock lock{pipeline_cache}; - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, - update_descriptor_queue, bindings, program, key, - num_color_buffers); - } - last_graphics_pipeline = pair->second.get(); - return last_graphics_pipeline; - } - - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - auto& entry = pair->second; - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - entry = std::make_unique(device, scheduler, descriptor_pool, - update_descriptor_queue, key, bindings, - program, num_color_buffers); - gpu.ShaderNotify().MarkShaderComplete(); - } - last_graphics_pipeline = entry.get(); - return 
last_graphics_pipeline; -} +PipelineCache::~PipelineCache() = default; -VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { +ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); @@ -248,200 +96,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach return *entry; } LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - - const GPUVAddr gpu_addr = key.shader; - - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); - if (!shader) { - // No shader found - create a new one - const auto host_ptr = gpu_memory.GetPointer(gpu_addr); - - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader_info = std::make_unique(kepler_compute, ShaderType::Compute, gpu_addr, - *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); - shader = shader_info.get(); - - if (cpu_addr) { - Register(std::move(shader_info), *cpu_addr, size_in_bytes); - } else { - null_kernel = std::move(shader_info); - } - } - - const Specialization specialization{ - .base_binding = 0, - .workgroup_size = key.workgroup_size, - .shared_memory_size = key.shared_memory_size, - .point_size = std::nullopt, - .enabled_attributes = {}, - .attribute_types = {}, - .ndc_minus_one_to_one = false, - }; - const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, - shader->GetRegistry(), specialization), - shader->GetEntries()}; - entry = std::make_unique(device, scheduler, descriptor_pool, - update_descriptor_queue, spirv_shader); - return *entry; -} - -void VKPipelineCache::EmplacePipeline(std::unique_ptr pipeline) { - gpu.ShaderNotify().MarkShaderComplete(); - std::unique_lock 
lock{pipeline_cache}; - graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); -} - -void VKPipelineCache::OnShaderRemoval(Shader* shader) { - bool finished = false; - const auto Finish = [&] { - // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and - // flush. - if (finished) { - return; - } - finished = true; - scheduler.Finish(); - }; - - const GPUVAddr invalidated_addr = shader->GetGpuAddr(); - for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { - auto& entry = it->first; - if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == - entry.shaders.end()) { - ++it; - continue; - } - Finish(); - it = graphics_cache.erase(it); - } - for (auto it = compute_cache.begin(); it != compute_cache.end();) { - auto& entry = it->first; - if (entry.shader != invalidated_addr) { - ++it; - continue; - } - Finish(); - it = compute_cache.erase(it); - } -} - -std::pair> -VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { - Specialization specialization; - if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { - float point_size; - std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); - specialization.point_size = point_size; - ASSERT(point_size != 0.0f); - } - for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - const auto& attribute = fixed_state.attributes[i]; - specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; - specialization.attribute_types[i] = attribute.Type(); - } - specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; - specialization.early_fragment_tests = fixed_state.early_z; - - // Alpha test - specialization.alpha_test_func = - FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); - specialization.alpha_test_ref = Common::BitCast(fixed_state.alpha_test_ref); - - SPIRVProgram program; - std::vector bindings; - - for (std::size_t index = 1; index < 
Maxwell::MaxShaderProgram; ++index) { - const auto program_enum = static_cast(index); - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - - const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 - const ShaderType program_type = GetShaderType(program_enum); - const auto& entries = shader->GetEntries(); - program[stage] = { - Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), - entries, - }; - - const u32 old_binding = specialization.base_binding; - specialization.base_binding = - FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); - ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); - } - return {std::move(program), std::move(bindings)}; + throw "Bad"; } -template -void AddEntry(std::vector& template_entries, u32& binding, - u32& offset, const Container& container) { - static constexpr u32 entry_size = static_cast(sizeof(DescriptorUpdateEntry)); - const u32 count = static_cast(std::size(container)); - - if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { - for (u32 i = 0; i < count; ++i) { - const u32 num_samplers = container[i].size; - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = num_samplers, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - - ++binding; - offset += num_samplers * entry_size; - } - return; - } - - if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || - descriptor_type == STORAGE_TEXEL_BUFFER) { - // Nvidia has a bug where updating multiple texels at once causes the driver to crash. 
- // Note: Fixed in driver Windows 443.24, Linux 440.66.15 - for (u32 i = 0; i < count; ++i) { - template_entries.push_back({ - .dstBinding = binding + i, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = descriptor_type, - .offset = static_cast(offset + i * entry_size), - .stride = entry_size, - }); - } - } else if (count > 0) { - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = count, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - } - offset += count * entry_size; - binding += count; -} - -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector& template_entries) { - AddEntry(template_entries, offset, binding, entries.const_buffers); - AddEntry(template_entries, offset, binding, entries.global_buffers); - AddEntry(template_entries, offset, binding, entries.uniform_texels); - AddEntry(template_entries, offset, binding, entries.samplers); - AddEntry(template_entries, offset, binding, entries.storage_texels); - AddEntry(template_entries, offset, binding, entries.images); -} +void PipelineCache::OnShaderRemoval(Shader*) {} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 89d635a3d..e3e63340d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -15,15 +15,8 @@ #include #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/async_shaders.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include 
"video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -35,7 +28,7 @@ namespace Vulkan { class Device; class RasterizerVulkan; -class VKComputePipeline; +class ComputePipeline; class VKDescriptorPool; class VKScheduler; class VKUpdateDescriptorQueue; @@ -47,7 +40,7 @@ struct ComputePipelineCacheKey { u32 shared_memory_size; std::array workgroup_size; - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; @@ -63,16 +56,9 @@ static_assert(std::is_trivially_constructible_v); namespace std { -template <> -struct hash { - std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - template <> struct hash { - std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { + size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { return k.Hash(); } }; @@ -83,66 +69,26 @@ namespace Vulkan { class Shader { public: - explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, - Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); + explicit Shader(); ~Shader(); - - GPUVAddr GetGpuAddr() const { - return gpu_addr; - } - - VideoCommon::Shader::ShaderIR& GetIR() { - return shader_ir; - } - - const VideoCommon::Shader::ShaderIR& GetIR() const { - return shader_ir; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return registry; - } - - const ShaderEntries& GetEntries() const { - return entries; - } - -private: - GPUVAddr gpu_addr{}; - VideoCommon::Shader::ProgramCode program_code; - VideoCommon::Shader::Registry registry; - VideoCommon::Shader::ShaderIR shader_ir; - ShaderEntries entries; }; -class VKPipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache final : public VideoCommon::ShaderCache { public: - 
explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const Device& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); - ~VKPipelineCache() override; - - std::array GetShaders(); + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const Device& device, + VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue); + ~PipelineCache() override; - VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, - u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders); - - VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); - - void EmplacePipeline(std::unique_ptr pipeline); + ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); protected: void OnShaderRemoval(Shader* shader) final; private: - std::pair> DecompileShaders( - const FixedPipelineState& fixed_state); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -158,17 +104,8 @@ private: std::array last_shaders{}; - GraphicsPipelineCacheKey last_graphics_key; - VKGraphicsPipeline* last_graphics_pipeline = nullptr; - std::mutex pipeline_cache; - std::unordered_map> - graphics_cache; - std::unordered_map> compute_cache; + std::unordered_map> compute_cache; }; -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector& template_entries); - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f57c15b37..f152297d9 100644 --- 
a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -24,7 +24,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -97,15 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } -std::array GetShaderAddresses( - const std::array& shaders) { - std::array addresses; - for (size_t i = 0; i < std::size(addresses); ++i) { - addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; - } - return addresses; -} - struct TextureHandle { constexpr TextureHandle(u32 data, bool via_header_index) { const Tegra::Texture::TextureHandle handle{data}; @@ -117,98 +107,6 @@ struct TextureHandle { u32 sampler; }; -template -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - size_t stage, size_t index = 0) { - const auto shader_type = static_cast(stage); - if constexpr (std::is_same_v) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return 
TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::e2D; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, - ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { - for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const VkSampler sampler = *sampler_ptr++; - const ImageViewId image_view_id = 
*image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddSampledImage(handle, sampler); - } - } - for ([[maybe_unused]] const auto& entry : entries.storage_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.images) { - // TODO: Mark as modified - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddImage(handle); - } -} - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ @@ -253,71 +151,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra descriptor_pool, update_descriptor_queue), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), - wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { + wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } } RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { - MICROPROFILE_SCOPE(Vulkan_Drawing); - - SCOPE_EXIT({ gpu.TickWork(); }); - FlushWork(); - - query_cache.UpdateCounters(); - - graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - 
texture_cache.SynchronizeGraphicsDescriptors(); - texture_cache.UpdateRenderTargets(false); - - const auto shaders = pipeline_cache.GetShaders(); - graphics_key.shaders = GetShaderAddresses(shaders); - - SetupShaderDescriptors(shaders, is_indexed); - - const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); - graphics_key.renderpass = framebuffer->RenderPass(); - - VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( - graphics_key, framebuffer->NumColorBuffers(), async_shaders); - if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { - // Async graphics pipeline was not ready. - return; - } - - BeginTransformFeedback(); - - scheduler.RequestRenderpass(framebuffer); - scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - UpdateDynamicStates(); - - const auto& regs = maxwell3d.regs; - const u32 num_instances = maxwell3d.mme_draw.instance_count; - const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); - const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); - const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); - scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, - draw_params.base_vertex, draw_params.base_instance); - } else { - cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, - draw_params.base_vertex, draw_params.base_instance); - } - }); - - EndTransformFeedback(); + UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); } void RasterizerVulkan::Clear() { @@ -395,73 +236,8 @@ void RasterizerVulkan::Clear() { }); } -void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { - MICROPROFILE_SCOPE(Vulkan_Compute); 
- - query_cache.UpdateCounters(); - - const auto& launch_desc = kepler_compute.launch_description; - auto& pipeline = pipeline_cache.GetComputePipeline({ - .shader = code_addr, - .shared_memory_size = launch_desc.shared_alloc, - .workgroup_size{ - launch_desc.block_dim_x, - launch_desc.block_dim_y, - launch_desc.block_dim_z, - }, - }); - - // Compute dispatches can't be executed inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - image_view_indices.clear(); - sampler_handles.clear(); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - const auto& entries = pipeline.GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeUniformTexels(entries); - SetupComputeTextures(entries); - SetupComputeStorageTexels(entries); - SetupComputeImages(entries); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - update_descriptor_queue.Acquire(); - - buffer_cache.BindHostComputeBuffers(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, - sampler_ptr); - - const VkPipeline pipeline_handle = pipeline.GetHandle(); - const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); - const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); - scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, - grid_z = launch_desc.grid_dim_z, pipeline_handle, 
pipeline_layout, - descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - cmdbuf.Dispatch(grid_x, grid_y, grid_z); - }); +void RasterizerVulkan::DispatchCompute() { + UNREACHABLE_MSG("Not implemented"); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { @@ -716,52 +492,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 return buffer_cache.DMACopy(src_address, dest_address, amount); } -void RasterizerVulkan::SetupShaderDescriptors( - const std::array& shaders, bool is_indexed) { - image_view_indices.clear(); - sampler_handles.clear(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - const ShaderEntries& entries = shader->GetEntries(); - SetupGraphicsUniformTexels(entries, stage); - SetupGraphicsTextures(entries, stage); - SetupGraphicsStorageTexels(entries, stage); - SetupGraphicsImages(entries, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache.UpdateGraphicsBuffers(is_indexed); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - update_descriptor_queue.Acquire(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; 
++stage) { - // Skip VertexA stage - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, - image_view_id_ptr, sampler_ptr); - } -} - void RasterizerVulkan::UpdateDynamicStates() { auto& regs = maxwell3d.regs; UpdateViewportsState(regs); @@ -810,89 +540,6 @@ void RasterizerVulkan::EndTransformFeedback() { [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } -void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = - GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - 
image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.images) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, - COMPUTE_SHADER_INDEX, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.images) { - const 
TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchViewports()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2065209be..31017dc2b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -28,7 +28,6 @@ #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/async_shaders.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -73,7 +72,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -103,19 +102,6 @@ public: bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - - /// Maximum supported size that a constbuffer can have in bytes. 
- static constexpr size_t MaxConstbufferSize = 0x10000; - static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, - "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; @@ -125,40 +111,12 @@ private: void FlushWork(); - /// Setup descriptors in the graphics pipeline. - void SetupShaderDescriptors(const std::array& shaders, - bool is_indexed); - void UpdateDynamicStates(); void BeginTransformFeedback(); void EndTransformFeedback(); - /// Setup uniform texels in the graphics pipeline. - void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup textures in the graphics pipeline. - void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); - - /// Setup storage texels in the graphics pipeline. - void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup images in the graphics pipeline. - void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); - - /// Setup texel buffers in the compute pipeline. - void SetupComputeUniformTexels(const ShaderEntries& entries); - - /// Setup textures in the compute pipeline. - void SetupComputeTextures(const ShaderEntries& entries); - - /// Setup storage texels in the compute pipeline. - void SetupComputeStorageTexels(const ShaderEntries& entries); - - /// Setup images in the compute pipeline. 
- void SetupComputeImages(const ShaderEntries& entries); - void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); @@ -198,13 +156,12 @@ private: TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - VKPipelineCache pipeline_cache; + PipelineCache pipeline_cache; VKQueryCache query_cache; AccelerateDMA accelerate_dma; VKFenceManager fence_manager; vk::Event wfi_event; - VideoCommon::Shader::AsyncShaders async_shaders; boost::container::static_vector image_view_indices; std::array image_view_ids; diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp deleted file mode 100644 index db11144c7..000000000 --- a/src/video_core/shader/ast.cpp +++ /dev/null @@ -1,752 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { - -ASTZipper::ASTZipper() = default; - -void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) { - ASSERT(new_first->manager == nullptr); - first = new_first; - last = new_first; - - ASTNode current = first; - while (current) { - current->manager = this; - current->parent = parent; - last = current; - current = current->next; - } -} - -void ASTZipper::PushBack(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous = last; - if (last) { - last->next = new_node; - } - new_node->next.reset(); - last = new_node; - if (!first) { - first = new_node; - } - new_node->manager = this; -} - -void ASTZipper::PushFront(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous.reset(); - new_node->next = first; - if (first) { - first->previous = new_node; - } - if (last == first) { - last = new_node; - } - first = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushFront(new_node); - return; - } - const ASTNode next = at_node->next; - if (next) { - next->previous = new_node; - } - new_node->previous = at_node; - if (at_node == last) { - last = new_node; - } - new_node->next = next; - at_node->next = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushBack(new_node); - return; - } - const ASTNode previous = at_node->previous; - if (previous) { - previous->next = new_node; - } - new_node->next = at_node; - if (at_node == first) { - first = new_node; - } - new_node->previous = previous; - at_node->previous = new_node; - new_node->manager = this; 
-} - -void ASTZipper::DetachTail(ASTNode node) { - ASSERT(node->manager == this); - if (node == first) { - first.reset(); - last.reset(); - return; - } - - last = node->previous; - last->next.reset(); - node->previous.reset(); - - ASTNode current = std::move(node); - while (current) { - current->manager = nullptr; - current->parent.reset(); - current = current->next; - } -} - -void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) { - ASSERT(start->manager == this && end->manager == this); - if (start == end) { - DetachSingle(start); - return; - } - const ASTNode prev = start->previous; - const ASTNode post = end->next; - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - start->previous.reset(); - end->next.reset(); - ASTNode current = start; - bool found = false; - while (current) { - current->manager = nullptr; - current->parent.reset(); - found |= current == end; - current = current->next; - } - ASSERT(found); -} - -void ASTZipper::DetachSingle(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode prev = node->previous; - const ASTNode post = node->next; - node->previous.reset(); - node->next.reset(); - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - - node->manager = nullptr; - node->parent.reset(); -} - -void ASTZipper::Remove(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode next = node->next; - const ASTNode previous = node->previous; - if (previous) { - previous->next = next; - } - if (next) { - next->previous = previous; - } - node->parent.reset(); - node->manager = nullptr; - if (node == last) { - last = previous; - } - if (node == first) { - first = next; - } -} - -class ExprPrinter final { -public: - void operator()(const ExprAnd& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " && "; - 
std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += "!"; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - inner += fmt::format("P{}", expr.predicate); - } - - void operator()(const ExprCondCode& expr) { - inner += fmt::format("CC{}", expr.cc); - } - - void operator()(const ExprVar& expr) { - inner += fmt::format("V{}", expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? "true" : "false"; - } - - void operator()(const ExprGprEqual& expr) { - inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string inner; -}; - -class ASTPrinter { -public: - void operator()(const ASTProgram& ast) { - scope++; - inner += "program {\n"; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - inner += "}\n"; - scope--; - } - - void operator()(const ASTIfThen& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}}\n", Indent()); - } - - void operator()(const ASTIfElse& ast) { - inner += Indent(); - inner += "else {\n"; - - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - - inner += Indent(); - inner += "}\n"; - } - - void operator()(const ASTBlockEncoded& ast) { - inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); - } - - void 
operator()([[maybe_unused]] const ASTBlockDecoded& ast) { - inner += Indent(); - inner += "Block;\n"; - } - - void operator()(const ASTVarSet& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - inner += fmt::format("Label_{}:\n", ast.index); - } - - void operator()(const ASTGoto& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += - fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); - } - - void operator()(const ASTDoWhile& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}do {{\n", Indent()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), - ast.kills ? "discard" : "exit"); - } - - void operator()(const ASTBreak& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string_view Indent() { - if (space_segment_scope == scope) { - return space_segment; - } - - // Ensure that we don't exceed our view. 
- ASSERT(scope * 2 < spaces.size()); - - space_segment = spaces.substr(0, scope * 2); - space_segment_scope = scope; - return space_segment; - } - - std::string inner{}; - std::string_view space_segment; - - u32 scope{}; - u32 space_segment_scope{}; - - static constexpr std::string_view spaces{" "}; -}; - -std::string ASTManager::Print() const { - ASTPrinter printer{}; - printer.Visit(main_node); - return printer.GetResult(); -} - -ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_) - : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {} - -ASTManager::~ASTManager() { - Clear(); -} - -void ASTManager::Init() { - main_node = ASTBase::Make(ASTNode{}); - program = std::get_if(main_node->GetInnerData()); - false_condition = MakeExpr(false); -} - -void ASTManager::DeclareLabel(u32 address) { - const auto pair = labels_map.emplace(address, labels_count); - if (pair.second) { - labels_count++; - labels.resize(labels_count); - } -} - -void ASTManager::InsertLabel(u32 address) { - const u32 index = labels_map[address]; - const ASTNode label = ASTBase::Make(main_node, index); - labels[index] = label; - program->nodes.PushBack(label); -} - -void ASTManager::InsertGoto(Expr condition, u32 address) { - const u32 index = labels_map[address]; - const ASTNode goto_node = ASTBase::Make(main_node, std::move(condition), index); - gotos.push_back(goto_node); - program->nodes.PushBack(goto_node); -} - -void ASTManager::InsertBlock(u32 start_address, u32 end_address) { - ASTNode block = ASTBase::Make(main_node, start_address, end_address); - program->nodes.PushBack(std::move(block)); -} - -void ASTManager::InsertReturn(Expr condition, bool kills) { - ASTNode node = ASTBase::Make(main_node, std::move(condition), kills); - program->nodes.PushBack(std::move(node)); -} - -// The decompile algorithm is based on -// "Taming control flow: A structured approach to eliminating goto statements" -// by AM Erosa, LJ Hendren 1994. 
In general, the idea is to get gotos to be -// on the same structured level as the label which they jump to. This is done, -// through outward/inward movements and lifting. Once they are at the same -// level, you can enclose them in an "if" structure or a "do-while" structure. -void ASTManager::Decompile() { - auto it = gotos.begin(); - while (it != gotos.end()) { - const ASTNode goto_node = *it; - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - const ASTNode label = labels[*label_index]; - if (!full_decompile) { - // We only decompile backward jumps - if (!IsBackwardsJump(goto_node, label)) { - it++; - continue; - } - } - if (IndirectlyRelated(goto_node, label)) { - while (!DirectlyRelated(goto_node, label)) { - MoveOutward(goto_node); - } - } - if (DirectlyRelated(goto_node, label)) { - u32 goto_level = goto_node->GetLevel(); - const u32 label_level = label->GetLevel(); - while (label_level < goto_level) { - MoveOutward(goto_node); - goto_level--; - } - // TODO(Blinkhawk): Implement Lifting and Inward Movements - } - if (label->GetParent() == goto_node->GetParent()) { - bool is_loop = false; - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label) { - is_loop = true; - break; - } - current = current->GetPrevious(); - } - - if (is_loop) { - EncloseDoWhile(goto_node, label); - } else { - EncloseIfThen(goto_node, label); - } - it = gotos.erase(it); - continue; - } - it++; - } - if (full_decompile) { - for (const ASTNode& label : labels) { - auto& manager = label->GetManager(); - manager.Remove(label); - } - labels.clear(); - } else { - auto label_it = labels.begin(); - while (label_it != labels.end()) { - bool can_remove = true; - ASTNode label = *label_it; - for (const ASTNode& goto_node : gotos) { - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - ASTNode& glabel = labels[*label_index]; - if (glabel == label) { - can_remove = false; - break; - } - } 
- if (can_remove) { - label->MarkLabelUnused(); - } - } - } -} - -bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { - u32 goto_level = goto_node->GetLevel(); - u32 label_level = label_node->GetLevel(); - while (goto_level > label_level) { - goto_level--; - goto_node = goto_node->GetParent(); - } - while (label_level > goto_level) { - label_level--; - label_node = label_node->GetParent(); - } - while (goto_node->GetParent() != label_node->GetParent()) { - goto_node = goto_node->GetParent(); - label_node = label_node->GetParent(); - } - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label_node) { - return true; - } - current = current->GetPrevious(); - } - return false; -} - -bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const { - return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second)); -} - -bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const { - if (first->GetParent() == second->GetParent()) { - return false; - } - const u32 first_level = first->GetLevel(); - const u32 second_level = second->GetLevel(); - u32 min_level; - u32 max_level; - ASTNode max; - ASTNode min; - if (first_level > second_level) { - min_level = second_level; - min = second; - max_level = first_level; - max = first; - } else { - min_level = first_level; - min = first; - max_level = second_level; - max = second; - } - - while (max_level > min_level) { - max_level--; - max = max->GetParent(); - } - - return min->GetParent() == max->GetParent(); -} - -void ASTManager::ShowCurrentState(std::string_view state) const { - LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); - SanityCheck(); -} - -void ASTManager::SanityCheck() const { - for (const auto& label : labels) { - if (!label->GetParent()) { - LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); - } - } -} - -void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) { - 
ASTZipper& zipper = goto_node->GetManager(); - const ASTNode loop_start = label->GetNext(); - if (loop_start == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode parent = label->GetParent(); - const Expr condition = goto_node->GetGotoCondition(); - zipper.DetachSegment(loop_start, goto_node); - const ASTNode do_while_node = ASTBase::Make(parent, condition); - ASTZipper* sub_zipper = do_while_node->GetSubNodes(); - sub_zipper->Init(loop_start, do_while_node); - zipper.InsertAfter(do_while_node, label); - sub_zipper->Remove(goto_node); -} - -void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode if_end = label->GetPrevious(); - if (if_end == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode prev = goto_node->GetPrevious(); - const Expr condition = goto_node->GetGotoCondition(); - bool do_else = false; - if (!disable_else_derivation && prev->IsIfThen()) { - const Expr if_condition = prev->GetIfCondition(); - do_else = ExprAreEqual(if_condition, condition); - } - const ASTNode parent = label->GetParent(); - zipper.DetachSegment(goto_node, if_end); - ASTNode if_node; - if (do_else) { - if_node = ASTBase::Make(parent); - } else { - Expr neg_condition = MakeExprNot(condition); - if_node = ASTBase::Make(parent, neg_condition); - } - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(goto_node, if_node); - zipper.InsertAfter(if_node, prev); - sub_zipper->Remove(goto_node); -} - -void ASTManager::MoveOutward(ASTNode goto_node) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode parent = goto_node->GetParent(); - ASTZipper& zipper2 = parent->GetManager(); - const ASTNode grandpa = parent->GetParent(); - const bool is_loop = parent->IsLoop(); - const bool is_else = parent->IsIfElse(); - const bool is_if = parent->IsIfThen(); - - const ASTNode prev = goto_node->GetPrevious(); - const ASTNode post = goto_node->GetNext(); - - const Expr 
condition = goto_node->GetGotoCondition(); - zipper.DetachSingle(goto_node); - if (is_loop) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr(var_index); - const ASTNode var_node = ASTBase::Make(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make(parent, var_index, false_condition); - zipper2.InsertBefore(var_node_init, parent); - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - const ASTNode break_node = ASTBase::Make(parent, var_condition); - zipper.InsertAfter(break_node, var_node); - } else if (is_if || is_else) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr(var_index); - const ASTNode var_node = ASTBase::Make(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make(parent, var_index, false_condition); - if (is_if) { - zipper2.InsertBefore(var_node_init, parent); - } else { - zipper2.InsertBefore(var_node_init, parent->GetPrevious()); - } - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - if (post) { - zipper.DetachTail(post); - const ASTNode if_node = ASTBase::Make(parent, MakeExprNot(var_condition)); - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(post, if_node); - zipper.InsertAfter(if_node, var_node); - } - } else { - UNREACHABLE(); - } - const ASTNode next = parent->GetNext(); - if (is_if && next && next->IsIfElse()) { - zipper2.InsertAfter(goto_node, next); - goto_node->SetParent(grandpa); - return; - } - zipper2.InsertAfter(goto_node, parent); - goto_node->SetParent(grandpa); -} - -class ASTClearer { -public: - ASTClearer() = default; - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - 
void operator()(const ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) { - ast.nodes.clear(); - } - - void operator()([[maybe_unused]] const ASTVarSet& ast) {} - - void operator()([[maybe_unused]] const ASTLabel& ast) {} - - void operator()([[maybe_unused]] const ASTGoto& ast) {} - - void operator()(const ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTReturn& ast) {} - - void operator()([[maybe_unused]] const ASTBreak& ast) {} - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - node->Clear(); - } -}; - -void ASTManager::Clear() { - if (!main_node) { - return; - } - ASTClearer clearer{}; - clearer.Visit(main_node); - main_node.reset(); - program = nullptr; - labels_map.clear(); - labels.clear(); - gotos.clear(); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h deleted file mode 100644 index dc49b369e..000000000 --- a/src/video_core/shader/ast.h +++ /dev/null @@ -1,398 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "video_core/shader/expr.h" -#include "video_core/shader/node.h" - -namespace VideoCommon::Shader { - -class ASTBase; -class ASTBlockDecoded; -class ASTBlockEncoded; -class ASTBreak; -class ASTDoWhile; -class ASTGoto; -class ASTIfElse; -class ASTIfThen; -class ASTLabel; -class ASTProgram; -class ASTReturn; -class ASTVarSet; - -using ASTData = std::variant; - -using ASTNode = std::shared_ptr; - -enum class ASTZipperType : u32 { - Program, - IfThen, - IfElse, - Loop, -}; - -class ASTZipper final { -public: - explicit ASTZipper(); - - void Init(ASTNode first, ASTNode parent); - - ASTNode GetFirst() const { - return first; - } - - ASTNode GetLast() const { - return last; - } - - void PushBack(ASTNode new_node); - void PushFront(ASTNode new_node); - void InsertAfter(ASTNode new_node, ASTNode at_node); - void InsertBefore(ASTNode new_node, ASTNode at_node); - void DetachTail(ASTNode node); - void DetachSingle(ASTNode node); - void DetachSegment(ASTNode start, ASTNode end); - void Remove(ASTNode node); - - ASTNode first; - ASTNode last; -}; - -class ASTProgram { -public: - ASTZipper nodes{}; -}; - -class ASTIfThen { -public: - explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTIfElse { -public: - ASTZipper nodes{}; -}; - -class ASTBlockEncoded { -public: - explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {} - u32 start; - u32 end; -}; - -class ASTBlockDecoded { -public: - explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {} - NodeBlock nodes; -}; - -class ASTVarSet { -public: - explicit ASTVarSet(u32 index_, Expr condition_) - : index{index_}, condition{std::move(condition_)} {} - - u32 index; - Expr condition; -}; - -class ASTLabel { -public: - explicit ASTLabel(u32 index_) : index{index_} {} - u32 index; - bool unused{}; -}; - -class ASTGoto { 
-public: - explicit ASTGoto(Expr condition_, u32 label_) - : condition{std::move(condition_)}, label{label_} {} - - Expr condition; - u32 label; -}; - -class ASTDoWhile { -public: - explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTReturn { -public: - explicit ASTReturn(Expr condition_, bool kills_) - : condition{std::move(condition_)}, kills{kills_} {} - - Expr condition; - bool kills; -}; - -class ASTBreak { -public: - explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; -}; - -class ASTBase { -public: - explicit ASTBase(ASTNode parent_, ASTData data_) - : data{std::move(data_)}, parent{std::move(parent_)} {} - - template - static ASTNode Make(ASTNode parent, Args&&... args) { - return std::make_shared(std::move(parent), - ASTData(U(std::forward(args)...))); - } - - void SetParent(ASTNode new_parent) { - parent = std::move(new_parent); - } - - ASTNode& GetParent() { - return parent; - } - - const ASTNode& GetParent() const { - return parent; - } - - u32 GetLevel() const { - u32 level = 0; - auto next_parent = parent; - while (next_parent) { - next_parent = next_parent->GetParent(); - level++; - } - return level; - } - - ASTData* GetInnerData() { - return &data; - } - - const ASTData* GetInnerData() const { - return &data; - } - - ASTNode GetNext() const { - return next; - } - - ASTNode GetPrevious() const { - return previous; - } - - ASTZipper& GetManager() { - return *manager; - } - - const ASTZipper& GetManager() const { - return *manager; - } - - std::optional GetGotoLabel() const { - if (const auto* inner = std::get_if(&data)) { - return {inner->label}; - } - return std::nullopt; - } - - Expr GetGotoCondition() const { - if (const auto* inner = std::get_if(&data)) { - return inner->condition; - } - return nullptr; - } - - void MarkLabelUnused() { - if (auto* inner = std::get_if(&data)) { - inner->unused = true; - } - } - - bool IsLabelUnused() 
const { - if (const auto* inner = std::get_if(&data)) { - return inner->unused; - } - return true; - } - - std::optional GetLabelIndex() const { - if (const auto* inner = std::get_if(&data)) { - return {inner->index}; - } - return std::nullopt; - } - - Expr GetIfCondition() const { - if (const auto* inner = std::get_if(&data)) { - return inner->condition; - } - return nullptr; - } - - void SetGotoCondition(Expr new_condition) { - if (auto* inner = std::get_if(&data)) { - inner->condition = std::move(new_condition); - } - } - - bool IsIfThen() const { - return std::holds_alternative(data); - } - - bool IsIfElse() const { - return std::holds_alternative(data); - } - - bool IsBlockEncoded() const { - return std::holds_alternative(data); - } - - void TransformBlockEncoded(NodeBlock&& nodes) { - data = ASTBlockDecoded(std::move(nodes)); - } - - bool IsLoop() const { - return std::holds_alternative(data); - } - - ASTZipper* GetSubNodes() { - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - return nullptr; - } - - void Clear() { - next.reset(); - previous.reset(); - parent.reset(); - manager = nullptr; - } - -private: - friend class ASTZipper; - - ASTData data; - ASTNode parent; - ASTNode next; - ASTNode previous; - ASTZipper* manager{}; -}; - -class ASTManager final { -public: - explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_); - ~ASTManager(); - - ASTManager(const ASTManager& o) = delete; - ASTManager& operator=(const ASTManager& other) = delete; - - ASTManager(ASTManager&& other) noexcept = default; - ASTManager& operator=(ASTManager&& other) noexcept = default; - - void Init(); - - void DeclareLabel(u32 address); - - void InsertLabel(u32 address); - - void InsertGoto(Expr 
condition, u32 address); - - void InsertBlock(u32 start_address, u32 end_address); - - void InsertReturn(Expr condition, bool kills); - - std::string Print() const; - - void Decompile(); - - void ShowCurrentState(std::string_view state) const; - - void SanityCheck() const; - - void Clear(); - - bool IsFullyDecompiled() const { - if (full_decompile) { - return gotos.empty(); - } - - for (ASTNode goto_node : gotos) { - auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return false; - } - ASTNode glabel = labels[*label_index]; - if (IsBackwardsJump(goto_node, glabel)) { - return false; - } - } - return true; - } - - ASTNode GetProgram() const { - return main_node; - } - - u32 GetVariables() const { - return variables; - } - - const std::vector& GetLabels() const { - return labels; - } - -private: - bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const; - - bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - void EncloseDoWhile(ASTNode goto_node, ASTNode label); - - void EncloseIfThen(ASTNode goto_node, ASTNode label); - - void MoveOutward(ASTNode goto_node); - - u32 NewVariable() { - return variables++; - } - - bool full_decompile{}; - bool disable_else_derivation{}; - std::unordered_map labels_map{}; - u32 labels_count{}; - std::vector labels{}; - std::list gotos{}; - u32 variables{}; - ASTProgram* program{}; - ASTNode main_node{}; - Expr false_condition{}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp deleted file mode 100644 index 02adcf9c7..000000000 --- a/src/video_core/shader/async_shaders.cpp +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include -#include -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_base.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/shader/async_shaders.h" - -namespace VideoCommon::Shader { - -AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {} - -AsyncShaders::~AsyncShaders() { - KillWorkers(); -} - -void AsyncShaders::AllocateWorkers() { - // Use at least one thread - u32 num_workers = 1; - - // Deduce how many more threads we can use - const u32 thread_count = std::thread::hardware_concurrency(); - if (thread_count >= 8) { - // Increase async workers by 1 for every 2 threads >= 8 - num_workers += 1 + (thread_count - 8) / 2; - } - - // If we already have workers queued, ignore - if (num_workers == worker_threads.size()) { - return; - } - - // If workers already exist, clear them - if (!worker_threads.empty()) { - FreeWorkers(); - } - - // Create workers - for (std::size_t i = 0; i < num_workers; i++) { - context_list.push_back(emu_window.CreateSharedContext()); - worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, - context_list[i].get()); - } -} - -void AsyncShaders::FreeWorkers() { - // Mark all threads to quit - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.join(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -void AsyncShaders::KillWorkers() { - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.detach(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -bool AsyncShaders::HasWorkQueued() const { - return !pending_queue.empty(); -} - -bool AsyncShaders::HasCompletedWork() const { - std::shared_lock lock{completed_mutex}; - return !finished_work.empty(); -} - -bool 
AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { - const auto& regs = gpu.Maxwell3D().regs; - - // If something is using depth, we can assume that games are not rendering anything which will - // be used one time. - if (regs.zeta_enable) { - return true; - } - - // If games are using a small index count, we can assume these are full screen quads. Usually - // these shaders are only used once for building textures so we can assume they can't be built - // async - if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { - return false; - } - - return true; -} - -std::vector AsyncShaders::GetCompletedWork() { - std::vector results; - { - std::unique_lock lock{completed_mutex}; - results = std::move(finished_work); - finished_work.clear(); - } - return results; -} - -void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, - Tegra::Engines::ShaderType shader_type, u64 uid, - std::vector code, std::vector code_b, - u32 main_offset, CompilerSettings compiler_settings, - const Registry& registry, VAddr cpu_addr) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = device.UseAssemblyShaders() ? 
Backend::GLASM : Backend::OpenGL, - .device = &device, - .shader_type = shader_type, - .uid = uid, - .code = std::move(code), - .code_b = std::move(code_b), - .main_offset = main_offset, - .compiler_settings = compiler_settings, - .registry = registry, - .cpu_address = cpu_addr, - .pp_cache = nullptr, - .vk_device = nullptr, - .scheduler = nullptr, - .descriptor_pool = nullptr, - .update_descriptor_queue = nullptr, - .bindings{}, - .program{}, - .key{}, - .num_color_buffers = 0, - }); - cv.notify_one(); -} - -void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, - const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector bindings, - Vulkan::SPIRVProgram program, - Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = Backend::Vulkan, - .device = nullptr, - .shader_type{}, - .uid = 0, - .code{}, - .code_b{}, - .main_offset = 0, - .compiler_settings{}, - .registry{}, - .cpu_address = 0, - .pp_cache = pp_cache, - .vk_device = &device, - .scheduler = &scheduler, - .descriptor_pool = &descriptor_pool, - .update_descriptor_queue = &update_descriptor_queue, - .bindings = std::move(bindings), - .program = std::move(program), - .key = key, - .num_color_buffers = num_color_buffers, - }); - cv.notify_one(); -} - -void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { - while (!is_thread_exiting.load(std::memory_order_relaxed)) { - std::unique_lock lock{queue_mutex}; - cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); - if (is_thread_exiting) { - return; - } - - // Partial lock to allow all threads to read at the same time - if (!HasWorkQueued()) { - continue; - } - // Another thread beat us, just unlock and wait for the next load - if (pending_queue.empty()) { - continue; - } - - // Pull work from queue - 
WorkerParams work = std::move(pending_queue.front()); - pending_queue.pop(); - lock.unlock(); - - if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { - const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); - const auto scope = context->Acquire(); - auto program = - OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); - Result result{}; - result.backend = work.backend; - result.cpu_address = work.cpu_address; - result.uid = work.uid; - result.code = std::move(work.code); - result.code_b = std::move(work.code_b); - result.shader_type = work.shader_type; - - if (work.backend == Backend::OpenGL) { - result.program.opengl = std::move(program->source_program); - } else if (work.backend == Backend::GLASM) { - result.program.glasm = std::move(program->assembly_program); - } - - { - std::unique_lock complete_lock(completed_mutex); - finished_work.push_back(std::move(result)); - } - } else if (work.backend == Backend::Vulkan) { - auto pipeline = std::make_unique( - *work.vk_device, *work.scheduler, *work.descriptor_pool, - *work.update_descriptor_queue, work.key, work.bindings, work.program, - work.num_color_buffers); - - work.pp_cache->EmplacePipeline(std::move(pipeline)); - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h deleted file mode 100644 index 7fdff6e56..000000000 --- a/src/video_core/shader/async_shaders.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include - -#include - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Core::Frontend { -class EmuWindow; -class GraphicsContext; -} // namespace Core::Frontend - -namespace Tegra { -class GPU; -} - -namespace Vulkan { -class VKPipelineCache; -} - -namespace VideoCommon::Shader { - -class AsyncShaders { -public: - enum class Backend { - OpenGL, - GLASM, - Vulkan, - }; - - struct ResultPrograms { - OpenGL::OGLProgram opengl; - OpenGL::OGLAssemblyProgram glasm; - }; - - struct Result { - u64 uid; - VAddr cpu_address; - Backend backend; - ResultPrograms program; - std::vector code; - std::vector code_b; - Tegra::Engines::ShaderType shader_type; - }; - - explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_); - ~AsyncShaders(); - - /// Start up shader worker threads - void AllocateWorkers(); - - /// Clear the shader queue and kill all worker threads - void FreeWorkers(); - - // Force end all threads - void KillWorkers(); - - /// Check to see if any shaders have actually been compiled - [[nodiscard]] bool HasCompletedWork() const; - - /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build - /// every shader async as some shaders are only built and executed once. 
We try to "guess" which - /// shader would be used only once - [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; - - /// Pulls completed compiled shaders - [[nodiscard]] std::vector GetCompletedWork(); - - void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, - u64 uid, std::vector code, std::vector code_b, u32 main_offset, - CompilerSettings compiler_settings, const Registry& registry, - VAddr cpu_addr); - - void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, - Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector bindings, - Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, - u32 num_color_buffers); - -private: - void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); - - /// Check our worker queue to see if we have any work queued already - [[nodiscard]] bool HasWorkQueued() const; - - struct WorkerParams { - Backend backend; - // For OGL - const OpenGL::Device* device; - Tegra::Engines::ShaderType shader_type; - u64 uid; - std::vector code; - std::vector code_b; - u32 main_offset; - CompilerSettings compiler_settings; - std::optional registry; - VAddr cpu_address; - - // For Vulkan - Vulkan::VKPipelineCache* pp_cache; - const Vulkan::Device* vk_device; - Vulkan::VKScheduler* scheduler; - Vulkan::VKDescriptorPool* descriptor_pool; - Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; - std::vector bindings; - Vulkan::SPIRVProgram program; - Vulkan::GraphicsPipelineCacheKey key; - u32 num_color_buffers; - }; - - std::condition_variable cv; - mutable std::mutex queue_mutex; - mutable std::shared_mutex completed_mutex; - std::atomic is_thread_exiting{}; - std::vector> context_list; - std::vector worker_threads; - std::queue pending_queue; - std::vector finished_work; - Core::Frontend::EmuWindow& emu_window; -}; - -} // namespace 
VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp deleted file mode 100644 index cddcbd4f0..000000000 --- a/src/video_core/shader/compiler_settings.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/shader/compiler_settings.h" - -namespace VideoCommon::Shader { - -std::string CompileDepthAsString(const CompileDepth cd) { - switch (cd) { - case CompileDepth::BruteForce: - return "Brute Force Compile"; - case CompileDepth::FlowStack: - return "Simple Flow Stack Mode"; - case CompileDepth::NoFlowStack: - return "Remove Flow Stack"; - case CompileDepth::DecompileBackwards: - return "Decompile Backward Jumps"; - case CompileDepth::FullDecompile: - return "Full Decompilation"; - default: - return "Unknown Compiler Process"; - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h deleted file mode 100644 index 916018c01..000000000 --- a/src/video_core/shader/compiler_settings.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class CompileDepth : u32 { - BruteForce = 0, - FlowStack = 1, - NoFlowStack = 2, - DecompileBackwards = 3, - FullDecompile = 4, -}; - -std::string CompileDepthAsString(CompileDepth cd); - -struct CompilerSettings { - CompileDepth depth{CompileDepth::NoFlowStack}; - bool disable_else_derivation{true}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp deleted file mode 100644 index 43d965f2f..000000000 --- a/src/video_core/shader/control_flow.cpp +++ /dev/null @@ -1,751 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -constexpr s32 unassigned_branch = -2; - -struct Query { - u32 address{}; - std::stack ssy_stack{}; - std::stack pbk_stack{}; -}; - -struct BlockStack { - BlockStack() = default; - explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} - std::stack ssy_stack{}; - std::stack pbk_stack{}; -}; - -template -BlockBranchInfo MakeBranchInfo(Args&&... 
args) { - static_assert(std::is_convertible_v); - return std::make_shared(T(std::forward(args)...)); -} - -bool BlockBranchIsIgnored(BlockBranchInfo first) { - bool ignore = false; - if (std::holds_alternative(*first)) { - const auto branch = std::get_if(first.get()); - ignore = branch->ignore; - } - return ignore; -} - -struct BlockInfo { - u32 start{}; - u32 end{}; - bool visited{}; - BlockBranchInfo branch{}; - - bool IsInside(const u32 address) const { - return start <= address && address <= end; - } -}; - -struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) - : program_code{program_code_}, registry{registry_}, start{start_} {} - - const ProgramCode& program_code; - Registry& registry; - u32 start{}; - std::vector block_info; - std::list inspect_queries; - std::list queries; - std::unordered_map registered; - std::set labels; - std::map ssy_labels; - std::map pbk_labels; - std::unordered_map stacks; - ASTManager* manager{}; -}; - -enum class BlockCollision : u32 { None, Found, Inside }; - -std::pair TryGetBlock(CFGRebuildState& state, u32 address) { - const auto& blocks = state.block_info; - for (u32 index = 0; index < blocks.size(); index++) { - if (blocks[index].start == address) { - return {BlockCollision::Found, index}; - } - if (blocks[index].IsInside(address)) { - return {BlockCollision::Inside, index}; - } - } - return {BlockCollision::None, 0xFFFFFFFF}; -} - -struct ParseInfo { - BlockBranchInfo branch_info{}; - u32 end_address{}; -}; - -BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { - auto& it = state.block_info.emplace_back(); - it.start = start; - it.end = end; - const u32 index = static_cast(state.block_info.size() - 1); - state.registered.insert({start, index}); - return it; -} - -Pred GetPredicate(u32 index, bool negated) { - return static_cast(static_cast(index) + (negated ? 
8ULL : 0ULL)); -} - -enum class ParseResult : u32 { - ControlCaught, - BlockEnd, - AbnormalFlow, -}; - -struct BranchIndirectInfo { - u32 buffer{}; - u32 offset{}; - u32 entries{}; - s32 relative_position{}; -}; - -struct BufferInfo { - u32 index; - u32 offset; -}; - -std::optional> GetBRXInfo(const CFGRebuildState& state, u32& pos) { - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() != OpCode::Id::BRX) { - return std::nullopt; - } - if (instr.brx.constant_buffer != 0) { - return std::nullopt; - } - --pos; - return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); -} - -template -// requires std::predicate -// requires std::invocable -std::optional TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, - PackCallable pack) { - for (; pos >= state.start; --pos) { - if (IsSchedInstruction(pos, state.start)) { - continue; - } - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (!opcode) { - continue; - } - if (test(instr, opcode->get())) { - --pos; - return std::make_optional(pack(instr, opcode->get())); - } - } - return std::nullopt; -} - -std::optional> TrackLDC(const CFGRebuildState& state, u32& pos, - u64 brx_tracked_register) { - return TrackInstruction>( - state, pos, - [brx_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::LD_C && - instr.gpr0.Value() == brx_tracked_register && - instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; - }, - [](auto instr, const auto& opcode) { - const BufferInfo info = {static_cast(instr.cbuf36.index.Value()), - static_cast(instr.cbuf36.GetOffset())}; - return std::make_pair(info, instr.gpr8.Value()); - }); -} - -std::optional TrackSHLRegister(const CFGRebuildState& state, u32& pos, - u64 ldc_tracked_register) { - return TrackInstruction( - state, pos, - [ldc_tracked_register](auto instr, const auto& opcode) { - return 
opcode.GetId() == OpCode::Id::SHL_IMM && - instr.gpr0.Value() == ldc_tracked_register; - }, - [](auto instr, const auto&) { return instr.gpr8.Value(); }); -} - -std::optional TrackIMNMXValue(const CFGRebuildState& state, u32& pos, - u64 shl_tracked_register) { - return TrackInstruction( - state, pos, - [shl_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::IMNMX_IMM && - instr.gpr0.Value() == shl_tracked_register; - }, - [](auto instr, const auto&) { - return static_cast(instr.alu.GetSignedImm20_20() + 1); - }); -} - -std::optional TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { - const auto brx_info = GetBRXInfo(state, pos); - if (!brx_info) { - return std::nullopt; - } - const auto [relative_position, brx_tracked_register] = *brx_info; - - const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); - if (!ldc_info) { - return std::nullopt; - } - const auto [buffer_info, ldc_tracked_register] = *ldc_info; - - const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); - if (!shl_tracked_register) { - return std::nullopt; - } - - const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); - if (!entries) { - return std::nullopt; - } - - return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; -} - -std::pair ParseCode(CFGRebuildState& state, u32 address) { - u32 offset = static_cast(address); - const u32 end_address = static_cast(state.program_code.size()); - ParseInfo parse_info{}; - SingleBranch single_branch{}; - - const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) { - const auto pair = rebuild_state.labels.emplace(label_address); - if (pair.second) { - rebuild_state.inspect_queries.push_back(label_address); - } - }; - - while (true) { - if (offset >= end_address) { - // ASSERT_OR_EXECUTE can't be used, as it ignores the break - ASSERT_MSG(false, "Shader passed the current limit!"); - - 
single_branch.address = exit_branch; - single_branch.ignore = false; - break; - } - if (state.registered.contains(offset)) { - single_branch.address = offset; - single_branch.ignore = true; - break; - } - if (IsSchedInstruction(offset, state.start)) { - offset++; - continue; - } - const Instruction instr = {state.program_code[offset]}; - const auto opcode = OpCode::Decode(instr); - if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { - offset++; - continue; - } - - switch (opcode->get().GetId()) { - case OpCode::Id::EXIT: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRA: { - if (instr.bra.constant_buffer != 0) { - return {ParseResult::AbnormalFlow, parse_info}; - } - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - const u32 branch_offset = offset + instr.bra.GetBranchTarget(); - if (branch_offset == 0) { - 
single_branch.address = exit_branch; - } else { - single_branch.address = branch_offset; - } - insert_label(state, branch_offset); - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SYNC: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = true; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRK: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = true; - single_branch.ignore = false; - 
parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::KIL: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = true; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SSY: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.ssy_labels.emplace(offset, target); - break; - } - case OpCode::Id::PBK: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.pbk_labels.emplace(offset, target); - break; - } - case OpCode::Id::BRX: { - const auto tmp = TrackBranchIndirectInfo(state, offset); - if (!tmp) { - LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); - return {ParseResult::AbnormalFlow, parse_info}; - } - - const auto result = *tmp; - const s32 pc_target = offset + result.relative_position; - std::vector branches; - for (u32 i = 0; i < result.entries; i++) { - auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); - if (!key) { - return {ParseResult::AbnormalFlow, parse_info}; - } - u32 value = 
*key; - u32 target = static_cast((value >> 3) + pc_target); - insert_label(state, target); - branches.emplace_back(value, target); - } - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - static_cast(instr.gpr8.Value()), std::move(branches)); - - return {ParseResult::ControlCaught, parse_info}; - } - default: - break; - } - - offset++; - } - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - parse_info.end_address = offset - 1; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, - single_branch.is_brk, single_branch.ignore); - return {ParseResult::BlockEnd, parse_info}; -} - -bool TryInspectAddress(CFGRebuildState& state) { - if (state.inspect_queries.empty()) { - return false; - } - - const u32 address = state.inspect_queries.front(); - state.inspect_queries.pop_front(); - const auto [result, block_index] = TryGetBlock(state, address); - switch (result) { - case BlockCollision::Found: { - return true; - } - case BlockCollision::Inside: { - // This case is the tricky one: - // We need to split the block into 2 separate blocks - const u32 end = state.block_info[block_index].end; - BlockInfo& new_block = CreateBlockInfo(state, address, end); - BlockInfo& current_block = state.block_info[block_index]; - current_block.end = address - 1; - new_block.branch = std::move(current_block.branch); - BlockBranchInfo forward_branch = MakeBranchInfo(); - const auto branch = std::get_if(forward_branch.get()); - branch->address = address; - branch->ignore = true; - current_block.branch = std::move(forward_branch); - return true; - } - default: - break; - } - const auto [parse_result, parse_info] = ParseCode(state, address); - if (parse_result == ParseResult::AbnormalFlow) { - // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction - return false; - } - - BlockInfo& block_info = CreateBlockInfo(state, address, 
parse_info.end_address); - block_info.branch = parse_info.branch_info; - if (std::holds_alternative(*block_info.branch)) { - const auto branch = std::get_if(block_info.branch.get()); - if (branch->condition.IsUnconditional()) { - return true; - } - const u32 fallthrough_address = parse_info.end_address + 1; - state.inspect_queries.push_front(fallthrough_address); - return true; - } - return true; -} - -bool TryQuery(CFGRebuildState& state) { - const auto gather_labels = [](std::stack& cc, std::map& labels, - BlockInfo& block) { - auto gather_start = labels.lower_bound(block.start); - const auto gather_end = labels.upper_bound(block.end); - while (gather_start != gather_end) { - cc.push(gather_start->second); - ++gather_start; - } - }; - if (state.queries.empty()) { - return false; - } - - Query& q = state.queries.front(); - const u32 block_index = state.registered[q.address]; - BlockInfo& block = state.block_info[block_index]; - // If the block is visited, check if the stacks match, else gather the ssy/pbk - // labels into the current stack and look if the branch at the end of the block - // consumes a label. 
Schedule new queries accordingly - if (block.visited) { - BlockStack& stack = state.stacks[q.address]; - const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && - (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); - state.queries.pop_front(); - return all_okay; - } - block.visited = true; - state.stacks.insert_or_assign(q.address, BlockStack{q}); - - Query q2(q); - state.queries.pop_front(); - gather_labels(q2.ssy_stack, state.ssy_labels, block); - gather_labels(q2.pbk_stack, state.pbk_labels, block); - if (std::holds_alternative(*block.branch)) { - auto* branch = std::get_if(block.branch.get()); - if (!branch->condition.IsUnconditional()) { - q2.address = block.end + 1; - state.queries.push_back(q2); - } - - auto& conditional_query = state.queries.emplace_back(q2); - if (branch->is_sync) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.ssy_stack.top(); - } - conditional_query.ssy_stack.pop(); - } - if (branch->is_brk) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.pbk_stack.top(); - } - conditional_query.pbk_stack.pop(); - } - conditional_query.address = branch->address; - return true; - } - - const auto* multi_branch = std::get_if(block.branch.get()); - for (const auto& branch_case : multi_branch->branches) { - auto& conditional_query = state.queries.emplace_back(q2); - conditional_query.address = branch_case.address; - } - - return true; -} - -void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { - const auto get_expr = [](const Condition& cond) -> Expr { - Expr result; - if (cond.cc != ConditionCode::T) { - result = MakeExpr(cond.cc); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast(cond.predicate); - bool negate = false; - if (pred > 7) { - negate = true; - pred -= 8; - } - Expr extra = MakeExpr(pred); - if (negate) { - extra = MakeExpr(std::move(extra)); - } - if (result) { - return 
MakeExpr(std::move(extra), std::move(result)); - } - return extra; - } - if (result) { - return result; - } - return MakeExpr(true); - }; - - if (std::holds_alternative(*branch_info)) { - const auto* branch = std::get_if(branch_info.get()); - if (branch->address < 0) { - if (branch->kill) { - mm.InsertReturn(get_expr(branch->condition), true); - return; - } - mm.InsertReturn(get_expr(branch->condition), false); - return; - } - mm.InsertGoto(get_expr(branch->condition), branch->address); - return; - } - const auto* multi_branch = std::get_if(branch_info.get()); - for (const auto& branch_case : multi_branch->branches) { - mm.InsertGoto(MakeExpr(multi_branch->gpr, branch_case.cmp_value), - branch_case.address); - } -} - -void DecompileShader(CFGRebuildState& state) { - state.manager->Init(); - for (auto label : state.labels) { - state.manager->DeclareLabel(label); - } - for (const auto& block : state.block_info) { - if (state.labels.contains(block.start)) { - state.manager->InsertLabel(block.start); - } - const bool ignore = BlockBranchIsIgnored(block.branch); - const u32 end = ignore ? 
block.end + 1 : block.end; - state.manager->InsertBlock(block.start, end); - if (!ignore) { - InsertBranch(*state.manager, block.branch); - } - } - state.manager->Decompile(); -} - -} // Anonymous namespace - -std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry) { - auto result_out = std::make_unique(); - if (settings.depth == CompileDepth::BruteForce) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - - CFGRebuildState state{program_code, start_address, registry}; - // Inspect Code and generate blocks - state.labels.clear(); - state.labels.emplace(start_address); - state.inspect_queries.push_back(state.start); - while (!state.inspect_queries.empty()) { - if (!TryInspectAddress(state)) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - } - - bool use_flow_stack = true; - - bool decompiled = false; - - if (settings.depth != CompileDepth::FlowStack) { - // Decompile Stacks - state.queries.push_back(Query{state.start, {}, {}}); - decompiled = true; - while (!state.queries.empty()) { - if (!TryQuery(state)) { - decompiled = false; - break; - } - } - } - - use_flow_stack = !decompiled; - - // Sort and organize results - std::sort(state.block_info.begin(), state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); - if (decompiled && settings.depth != CompileDepth::NoFlowStack) { - ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, - settings.disable_else_derivation}; - state.manager = &manager; - DecompileShader(state); - decompiled = state.manager->IsFullyDecompiled(); - if (!decompiled) { - if (settings.depth == CompileDepth::FullDecompile) { - LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); - } else { - LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:"); - } - state.manager->ShowCurrentState("Of Shader"); - 
state.manager->Clear(); - } else { - auto characteristics = std::make_unique(); - characteristics->start = start_address; - characteristics->settings.depth = settings.depth; - characteristics->manager = std::move(manager); - characteristics->end = state.block_info.back().end + 1; - return characteristics; - } - } - - result_out->start = start_address; - result_out->settings.depth = - use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack; - result_out->blocks.clear(); - for (auto& block : state.block_info) { - ShaderBlock new_block{}; - new_block.start = block.start; - new_block.end = block.end; - new_block.ignore_branch = BlockBranchIsIgnored(block.branch); - if (!new_block.ignore_branch) { - new_block.branch = block.branch; - } - result_out->end = std::max(result_out->end, block.end); - result_out->blocks.push_back(new_block); - } - if (!use_flow_stack) { - result_out->labels = std::move(state.labels); - return result_out; - } - - auto back = result_out->blocks.begin(); - auto next = std::next(back); - while (next != result_out->blocks.end()) { - if (!state.labels.contains(next->start) && next->start == back->end + 1) { - back->end = next->end; - next = result_out->blocks.erase(next); - continue; - } - back = next; - ++next; - } - - return result_out; -} -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h deleted file mode 100644 index 37bf96492..000000000 --- a/src/video_core/shader/control_flow.h +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include - -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -constexpr s32 exit_branch = -1; - -struct Condition { - Pred predicate{Pred::UnusedIndex}; - ConditionCode cc{ConditionCode::T}; - - bool IsUnconditional() const { - return predicate == Pred::UnusedIndex && cc == ConditionCode::T; - } - - bool operator==(const Condition& other) const { - return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); - } - - bool operator!=(const Condition& other) const { - return !operator==(other); - } -}; - -class SingleBranch { -public: - SingleBranch() = default; - explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, - bool is_brk_, bool ignore_) - : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, - ignore{ignore_} {} - - bool operator==(const SingleBranch& b) const { - return std::tie(condition, address, kill, is_sync, is_brk, ignore) == - std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); - } - - bool operator!=(const SingleBranch& b) const { - return !operator==(b); - } - - Condition condition{}; - s32 address{exit_branch}; - bool kill{}; - bool is_sync{}; - bool is_brk{}; - bool ignore{}; -}; - -struct CaseBranch { - explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} - u32 cmp_value; - u32 address; -}; - -class MultiBranch { -public: - explicit MultiBranch(u32 gpr_, std::vector&& branches_) - : gpr{gpr_}, branches{std::move(branches_)} {} - - u32 gpr{}; - std::vector branches{}; -}; - -using BranchData = std::variant; -using BlockBranchInfo = std::shared_ptr; - -bool 
BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); - -struct ShaderBlock { - u32 start{}; - u32 end{}; - bool ignore_branch{}; - BlockBranchInfo branch{}; - - bool operator==(const ShaderBlock& sb) const { - return std::tie(start, end, ignore_branch) == - std::tie(sb.start, sb.end, sb.ignore_branch) && - BlockBranchInfoAreEqual(branch, sb.branch); - } - - bool operator!=(const ShaderBlock& sb) const { - return !operator==(sb); - } -}; - -struct ShaderCharacteristics { - std::list blocks{}; - std::set labels{}; - u32 start{}; - u32 end{}; - ASTManager manager{true, true}; - CompilerSettings settings{}; -}; - -std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp deleted file mode 100644 index 6576d1208..000000000 --- a/src/video_core/shader/decode.cpp +++ /dev/null @@ -1,368 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { - -void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, - const std::list& used_samplers) { - if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { - return; - } - u32 count{}; - std::vector bound_offsets; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - ++count; - bound_offsets.emplace_back(sampler.offset); - } - if (count > 1) { - gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); - } -} - -std::optional TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, - VideoCore::GuestDriverProfile& gpu_driver, - const std::list& used_samplers) { - const u32 base_offset = sampler_to_deduce.offset; - u32 max_offset{std::numeric_limits::max()}; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - if (sampler.offset > base_offset) { - max_offset = std::min(sampler.offset, max_offset); - } - } - if (max_offset == std::numeric_limits::max()) { - return std::nullopt; - } - return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); -} - -} // Anonymous namespace - -class ASTDecoder { -public: - explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} - - void operator()(ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = 
current->GetNext(); - } - } - - void operator()(ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) {} - - void operator()(ASTVarSet& ast) {} - - void operator()(ASTLabel& ast) {} - - void operator()(ASTGoto& ast) {} - - void operator()(ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTReturn& ast) {} - - void operator()(ASTBreak& ast) {} - - void Visit(ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - if (node->IsBlockEncoded()) { - auto block = std::get_if(node->GetInnerData()); - NodeBlock bb = ir.DecodeRange(block->start, block->end); - node->TransformBlockEncoded(std::move(bb)); - } - } - -private: - ShaderIR& ir; -}; - -void ShaderIR::Decode() { - std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - - decompiled = false; - auto info = ScanFlow(program_code, main_offset, settings, registry); - auto& shader_info = *info; - coverage_begin = shader_info.start; - coverage_end = shader_info.end; - switch (shader_info.settings.depth) { - case CompileDepth::FlowStack: { - for (const auto& block : shader_info.blocks) { - basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); - } - break; - } - case CompileDepth::NoFlowStack: { - disable_flow_stack = true; - const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == static_cast(exit_branch)) { - return; - } - basic_blocks.insert({label, nodes}); - }; - const auto& blocks = shader_info.blocks; - NodeBlock current_block; - u32 current_label = static_cast(exit_branch); - for (const auto& block : blocks) { - if (shader_info.labels.contains(block.start)) { - insert_block(current_block, current_label); - current_block.clear(); - current_label = block.start; - } - if 
(!block.ignore_branch) { - DecodeRangeInner(current_block, block.start, block.end); - InsertControlFlow(current_block, block); - } else { - DecodeRangeInner(current_block, block.start, block.end + 1); - } - } - insert_block(current_block, current_label); - break; - } - case CompileDepth::DecompileBackwards: - case CompileDepth::FullDecompile: { - program_manager = std::move(shader_info.manager); - disable_flow_stack = true; - decompiled = true; - ASTDecoder decoder{*this}; - ASTNode program = GetASTProgram(); - decoder.Visit(program); - break; - } - default: - LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); - [[fallthrough]]; - case CompileDepth::BruteForce: { - const auto shader_end = static_cast(program_code.size()); - coverage_begin = main_offset; - coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; ++label) { - basic_blocks.insert({label, DecodeRange(label, label + 1)}); - } - break; - } - } - if (settings.depth != shader_info.settings.depth) { - LOG_WARNING( - HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", - CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); - } -} - -NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { - NodeBlock basic_block; - DecodeRangeInner(basic_block, begin, end); - return basic_block; -} - -void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { - for (u32 pc = begin; pc < (begin > end ? 
MAX_PROGRAM_LENGTH : end);) { - pc = DecodeInstr(bb, pc); - } -} - -void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { - const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { - Node result = n; - if (cond.cc != ConditionCode::T) { - result = Conditional(GetConditionCode(cond.cc), {result}); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast(cond.predicate); - const bool is_neg = pred > 7; - if (is_neg) { - pred -= 8; - } - result = Conditional(GetPredicate(pred, is_neg), {result}); - } - return result; - }; - if (std::holds_alternative(*block.branch)) { - auto branch = std::get_if(block.branch.get()); - if (branch->address < 0) { - if (branch->kill) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Branch, Immediate(branch->address)); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - auto multi_branch = std::get_if(block.branch.get()); - Node op_a = GetRegister(multi_branch->gpr); - for (auto& branch_case : multi_branch->branches) { - Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); - Node op_b = Immediate(branch_case.cmp_value); - Node condition = - GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); - auto result = Conditional(condition, {n}); - bb.push_back(result); - global_code.push_back(result); - } -} - -u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { - // Ignore sched instructions when generating code. 
- if (IsSchedInstruction(pc, main_offset)) { - return pc + 1; - } - - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - const u32 nv_address = ConvertAddressToNvidiaSpace(pc); - - // Decoding failure - if (!opcode) { - UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); - bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", - nv_address, instr.value))); - return pc + 1; - } - - bb.push_back(Comment( - fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); - - using Tegra::Shader::Pred; - UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, - "NeverExecute predicate not implemented"); - - static const std::map decoders = { - {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, - {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, - {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, - {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, - {OpCode::Type::Shift, &ShaderIR::DecodeShift}, - {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, - {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, - {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, - {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, - {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, - {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, - {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, - {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, - {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, - {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, - {OpCode::Type::Image, &ShaderIR::DecodeImage}, - {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, - {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, - {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, - {OpCode::Type::PredicateSetRegister, 
&ShaderIR::DecodePredicateSetRegister}, - {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, - {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, - {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, - {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, - {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, - {OpCode::Type::Video, &ShaderIR::DecodeVideo}, - {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, - }; - - std::vector tmp_block; - if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { - pc = (this->*decoder->second)(tmp_block, pc); - } else { - pc = DecodeOther(tmp_block, pc); - } - - // Some instructions (like SSY) don't have a predicate field, they are always unconditionally - // executed. - const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); - const auto pred_index = static_cast(instr.pred.pred_index); - - if (can_be_predicated && pred_index != static_cast(Pred::UnusedIndex)) { - const Node conditional = - Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); - global_code.push_back(conditional); - bb.push_back(conditional); - } else { - for (auto& node : tmp_block) { - global_code.push_back(node); - bb.push_back(node); - } - } - - return pc + 1; -} - -void ShaderIR::PostDecode() { - // Deduce texture handler size if needed - auto gpu_driver = registry.AccessGuestDriverProfile(); - DeduceTextureHandlerSize(gpu_driver, used_samplers); - // Deduce Indexed Samplers - if (!uses_indexed_samplers) { - return; - } - for (auto& sampler : used_samplers) { - if (!sampler.is_indexed) { - continue; - } - if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { - sampler.size = *size; - } else { - LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); - sampler.size = 1; - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic.cpp 
b/src/video_core/shader/decode/arithmetic.cpp deleted file mode 100644 index 15eb700e7..000000000 --- a/src/video_core/shader/decode/arithmetic.cpp +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::SubOp; - -u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - - Node op_b = [&] { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV_C: - case OpCode::Id::MOV_R: { - // MOV does not have neither 'abs' nor 'neg' bits. - SetRegister(bb, instr.gpr0, op_b); - break; - } - case OpCode::Id::FMUL_C: - case OpCode::Id::FMUL_R: - case OpCode::Id::FMUL_IMM: { - // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. 
- if (instr.fmul.tab5cb8_2 != 0) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - } - if (instr.fmul.tab5c68_0 != 1) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); - } - - op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); - - static constexpr std::array FmulPostFactor = { - 1.000f, // None - 0.500f, // Divide 2 - 0.250f, // Divide 4 - 0.125f, // Divide 8 - 8.000f, // Mul 8 - 4.000f, // Mul 4 - 2.000f, // Mul 2 - }; - - if (instr.fmul.postfactor != 0) { - op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, - Immediate(FmulPostFactor[instr.fmul.postfactor])); - } - - // TODO(Rodrigo): Should precise be used when there's a postfactor? - Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); - - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD_C: - case OpCode::Id::FADD_R: - case OpCode::Id::FADD_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::MUFU: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - - Node value = [&]() { - switch (instr.sub_op) { - case SubOp::Cos: - return Operation(OperationCode::FCos, PRECISE, op_a); - case SubOp::Sin: - return Operation(OperationCode::FSin, PRECISE, op_a); - case SubOp::Ex2: - return Operation(OperationCode::FExp2, PRECISE, op_a); - case SubOp::Lg2: - return Operation(OperationCode::FLog2, PRECISE, op_a); - case SubOp::Rcp: - return Operation(OperationCode::FDiv, PRECISE, 
Immediate(1.0f), op_a); - case SubOp::Rsq: - return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); - case SubOp::Sqrt: - return Operation(OperationCode::FSqrt, PRECISE, op_a); - default: - UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); - return Immediate(0); - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FMNMX_C: - case OpCode::Id::FMNMX_R: - case OpCode::Id::FMNMX_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); - - const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); - const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FCMP_RR: - case OpCode::Id::FCMP_RC: - case OpCode::Id::FCMP_IMMR: { - UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); - Node op_c = GetRegister(instr.gpr39); - Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); - break; - } - case OpCode::Id::RRO_C: - case OpCode::Id::RRO_R: - case OpCode::Id::RRO_IMM: { - LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); - - // Currently RRO is only implemented as a register move. 
- op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - SetRegister(bb, instr.gpr0, op_b); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp deleted file mode 100644 index 88103fede..000000000 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - bool negate_a = false; - bool negate_b = false; - bool absolute_a = false; - bool absolute_b = false; - - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_R: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HADD2_C: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 56) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 
0; - break; - case OpCode::Id::HMUL2_R: - negate_a = ((instr.value >> 43) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HMUL2_C: - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 0; - break; - default: - UNREACHABLE(); - break; - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); - op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); - - auto [type_b, op_b] = [this, instr, opcode]() -> std::pair { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HMUL2_C: - return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HADD2_R: - case OpCode::Id::HMUL2_R: - return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; - default: - UNREACHABLE(); - return {HalfType::F32, Immediate(0)}; - } - }(); - op_b = UnpackHalfFloat(op_b, type_b); - op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); - - Node value = [this, opcode, op_a, op_b = op_b] { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HADD2_R: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_C: - case OpCode::Id::HMUL2_R: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); - return Immediate(0); - } - }(); - value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp deleted file mode 100644 index d179b9873..000000000 --- 
a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - if (instr.alu_half_imm.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } else { - if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); - - const Node op_b = UnpackHalfImmediate(instr, true); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_IMM: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_IMM: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); - SetRegister(bb, instr.gpr0, value); - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp deleted file 
mode 100644 index f1875967c..000000000 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV32_IMM: { - SetRegister(bb, instr.gpr0, GetImmediate32(instr)); - break; - } - case OpCode::Id::FMUL32_IMM: { - Node value = - Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); - value = GetSaturatedFloat(value, instr.fmul32.saturate); - - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD32I: { - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, - instr.fadd32i.negate_a); - const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, - instr.fadd32i.negate_b); - - const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp deleted file mode 100644 index 7b5bb7003..000000000 --- 
a/src/video_core/shader/decode/arithmetic_integer.cpp +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::IAdd3Height; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD_C: - case OpCode::Id::IADD_R: - case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); - UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - Node value = Operation(OperationCode::UAdd, op_a, op_b); - - if (instr.iadd.x) { - Node carry = GetInternalFlag(InternalFlag::Carry); - Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); - value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); - } - - if (instr.generates_cc) { - const Node i0 = Immediate(0); - - Node zero = Operation(OperationCode::LogicalIEqual, value, i0); - Node sign = Operation(OperationCode::LogicalILessThan, value, i0); - 
Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); - - Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); - Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); - Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); - Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); - - SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); - SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); - SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); - SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::IADD3_C: - case OpCode::Id::IADD3_R: - case OpCode::Id::IADD3_IMM: { - Node op_c = GetRegister(instr.gpr39); - - const auto ApplyHeight = [&](IAdd3Height height, Node value) { - switch (height) { - case IAdd3Height::None: - return value; - case IAdd3Height::LowerHalfWord: - return BitfieldExtract(value, 0, 16); - case IAdd3Height::UpperHalfWord: - return BitfieldExtract(value, 16, 16); - default: - UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); - return Immediate(0); - } - }; - - if (opcode->get().GetId() == OpCode::Id::IADD3_R) { - op_a = ApplyHeight(instr.iadd3.height_a, op_a); - op_b = ApplyHeight(instr.iadd3.height_b, op_b); - op_c = ApplyHeight(instr.iadd3.height_c, op_c); - } - - op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); - op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); - - const Node value = [&] { - Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); - if (opcode->get().GetId() != OpCode::Id::IADD3_R) { - return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); - } - const Node shifted = [&] { - switch (instr.iadd3.mode) { - case Tegra::Shader::IAdd3Mode::RightShift: - // TODO(tech4me): According to - // 
https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 - // The addition between op_a and op_b should be done in uint33, more - // investigation required - return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, - Immediate(16)); - case Tegra::Shader::IAdd3Mode::LeftShift: - return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, - Immediate(16)); - default: - return add_ab; - } - }(); - return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); - }(); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ISCADD_C: - case OpCode::Id::ISCADD_R: - case OpCode::Id::ISCADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in ISCADD is not implemented"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - const Node shift = Immediate(static_cast(instr.alu_integer.shift_amount)); - const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); - const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::POPC_C: - case OpCode::Id::POPC_R: - case OpCode::Id::POPC_IMM: { - if (instr.popc.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - } - const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FLO_R: - case OpCode::Id::FLO_C: - case OpCode::Id::FLO_IMM: { - Node value; - if (instr.flo.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.is_signed) { - value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); - } else { 
- value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.sh) { - value = - Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::SEL_C: - case OpCode::Id::SEL_R: - case OpCode::Id::SEL_IMM: { - const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); - const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ICMP_CR: - case OpCode::Id::ICMP_R: - case OpCode::Id::ICMP_RC: - case OpCode::Id::ICMP_IMM: { - const Node zero = Immediate(0); - - const auto [op_rhs, test] = [&]() -> std::pair { - switch (opcode->get().GetId()) { - case OpCode::Id::ICMP_CR: - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_R: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::ICMP_IMM: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {zero, zero}; - } - }(); - const Node op_lhs = GetRegister(instr.gpr8); - const Node comparison = - GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); - SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); - break; - } - case OpCode::Id::LOP_C: - case OpCode::Id::LOP_R: - case OpCode::Id::LOP_IMM: { - if (instr.alu.lop.invert_a) - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); - if (instr.alu.lop.invert_b) - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, - instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, - 
instr.generates_cc); - break; - } - case OpCode::Id::LOP3_C: - case OpCode::Id::LOP3_R: - case OpCode::Id::LOP3_IMM: { - const Node op_c = GetRegister(instr.gpr39); - const Node lut = [&]() { - if (opcode->get().GetId() == OpCode::Id::LOP3_R) { - return Immediate(instr.alu.lop3.GetImmLut28()); - } else { - return Immediate(instr.alu.lop3.GetImmLut48()); - } - }(); - - WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); - break; - } - case OpCode::Id::IMNMX_C: - case OpCode::Id::IMNMX_R: - case OpCode::Id::IMNMX_IMM: { - UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); - - const bool is_signed = instr.imnmx.is_signed; - - const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); - const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); - const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::LEA_R2: - case OpCode::Id::LEA_R1: - case OpCode::Id::LEA_IMM: - case OpCode::Id::LEA_RZ: - case OpCode::Id::LEA_HI: { - auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::LEA_R2: { - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), - Immediate(static_cast(instr.lea.r2.entry_a))}; - } - case OpCode::Id::LEA_R1: { - const bool neg = instr.lea.r1.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - GetRegister(instr.gpr20), - Immediate(static_cast(instr.lea.r1.entry_a))}; - } - case OpCode::Id::LEA_IMM: { - const bool neg = instr.lea.imm.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast(instr.lea.imm.entry_a)), - 
Immediate(static_cast(instr.lea.imm.entry_b))}; - } - case OpCode::Id::LEA_RZ: { - const bool neg = instr.lea.rz.neg != 0; - return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), - GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast(instr.lea.rz.entry_a))}; - } - case OpCode::Id::LEA_HI: - default: - UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); - - return {Immediate(static_cast(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), - Immediate(static_cast(instr.lea.imm.entry_b))}; - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast(Pred::UnusedIndex), - "Unhandled LEA Predicate"); - - Node value = - Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); - value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value)); - SetRegister(bb, instr.gpr0, std::move(value)); - - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, - Node imm_lut, bool sets_cc) { - const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { - Node value = Immediate(0); - const ImmediateNode imm = std::get(*ttbl); - if (imm.GetValue() & 0x01) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x02) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = 
Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x04) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x08) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x10) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x20) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x40) { - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x80) { - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - return value; - }(op_a, op_b, op_c, imm_lut); - - SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); 
- SetRegister(bb, dest, lop3_fast); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp deleted file mode 100644 index 73580277a..000000000 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::LogicOperation; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredicateResultMode; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = Immediate(static_cast(instr.alu.imm20_32)); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD32I: { - UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); - - op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); - - Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); - - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::LOP32I: { - if (instr.alu.lop32i.invert_a) { - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); - } - - if (instr.alu.lop32i.invert_b) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - - WriteLogicOperation(bb, instr.gpr0, 
instr.alu.lop32i.operation, std::move(op_a), - std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, - instr.op_32.generates_cc != 0); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, - Node op_b, PredicateResultMode predicate_mode, Pred predicate, - bool sets_cc) { - Node result = [&] { - switch (logic_op) { - case LogicOperation::And: - return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Or: - return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Xor: - return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::PassB: - return op_b; - default: - UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); - return Immediate(0); - } - }(); - - SetInternalFlagsFromInteger(bb, result, sets_cc); - SetRegister(bb, dest, result); - - // Write the predicate value depending on the predicate mode. - switch (predicate_mode) { - case PredicateResultMode::None: - // Do nothing. - return; - case PredicateResultMode::NotZero: { - // Set the predicate to true if the result is not zero. 
- Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); - SetPredicate(bb, static_cast(predicate), std::move(compare)); - break; - } - default: - UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp deleted file mode 100644 index 8e3b46e8e..000000000 --- a/src/video_core/shader/decode/bfe.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::BFE_R: - return GetRegister(instr.gpr20); - case OpCode::Id::BFE_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::BFE_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); - - const bool is_signed = instr.bfe.is_signed; - - // using reverse parallel method in - // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel - // note for later if possible to implement faster method. 
- if (instr.bfe.brev) { - const auto swap = [&](u32 s, u32 mask) { - Node v1 = - SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); - if (mask != 0) { - v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), - Immediate(mask)); - } - Node v2 = op_a; - if (mask != 0) { - v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), - Immediate(mask)); - } - v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), - Immediate(s)); - return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), - std::move(v2)); - }; - op_a = swap(1, 0x55555555U); - op_a = swap(2, 0x33333333U); - op_a = swap(4, 0x0F0F0F0FU); - op_a = swap(8, 0x00FF00FFU); - op_a = swap(16, 0); - } - - const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(0), Immediate(8)); - const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(8), Immediate(8)); - auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); - SetRegister(bb, instr.gpr0, std::move(result)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp deleted file mode 100644 index 70d1c055b..000000000 --- a/src/video_core/shader/decode/bfi.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto [packed_shift, base] = [&]() -> std::pair { - switch (opcode->get().GetId()) { - case OpCode::Id::BFI_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::BFI_IMM_R: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {Immediate(0), Immediate(0)}; - } - }(); - const Node insert = GetRegister(instr.gpr8); - const Node offset = BitfieldExtract(packed_shift, 0, 8); - const Node bits = BitfieldExtract(packed_shift, 8, 8); - - const Node value = - Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp deleted file mode 100644 index fea7a54df..000000000 --- a/src/video_core/shader/decode/conversion.cpp +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; - -namespace { - -constexpr OperationCode GetFloatSelector(u64 selector) { - return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; -} - -constexpr u32 SizeInBits(Register::Size size) { - switch (size) { - case Register::Size::Byte: - return 8; - case Register::Size::Short: - return 16; - case Register::Size::Word: - return 32; - case Register::Size::Long: - return 64; - } - return 0; -} - -constexpr std::optional> IntegerSaturateBounds(Register::Size src_size, - Register::Size dst_size, - bool src_signed, - bool dst_signed) { - const u32 dst_bits = SizeInBits(dst_size); - if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { - if (src_signed == dst_signed) { - return std::nullopt; - } - return std::make_pair(0, std::numeric_limits::max()); - } - if (dst_signed) { - // Signed destination, clamp to [-128, 127] for instance - return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); - } else { - // Unsigned destination - if (dst_bits == 32) { - // Avoid shifting by 32, that is undefined behavior - return std::make_pair(0, s32(std::numeric_limits::max())); - } - return std::make_pair(0, (1 << dst_bits) - 1); - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: - case OpCode::Id::I2I_C: - case OpCode::Id::I2I_IMM: { - const bool src_signed = instr.conversion.is_input_signed; - const bool dst_signed = instr.conversion.is_output_signed; - const Register::Size 
src_size = instr.conversion.src_size; - const Register::Size dst_size = instr.conversion.dst_size; - const u32 selector = static_cast(instr.conversion.int_src.selector); - - Node value = [this, instr, opcode] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2I_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - // Ensure the source selector is valid - switch (instr.conversion.src_size) { - case Register::Size::Byte: - break; - case Register::Size::Short: - ASSERT(selector == 0 || selector == 2); - break; - default: - ASSERT(selector == 0); - break; - } - - if (src_size != Register::Size::Word || selector != 0) { - value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), - Immediate(selector * 8), Immediate(SizeInBits(src_size))); - } - - value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, - instr.conversion.negate_a, src_signed); - - if (instr.alu.saturate_d) { - if (src_signed && !dst_signed) { - Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, - Immediate(1 << (SizeInBits(src_size) - 1))); - value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), - std::move(value)); - - // Simplify generated expressions, this can be removed without semantic impact - SetTemporary(bb, 0, std::move(value)); - value = GetTemporary(0); - - if (dst_size != Register::Size::Word) { - const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); - Node is_large = - Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); - value = Operation(OperationCode::Select, std::move(is_large), limit, - std::move(value)); - } - } else if (const std::optional bounds = - IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { - value = 
SignedOperation(OperationCode::IMax, src_signed, std::move(value), - Immediate(bounds->first)); - value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), - Immediate(bounds->second)); - } - } else if (dst_size != Register::Size::Word) { - // No saturation, we only have to mask the result - Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); - value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::I2F_R: - case OpCode::Id::I2F_C: - case OpCode::Id::I2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in I2F is not implemented"); - - Node value = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2F_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const bool input_signed = instr.conversion.is_input_signed; - - if (const u32 offset = static_cast(instr.conversion.int_src.selector); offset > 0) { - ASSERT(instr.conversion.src_size == Register::Size::Byte || - instr.conversion.src_size == Register::Size::Short); - if (instr.conversion.src_size == Register::Size::Short) { - ASSERT(offset == 0 || offset == 2); - } - value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, - std::move(value), Immediate(offset * 8)); - } - - value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); - value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); - value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); - value = GetOperandAbsNegFloat(value, false, 
instr.conversion.negate_a); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2F_R: - case OpCode::Id::F2F_C: - case OpCode::Id::F2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2F is not implemented"); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2F_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&] { - if (instr.conversion.src_size != instr.conversion.dst_size) { - // Rounding operations only matter when the source and destination conversion size - // is the same. 
- return value; - } - switch (instr.conversion.f2f.GetRoundingMode()) { - case Tegra::Shader::F2fRoundingOp::None: - return value; - case Tegra::Shader::F2fRoundingOp::Round: - return Operation(OperationCode::FRoundEven, value); - case Tegra::Shader::F2fRoundingOp::Floor: - return Operation(OperationCode::FFloor, value); - case Tegra::Shader::F2fRoundingOp::Ceil: - return Operation(OperationCode::FCeil, value); - case Tegra::Shader::F2fRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", - instr.conversion.f2f.rounding.Value()); - return value; - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2I_R: - case OpCode::Id::F2I_C: - case OpCode::Id::F2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2I is not implemented"); - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2I_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&]() { - switch (instr.conversion.f2i.rounding) { - case Tegra::Shader::F2iRoundingOp::RoundEven: - return 
Operation(OperationCode::FRoundEven, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Floor: - return Operation(OperationCode::FFloor, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Ceil: - return Operation(OperationCode::FCeil, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, PRECISE, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", - instr.conversion.f2i.rounding.Value()); - return Immediate(0); - } - }(); - const bool is_signed = instr.conversion.is_output_signed; - value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); - value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); - - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp deleted file mode 100644 index 5973588d6..000000000 --- a/src/video_core/shader/decode/ffma.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - if (instr.ffma.tab5980_0 != 1) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); - } - if (instr.ffma.tab5980_1 != 0) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); - } - - const Node op_a = GetRegister(instr.gpr8); - - auto [op_b, op_c] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::FFMA_CR: { - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - } - case OpCode::Id::FFMA_RR: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::FFMA_RC: { - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - } - case OpCode::Id::FFMA_IMM: - return {GetImmediate19(instr), GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); - return {Immediate(0), Immediate(0)}; - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); - op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); - - Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set.cpp 
b/src/video_core/shader/decode/float_set.cpp deleted file mode 100644 index 5614e8a0d..000000000 --- a/src/video_core/shader/decode/float_set.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, - instr.fset.neg_a != 0); - - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); - - // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the - // condition is true, and to 0 otherwise. - const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.fset.op); - const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.fset.bf ? 
Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.fset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp deleted file mode 100644 index 200c2c983..000000000 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, - instr.fsetp.neg_a != 0); - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); - - // We can't use the constant predicate as destination. 
- ASSERT(instr.fsetp.pred3 != static_cast(Pred::UnusedIndex)); - - const Node predicate = - GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); - const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); - const Node value = Operation(combiner, predicate, second_pred); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.fsetp.pred3, value); - - if (instr.fsetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); - const Node second_value = Operation(combiner, negated_pred, second_pred); - SetPredicate(bb, instr.fsetp.pred0, second_value); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp deleted file mode 100644 index fa83108cd..000000000 --- a/src/video_core/shader/decode/half_set.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; - -u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - PredCondition cond{}; - bool bf = false; - bool ftz = false; - bool neg_a = false; - bool abs_a = false; - bool neg_b = false; - bool abs_b = false; - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_C: - case OpCode::Id::HSET2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - bf = instr.Bit(53); - ftz = instr.Bit(54); - neg_a = instr.Bit(43); - abs_a = instr.Bit(44); - neg_b = instr.Bit(56); - abs_b = instr.Bit(54); - break; - case OpCode::Id::HSET2_R: - cond = instr.hsetp2.reg.cond; - bf = instr.Bit(49); - ftz = instr.Bit(50); - neg_a = instr.Bit(43); - abs_a = instr.Bit(44); - neg_b = instr.Bit(31); - abs_b = instr.Bit(30); - break; - default: - UNREACHABLE(); - } - - Node op_b = [this, instr, opcode] { - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_C: - // Inform as unimplemented as this is not tested. 
- UNIMPLEMENTED_MSG("HSET2_C is not implemented"); - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::HSET2_R: - return GetRegister(instr.gpr20); - case OpCode::Id::HSET2_IMM: - return UnpackHalfImmediate(instr, true); - default: - UNREACHABLE(); - return Node{}; - } - }(); - - if (!ftz) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); - op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); - - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_R: - op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); - [[fallthrough]]; - case OpCode::Id::HSET2_C: - op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); - break; - default: - break; - } - - Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); - - Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); - - // HSET2 operates on each half float in the pack. - std::array values; - for (u32 i = 0; i < 2; ++i) { - const u32 raw_value = bf ? 
0x3c00 : 0xffff; - Node true_value = Immediate(raw_value << (i * 16)); - Node false_value = Immediate(0); - - Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); - Node predicate = Operation(combiner, comparison, second_pred); - values[i] = - Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); - } - - Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); - SetRegister(bb, instr.gpr0, move(value)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp deleted file mode 100644 index 310655619..000000000 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (instr.hsetp2.ftz != 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - - Tegra::Shader::PredCondition cond{}; - bool h_and{}; - Node op_b{}; - switch (opcode->get().GetId()) { - case OpCode::Id::HSETP2_C: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = 
GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); - // F32 is hardcoded in hardware - op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); - break; - case OpCode::Id::HSETP2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = UnpackHalfImmediate(instr, true); - break; - case OpCode::Id::HSETP2_R: - cond = instr.hsetp2.reg.cond; - h_and = instr.hsetp2.reg.h_and; - op_b = - GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), - instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); - break; - default: - UNREACHABLE(); - op_b = Immediate(0); - } - - const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); - - const auto Write = [&](u64 dest, Node src) { - SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); - }; - - const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); - const u64 first = instr.hsetp2.pred3; - const u64 second = instr.hsetp2.pred0; - if (h_and) { - Node joined = Operation(OperationCode::LogicalAnd2, comparison); - Write(first, joined); - Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); - } else { - Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); - Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp deleted file mode 100644 index 5b44cb79c..000000000 --- a/src/video_core/shader/decode/hfma2.cpp +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfPrecision; -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { - DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); - } else { - DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); - } - - constexpr auto identity = HalfType::H0_H1; - bool neg_b{}, neg_c{}; - auto [saturate, type_b, op_b, type_c, - op_c] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::HFMA2_CR: - neg_b = instr.hfma2.negate_b; - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, HalfType::F32, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; - case OpCode::Id::HFMA2_RC: - neg_b = instr.hfma2.negate_b; - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), - HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HFMA2_RR: - neg_b = instr.hfma2.rr.negate_b; - neg_c = instr.hfma2.rr.negate_c; - return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), - instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; - case OpCode::Id::HFMA2_IMM_R: - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), - instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; - default: - return {false, identity, Immediate(0), identity, Immediate(0)}; - } - }(); - - const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), 
instr.hfma2.type_a); - op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); - op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); - - Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedHalfFloat(value, saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp deleted file mode 100644 index 5470e8cf4..000000000 --- a/src/video_core/shader/decode/image.cpp +++ /dev/null @@ -1,536 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/textures/texture.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; -using Tegra::Shader::StoreType; -using Tegra::Texture::ComponentType; -using Tegra::Texture::TextureFormat; -using Tegra::Texture::TICEntry; - -namespace { - -ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, - std::size_t component) { - const TextureFormat format{descriptor.format}; - switch (format) { - case TextureFormat::R16G16B16A16: - case TextureFormat::R32G32B32A32: - case TextureFormat::R32G32B32: - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return 
descriptor.g_type; - } - if (component == 2) { - return descriptor.b_type; - } - if (component == 3) { - return descriptor.a_type; - } - break; - case TextureFormat::A8R8G8B8: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.r_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.b_type; - } - break; - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.r_type; - } - break; - case TextureFormat::R32_B24G8: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - break; - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - if (component == 0) { - return descriptor.b_type; - } - if (component == 1) { - return descriptor.g_type; - } - if (component == 2) { - return descriptor.r_type; - } - break; - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - if (component == 0) { - return descriptor.g_type; - } - if (component == 1) { - return descriptor.r_type; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return ComponentType::FLOAT; -} - -bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array mask = { - 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), - (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return 
std::bitset<4>{mask.at(component_mask)}.test(component); -} - -u32 GetComponentSize(TextureFormat format, std::size_t component) { - switch (format) { - case TextureFormat::R32G32B32A32: - return 32; - case TextureFormat::R16G16B16A16: - return 16; - case TextureFormat::R32G32B32: - return component <= 2 ? 32 : 0; - case TextureFormat::R32G32: - return component <= 1 ? 32 : 0; - case TextureFormat::R16G16: - return component <= 1 ? 16 : 0; - case TextureFormat::R32: - return component == 0 ? 32 : 0; - case TextureFormat::R16: - return component == 0 ? 16 : 0; - case TextureFormat::R8: - return component == 0 ? 8 : 0; - case TextureFormat::R1: - return component == 0 ? 1 : 0; - case TextureFormat::A8R8G8B8: - return 8; - case TextureFormat::A2B10G10R10: - return (component == 3 || component == 2 || component == 1) ? 10 : 2; - case TextureFormat::A4B4G4R4: - return 4; - case TextureFormat::A5B5G5R1: - return (component == 0 || component == 1 || component == 2) ? 5 : 1; - case TextureFormat::A1B5G5R5: - return (component == 1 || component == 2 || component == 3) ? 5 : 1; - case TextureFormat::R32_B24G8: - if (component == 0) { - return 32; - } - if (component == 1) { - return 24; - } - if (component == 2) { - return 8; - } - return 0; - case TextureFormat::B5G6R5: - if (component == 0 || component == 2) { - return 5; - } - if (component == 1) { - return 6; - } - return 0; - case TextureFormat::B6G5R5: - if (component == 1 || component == 2) { - return 5; - } - if (component == 0) { - return 6; - } - return 0; - case TextureFormat::B10G11R11: - if (component == 1 || component == 2) { - return 11; - } - if (component == 0) { - return 10; - } - return 0; - case TextureFormat::R24G8: - if (component == 0) { - return 8; - } - if (component == 1) { - return 24; - } - return 0; - case TextureFormat::R8G24: - if (component == 0) { - return 24; - } - if (component == 1) { - return 8; - } - return 0; - case TextureFormat::R8G8: - return (component == 0 || component == 1) ? 
8 : 0; - case TextureFormat::G4R4: - return (component == 0 || component == 1) ? 4 : 0; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return 0; - } -} - -std::size_t GetImageComponentMask(TextureFormat format) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - switch (format) { - case TextureFormat::R32G32B32A32: - case TextureFormat::R16G16B16A16: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - return std::size_t{R | G | B | A}; - case TextureFormat::R32G32B32: - case TextureFormat::R32_B24G8: - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - return std::size_t{R | G | B}; - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - return std::size_t{R | G}; - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - return std::size_t{R}; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return std::size_t{R | G | B | A}; - } -} - -std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - case Tegra::Shader::ImageType::TextureBuffer: - return 1; - case Tegra::Shader::ImageType::Texture1DArray: - case Tegra::Shader::ImageType::Texture2D: - return 2; - case Tegra::Shader::ImageType::Texture2DArray: - case Tegra::Shader::ImageType::Texture3D: - return 3; - } - UNREACHABLE(); - return 1; -} -} // Anonymous namespace - -std::pair ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, - Node original_value) { - switch (component_type) { - case ComponentType::SNORM: { - // range [-1.0, 1.0] - auto cnv_value = 
Operation(OperationCode::FMul, original_value, - Immediate(static_cast(1 << component_size) / 2.f - 1.f)); - cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); - return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; - } - case ComponentType::SINT: - case ComponentType::UNORM: { - bool is_signed = component_type == ComponentType::SINT; - // range [0.0, 1.0] - auto cnv_value = Operation(OperationCode::FMul, original_value, - Immediate(static_cast(1 << component_size) - 1.f)); - return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), - is_signed}; - } - case ComponentType::UINT: // range [0, (1 << component_size) - 1] - return {std::move(original_value), false}; - case ComponentType::FLOAT: - if (component_size == 16) { - return {Operation(OperationCode::HCastFloat, original_value), true}; - } else { - return {std::move(original_value), true}; - } - default: - UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); - return {std::move(original_value), true}; - } -} - -u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { - std::vector coords; - const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; - coords.reserve(num_coords); - for (std::size_t i = 0; i < num_coords; ++i) { - coords.push_back(GetRegister(instr.gpr8.Value() + i)); - } - return coords; - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::SULD: { - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - - const auto type{instr.suldst.image_type}; - auto& image{instr.suldst.is_immediate ? 
GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkRead(); - - if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.suldst.IsComponentEnabled(element)) { - continue; - } - MetaImage meta{image, {}, element}; - Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { - UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && - instr.suldst.GetStoreDataLayout() != StoreType::Bits64); - - auto descriptor = [this, instr] { - std::optional sampler_descriptor; - if (instr.suldst.is_immediate) { - sampler_descriptor = - registry.ObtainBoundSampler(static_cast(instr.image.index.Value())); - } else { - const Node image_register = GetRegister(instr.gpr39); - const auto result = TrackCbuf(image_register, global_code, - static_cast(global_code.size())); - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); - } - if (!sampler_descriptor) { - UNREACHABLE_MSG("Failed to obtain image descriptor"); - } - return *sampler_descriptor; - }(); - - const auto comp_mask = GetImageComponentMask(descriptor.format); - - switch (instr.suldst.GetStoreDataLayout()) { - case StoreType::Bits32: - case StoreType::Bits64: { - u32 indexer = 0; - u32 shifted_counter = 0; - Node value = Immediate(0); - for (u32 element = 0; element < 4; ++element) { - if (!IsComponentEnabled(comp_mask, element)) { - continue; - } - const auto component_type = GetComponentType(descriptor, element); - const auto component_size = GetComponentSize(descriptor.format, element); - MetaImage meta{image, {}, element}; - - auto 
[converted_value, is_signed] = GetComponentValue( - component_type, component_size, - Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); - - // shift element to correct position - const auto shifted = shifted_counter; - if (shifted > 0) { - converted_value = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, - std::move(converted_value), Immediate(shifted)); - } - shifted_counter += component_size; - - // add value into result - value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); - - // if we shifted enough for 1 byte -> we save it into temp - if (shifted_counter >= 32) { - SetTemporary(bb, indexer++, std::move(value)); - // reset counter and value to prepare pack next byte - value = Immediate(0); - shifted_counter = 0; - } - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNREACHABLE(); - break; - } - } - break; - } - case OpCode::Id::SUST: { - UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA - - std::vector values; - constexpr std::size_t hardcoded_size{4}; - for (std::size_t i = 0; i < hardcoded_size; ++i) { - values.push_back(GetRegister(instr.gpr0.Value() + i)); - } - - const auto type{instr.suldst.image_type}; - auto& image{instr.suldst.is_immediate ? 
GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkWrite(); - - MetaImage meta{image, std::move(values)}; - bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); - break; - } - case OpCode::Id::SUATOM: { - UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); - - const OperationCode operation_code = [instr] { - switch (instr.suatom_d.operation_type) { - case Tegra::Shader::ImageAtomicOperationType::S32: - case Tegra::Shader::ImageAtomicOperationType::U32: - switch (instr.suatom_d.operation) { - case Tegra::Shader::ImageAtomicOperation::Add: - return OperationCode::AtomicImageAdd; - case Tegra::Shader::ImageAtomicOperation::And: - return OperationCode::AtomicImageAnd; - case Tegra::Shader::ImageAtomicOperation::Or: - return OperationCode::AtomicImageOr; - case Tegra::Shader::ImageAtomicOperation::Xor: - return OperationCode::AtomicImageXor; - case Tegra::Shader::ImageAtomicOperation::Exch: - return OperationCode::AtomicImageExchange; - default: - break; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", - static_cast(instr.suatom_d.operation.Value()), - static_cast(instr.suatom_d.operation_type.Value())); - return OperationCode::AtomicImageAdd; - }(); - - Node value = GetRegister(instr.gpr0); - - const auto type = instr.suatom_d.image_type; - auto& image = GetImage(instr.image, type); - image.MarkAtomic(); - - MetaImage meta{image, {std::move(value)}}; - SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { - const auto offset = static_cast(image.index.Value()); - - const auto it = - std::find_if(std::begin(used_images), std::end(used_images), - [offset](const ImageEntry& entry) { return entry.offset == offset; }); - if (it != 
std::end(used_images)) { - ASSERT(!it->is_bindless && it->type == type); - return *it; - } - - const auto next_index = static_cast(used_images.size()); - return used_images.emplace_back(next_index, offset, type); -} - -ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { - const Node image_register = GetRegister(reg); - const auto result = - TrackCbuf(image_register, global_code, static_cast(global_code.size())); - - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - - const auto it = std::find_if(std::begin(used_images), std::end(used_images), - [buffer, offset](const ImageEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != std::end(used_images)) { - ASSERT(it->is_bindless && it->type == type); - return *it; - } - - const auto next_index = static_cast(used_images.size()); - return used_images.emplace_back(next_index, offset, buffer, type); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp deleted file mode 100644 index 59809bcd8..000000000 --- a/src/video_core/shader/decode/integer_set.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition - // is true, and to 0 otherwise. - const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); - const Node first_pred = - GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.iset.op); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp deleted file mode 100644 index 25e48fef8..000000000 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // We can't use the constant predicate as destination. - ASSERT(instr.isetp.pred3 != static_cast(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); - const Node predicate = - GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); - const Node value = Operation(combiner, predicate, second_pred); - SetPredicate(bb, instr.isetp.pred3, value); - - if (instr.isetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); - SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp deleted file mode 100644 index 7728f600e..000000000 --- a/src/video_core/shader/decode/memory.cpp +++ /dev/null @@ -1,493 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// 
Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::AtomicOp; -using Tegra::Shader::AtomicType; -using Tegra::Shader::Attribute; -using Tegra::Shader::GlobalAtomicType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::StoreType; - -namespace { - -OperationCode GetAtomOperation(AtomicOp op) { - switch (op) { - case AtomicOp::Add: - return OperationCode::AtomicIAdd; - case AtomicOp::Min: - return OperationCode::AtomicIMin; - case AtomicOp::Max: - return OperationCode::AtomicIMax; - case AtomicOp::And: - return OperationCode::AtomicIAnd; - case AtomicOp::Or: - return OperationCode::AtomicIOr; - case AtomicOp::Xor: - return OperationCode::AtomicIXor; - case AtomicOp::Exch: - return OperationCode::AtomicIExchange; - default: - UNIMPLEMENTED_MSG("op={}", op); - return OperationCode::AtomicIAdd; - } -} - -bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { - return uniform_type == Tegra::Shader::UniformType::UnsignedByte || - uniform_type == Tegra::Shader::UniformType::UnsignedShort; -} - -u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { - switch (uniform_type) { - case Tegra::Shader::UniformType::UnsignedByte: - return 0b11; - case Tegra::Shader::UniformType::UnsignedShort: - return 0b10; - default: - UNREACHABLE(); - return 0; - } -} - -u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { - switch (uniform_type) { - case Tegra::Shader::UniformType::UnsignedByte: - return 8; - case Tegra::Shader::UniformType::UnsignedShort: - return 16; - case 
Tegra::Shader::UniformType::Single: - return 32; - case Tegra::Shader::UniformType::Double: - return 64; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 128; - default: - UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); - return 32; - } -} - -Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); -} - -Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), - Immediate(size)); -} - -Node Sign16Extend(Node value) { - Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); - Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); - Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); - return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::LD_A: { - // Note: Shouldn't this be interp mode flat? As in no interpolation made. 
- UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, - "Indirect attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, - "Unaligned attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && - instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, - "Non-32 bits PHYS reads are not implemented"); - - const Node buffer{GetRegister(instr.gpr39)}; - - u64 next_element = instr.attribute.fmt20.element; - auto next_index = static_cast(instr.attribute.fmt20.index.Value()); - - const auto LoadNextElement = [&](u32 reg_offset) { - const Node attribute{instr.attribute.fmt20.IsPhysical() - ? GetPhysicalInputAttribute(instr.gpr8, buffer) - : GetInputAttribute(static_cast(next_index), - next_element, buffer)}; - - SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); - - // Load the next attribute element into the following register. If the element - // to load goes beyond the vec4 size, load the first element of the next - // attribute. - next_element = (next_element + 1) % 4; - next_index = next_index + (next_element == 0 ? 
1 : 0); - }; - - const u32 num_words = static_cast(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - LoadNextElement(reg_offset); - } - break; - } - case OpCode::Id::LD_C: { - UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); - - Node index = GetRegister(instr.gpr8); - - const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - - switch (instr.ld_c.type.Value()) { - case Tegra::Shader::UniformType::Single: - SetRegister(bb, instr.gpr0, op_a); - break; - - case Tegra::Shader::UniformType::Double: { - const Node op_b = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); - - SetTemporary(bb, 0, op_a); - SetTemporary(bb, 1, op_b); - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); - } - break; - } - case OpCode::Id::LD_L: - LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); - [[fallthrough]]; - case OpCode::Id::LD_S: { - const auto GetAddress = [&](s32 offset) { - ASSERT(offset % 4 == 0); - const Node immediate_offset = Immediate(static_cast(instr.smem_imm) + offset); - return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); - }; - const auto GetMemory = [&](s32 offset) { - return opcode->get().GetId() == OpCode::Id::LD_S ? 
GetSharedMemory(GetAddress(offset)) - : GetLocalMemory(GetAddress(offset)); - }; - - switch (instr.ldst_sl.type.Value()) { - case StoreType::Signed16: - SetRegister(bb, instr.gpr0, - Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); - break; - case StoreType::Bits32: - case StoreType::Bits64: - case StoreType::Bits128: { - const u32 count = [&] { - switch (instr.ldst_sl.type.Value()) { - case StoreType::Bits32: - return 1; - case StoreType::Bits64: - return 2; - case StoreType::Bits128: - return 4; - default: - UNREACHABLE(); - return 0; - } - }(); - for (u32 i = 0; i < count; ++i) { - SetTemporary(bb, i, GetMemory(i * 4)); - } - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), - instr.ldst_sl.type.Value()); - } - break; - } - case OpCode::Id::LD: - case OpCode::Id::LDG: { - const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { - switch (opcode->get().GetId()) { - case OpCode::Id::LD: - UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); - return instr.generic.type; - case OpCode::Id::LDG: - return instr.ldg.type; - default: - UNREACHABLE(); - return {}; - } - }(); - - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, false); - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - if (!real_address_base || !base_address) { - // Tracking failed, load zeroes. 
- for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); - } - break; - } - - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - Node gmem = MakeNode(real_address, base_address, descriptor); - - // To handle unaligned loads get the bytes used to dereference global memory and extract - // those bytes from the loaded u32. - if (IsUnaligned(type)) { - gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); - } - - SetTemporary(bb, i, gmem); - } - - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::ST_A: { - UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, - "Indirect attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, - "Unaligned attribute loads are not supported"); - - u64 element = instr.attribute.fmt20.element; - auto index = static_cast(instr.attribute.fmt20.index.Value()); - - const u32 num_words = static_cast(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - Node dest; - if (instr.attribute.fmt20.patch) { - const u32 offset = static_cast(index) * 4 + static_cast(element); - dest = MakeNode(offset); - } else { - dest = GetOutputAttribute(static_cast(index), element, - GetRegister(instr.gpr39)); - } - const auto src = GetRegister(instr.gpr0.Value() + reg_offset); - - bb.push_back(Operation(OperationCode::Assign, dest, src)); - - // Load the next attribute element into the following register. If the element to load - // goes beyond the vec4 size, load the first element of the next attribute. - element = (element + 1) % 4; - index = index + (element == 0 ? 
1 : 0); - } - break; - } - case OpCode::Id::ST_L: - LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); - [[fallthrough]]; - case OpCode::Id::ST_S: { - const auto GetAddress = [&](s32 offset) { - ASSERT(offset % 4 == 0); - const Node immediate = Immediate(static_cast(instr.smem_imm) + offset); - return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); - }; - - const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; - const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; - const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; - - switch (instr.ldst_sl.type.Value()) { - case StoreType::Bits128: - (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); - (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); - [[fallthrough]]; - case StoreType::Bits64: - (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); - [[fallthrough]]; - case StoreType::Bits32: - (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); - break; - case StoreType::Unsigned16: - case StoreType::Signed16: { - Node address = GetAddress(0); - Node memory = (this->*get_memory)(address); - (this->*set_memory)( - bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); - break; - } - default: - UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), - instr.ldst_sl.type.Value()); - } - break; - } - case OpCode::Id::ST: - case OpCode::Id::STG: { - const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { - switch (opcode->get().GetId()) { - case OpCode::Id::ST: - UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); - return instr.generic.type; - case OpCode::Id::STG: - return instr.stg.type; - default: - UNREACHABLE(); - return {}; - } - }(); - - // For unaligned reads we have to read memory too. 
- const bool is_read = IsUnaligned(type); - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, is_read, true); - if (!real_address_base || !base_address) { - // Tracking failed, skip the store. - break; - } - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - const Node gmem = MakeNode(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0.Value() + i); - - if (IsUnaligned(type)) { - const u32 mask = GetUnalignedMask(type); - value = InsertUnaligned(gmem, move(value), real_address, mask, size); - } - - bb.push_back(Operation(OperationCode::Assign, gmem, value)); - } - break; - } - case OpCode::Id::RED: { - UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", - instr.red.type.Value()); - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. 
- break; - } - Node gmem = MakeNode(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0); - bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); - break; - } - case OpCode::Id::ATOM: { - UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || - instr.atom.operation == AtomicOp::Dec || - instr.atom.operation == AtomicOp::SafeAdd, - "operation={}", instr.atom.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || - instr.atom.type == GlobalAtomicType::U64 || - instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || - instr.atom.type == GlobalAtomicType::F32_FTZ_RN, - "type={}", instr.atom.type.Value()); - - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. - break; - } - - const bool is_signed = - instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; - Node gmem = MakeNode(real_address, base_address, descriptor); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, - GetRegister(instr.gpr20))); - break; - } - case OpCode::Id::ATOMS: { - UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || - instr.atoms.operation == AtomicOp::Dec, - "operation={}", instr.atoms.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || - instr.atoms.type == AtomicType::U64, - "type={}", instr.atoms.type.Value()); - const bool is_signed = - instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; - const s32 offset = instr.atoms.GetImmediateOffset(); - Node address = GetRegister(instr.gpr8); - address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, - GetSharedMemory(move(address)), GetRegister(instr.gpr20))); - break; 
- } - case OpCode::Id::AL2P: { - // Ignore al2p.direction since we don't care about it. - - // Calculate emulation fake physical address. - const Node fixed_address{Immediate(static_cast(instr.al2p.address))}; - const Node reg{GetRegister(instr.gpr8)}; - const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; - - // Set the fake address to target register. - SetRegister(bb, instr.gpr0, fake_address); - - // Signal the shader IR to declare all possible attributes and varyings - uses_physical_attributes = true; - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -std::tuple ShaderIR::TrackGlobalMemory(NodeBlock& bb, - Instruction instr, - bool is_read, bool is_write) { - const auto addr_register{GetRegister(instr.gmem.gpr)}; - const auto immediate_offset{static_cast(instr.gmem.offset)}; - - const auto [base_address, index, offset] = - TrackCbuf(addr_register, global_code, static_cast(global_code.size())); - ASSERT_OR_EXECUTE_MSG( - base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, - "Global memory tracking failed"); - - bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); - - const GlobalMemoryBase descriptor{index, offset}; - const auto& entry = used_global_memory.try_emplace(descriptor).first; - auto& usage = entry->second; - usage.is_written |= is_write; - usage.is_read |= is_read; - - const auto real_address = - Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); - - return {real_address, base_address, descriptor}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp deleted file mode 100644 index 5f88537bc..000000000 --- a/src/video_core/shader/decode/other.cpp +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or 
any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::OpCode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::SystemVariable; - -using Index = Tegra::Shader::Attribute::Index; - -u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::NOP: { - UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); - UNIMPLEMENTED_IF(instr.nop.trigger != 0); - // With the previous preconditions, this instruction is a no-operation. - break; - } - case OpCode::Id::EXIT: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); - - switch (instr.flow.cond) { - case Tegra::Shader::FlowCondition::Always: - bb.push_back(Operation(OperationCode::Exit)); - if (instr.pred.pred_index == static_cast(Pred::UnusedIndex)) { - // If this is an unconditional exit then just end processing here, - // otherwise we have to account for the possibility of the condition - // not being met, so continue processing the next instruction. - pc = MAX_PROGRAM_LENGTH - 1; - } - break; - - case Tegra::Shader::FlowCondition::Fcsm_Tr: - // TODO(bunnei): What is this used for? 
If we assume this conditon is not - // satisifed, dual vertex shaders in Farming Simulator make more sense - UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); - break; - - default: - UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); - } - break; - } - case OpCode::Id::KIL: { - UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); - - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); - - bb.push_back(Operation(OperationCode::Discard)); - break; - } - case OpCode::Id::S2R: { - const Node value = [this, instr] { - switch (instr.sys20) { - case SystemVariable::LaneId: - return Operation(OperationCode::ThreadId); - case SystemVariable::InvocationId: - return Operation(OperationCode::InvocationId); - case SystemVariable::Ydirection: - uses_y_negate = true; - return Operation(OperationCode::YNegate); - case SystemVariable::InvocationInfo: - LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); - return Immediate(0x00ff'0000U); - case SystemVariable::WscaleFactorXY: - UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); - return Immediate(0U); - case SystemVariable::WscaleFactorZ: - UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); - return Immediate(0U); - case SystemVariable::Tid: { - Node val = Immediate(0); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); - return val; - } - case SystemVariable::TidX: - return Operation(OperationCode::LocalInvocationIdX); - case SystemVariable::TidY: - return Operation(OperationCode::LocalInvocationIdY); - case SystemVariable::TidZ: - return Operation(OperationCode::LocalInvocationIdZ); - case SystemVariable::CtaIdX: - return Operation(OperationCode::WorkGroupIdX); - 
case SystemVariable::CtaIdY: - return Operation(OperationCode::WorkGroupIdY); - case SystemVariable::CtaIdZ: - return Operation(OperationCode::WorkGroupIdZ); - case SystemVariable::EqMask: - case SystemVariable::LtMask: - case SystemVariable::LeMask: - case SystemVariable::GtMask: - case SystemVariable::GeMask: - uses_warps = true; - switch (instr.sys20) { - case SystemVariable::EqMask: - return Operation(OperationCode::ThreadEqMask); - case SystemVariable::LtMask: - return Operation(OperationCode::ThreadLtMask); - case SystemVariable::LeMask: - return Operation(OperationCode::ThreadLeMask); - case SystemVariable::GtMask: - return Operation(OperationCode::ThreadGtMask); - case SystemVariable::GeMask: - return Operation(OperationCode::ThreadGeMask); - default: - UNREACHABLE(); - return Immediate(0u); - } - default: - UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); - return Immediate(0u); - } - }(); - SetRegister(bb, instr.gpr0, value); - - break; - } - case OpCode::Id::BRA: { - Node branch; - if (instr.bra.constant_buffer == 0) { - const u32 target = pc + instr.bra.GetBranchTarget(); - branch = Operation(OperationCode::Branch, Immediate(target)); - } else { - const u32 target = pc + 1; - const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - const Node operand = - Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - branch = Operation(OperationCode::BranchIndirect, operand); - } - - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - if (cc != Tegra::Shader::ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::BRX: { - Node operand; - if (instr.brx.constant_buffer != 0) { - const s32 target = pc + 1; - const Node index = GetRegister(instr.gpr8); - const Node op_a = - 
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } else { - const s32 target = pc + instr.brx.GetBranchExtend(); - const Node op_a = GetRegister(instr.gpr8); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } - const Node branch = Operation(OperationCode::BranchIndirect, operand); - - const ConditionCode cc = instr.flow_condition_code; - if (cc != ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::SSY: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer flow is not supported"); - - if (disable_flow_stack) { - break; - } - - // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); - break; - } - case OpCode::Id::PBK: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer PBK is not supported"); - - if (disable_flow_stack) { - break; - } - - // PBK pushes to a stack the address where BRK will jump to. 
- const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); - break; - } - case OpCode::Id::SYNC: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); - - if (decompiled) { - break; - } - - // The SYNC opcode jumps to the address previously set by the SSY opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); - break; - } - case OpCode::Id::BRK: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); - if (decompiled) { - break; - } - - // The BRK opcode jumps to the address previously set by the PBK opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); - break; - } - case OpCode::Id::IPA: { - const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; - const auto attribute = instr.attribute.fmt28; - const Index index = attribute.index; - - Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) - : GetInputAttribute(index, attribute.element); - - // Code taken from Ryujinx. 
- if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { - const u32 location = static_cast(index) - static_cast(Index::Attribute_0); - if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { - Node position_w = GetInputAttribute(Index::Position, 3); - value = Operation(OperationCode::FMul, move(value), move(position_w)); - } - } - - if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { - value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); - } - - value = GetSaturatedFloat(move(value), instr.ipa.saturate); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::OUT_R: { - UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, - "Stream buffer is not supported"); - - if (instr.out.emit) { - // gpr0 is used to store the next address and gpr8 contains the address to emit. - // Hardware uses pointers here but we just ignore it - bb.push_back(Operation(OperationCode::EmitVertex)); - SetRegister(bb, instr.gpr0, Immediate(0)); - } - if (instr.out.cut) { - bb.push_back(Operation(OperationCode::EndPrimitive)); - } - break; - } - case OpCode::Id::ISBERD: { - UNIMPLEMENTED_IF(instr.isberd.o != 0); - UNIMPLEMENTED_IF(instr.isberd.skew != 0); - UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); - UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); - LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); - SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); - break; - } - case OpCode::Id::BAR: { - UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); - bb.push_back(Operation(OperationCode::Barrier)); - break; - } - case OpCode::Id::MEMBAR: { - UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); - const OperationCode type = [instr] { - switch (instr.membar.type) { - case Tegra::Shader::MembarType::CTA: - return OperationCode::MemoryBarrierGroup; - case Tegra::Shader::MembarType::GL: - return 
OperationCode::MemoryBarrierGlobal; - default: - UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); - return OperationCode::MemoryBarrierGlobal; - } - }(); - bb.push_back(Operation(type)); - break; - } - case OpCode::Id::DEPBAR: { - LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp deleted file mode 100644 index 9290d22eb..000000000 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::PSETP: { - const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); - - // We can't use the constant predicate as destination. 
- ASSERT(instr.psetp.pred3 != static_cast(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); - const Node predicate = Operation(combiner, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); - - if (instr.psetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if - // enabled - SetPredicate(bb, instr.psetp.pred0, - Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), - second_pred)); - } - break; - } - case OpCode::Id::CSETP: { - const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); - const Node condition_code = GetConditionCode(instr.csetp.cc); - - const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); - - if (instr.csetp.pred3 != static_cast(Pred::UnusedIndex)) { - SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); - } - if (instr.csetp.pred0 != static_cast(Pred::UnusedIndex)) { - const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); - SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp deleted file mode 100644 index 84dbc50fe..000000000 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in PSET is not implemented"); - - const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); - const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); - - const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.pset.op); - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); - const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.pset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp deleted file mode 100644 index 6116c31aa..000000000 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { -constexpr u64 NUM_CONDITION_CODES = 4; -constexpr u64 NUM_PREDICATES = 7; -} // namespace - -u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node apply_mask = [this, opcode, instr] { - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: - case OpCode::Id::P2R_IMM: - return Immediate(static_cast(instr.p2r_r2p.immediate_mask)); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const u32 offset = static_cast(instr.p2r_r2p.byte) * 8; - - const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; - const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; - const auto get_entry = [this, cc](u64 entry) { - return cc ? 
GetInternalFlag(static_cast(entry)) : GetPredicate(entry); - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: { - Node mask = GetRegister(instr.gpr8); - - for (u64 entry = 0; entry < num_entries; ++entry) { - const u32 shift = static_cast(entry); - - Node apply = BitfieldExtract(apply_mask, shift, 1); - Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); - - Node compare = BitfieldExtract(mask, offset + shift, 1); - Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - - Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); - bb.push_back(Conditional(condition, {move(code)})); - } - break; - } - case OpCode::Id::P2R_IMM: { - Node value = Immediate(0); - for (u64 entry = 0; entry < num_entries; ++entry) { - Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), - Immediate(0)); - value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); - } - value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); - value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp deleted file mode 100644 index a53819c15..000000000 --- a/src/video_core/shader/decode/shift.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::ShfType; -using Tegra::Shader::ShfXmode; - -namespace { - -Node IsFull(Node shift) { - return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); -} - -Node Shift(OperationCode opcode, Node value, Node shift) { - Node shifted = Operation(opcode, move(value), shift); - return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); -} - -Node ClampShift(Node shift, s32 size = 32) { - shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); - return Operation(OperationCode::IMin, move(shift), Immediate(size)); -} - -Node WrapShift(Node shift, s32 size = 32) { - return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); -} - -Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); - Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); - } - - // And these when it's larger than or 32 - const bool is_signed = type == ShfType::S64; - const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(opcode, high, move(reduced)); - - Node is_less = 
Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); - Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); - } - - // And these when it's larger than or 32 - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); - - Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [this, instr] { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (const auto opid = 
opcode->get().GetId(); opid) { - case OpCode::Id::SHR_C: - case OpCode::Id::SHR_R: - case OpCode::Id::SHR_IMM: { - op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); - - Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, - move(op_a), move(op_b)); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHL_C: - case OpCode::Id::SHL_R: - case OpCode::Id::SHL_IMM: { - Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHF_RIGHT_R: - case OpCode::Id::SHF_RIGHT_IMM: - case OpCode::Id::SHF_LEFT_R: - case OpCode::Id::SHF_LEFT_IMM: { - UNIMPLEMENTED_IF(instr.generates_cc); - UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", - instr.shf.xmode.Value()); - - if (instr.is_b_imm) { - op_b = Immediate(static_cast(instr.shf.immediate)); - } - const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; - Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size); - - Node negated_shift = Operation(OperationCode::INegate, shift); - Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); - - const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; - Node value = (is_right ? 
ShiftRight : ShiftLeft)( - move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp deleted file mode 100644 index c69681e8d..000000000 --- a/src/video_core/shader/decode/texture.cpp +++ /dev/null @@ -1,935 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::TextureMiscMode; -using Tegra::Shader::TextureProcessMode; -using Tegra::Shader::TextureType; - -static std::size_t GetCoordCount(TextureType texture_type) { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::Texture3D: - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); - return 0; - } -} - -u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - bool is_bindless = false; - switch (opcode->get().GetId()) { - case OpCode::Id::TEX: { - const TextureType texture_type{instr.tex.texture_type}; - const bool is_array = instr.tex.array != 0; - const bool is_aoffi = 
instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex.GetTextureProcessMode(); - WriteTexInstructionFloat( - bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); - break; - } - case OpCode::Id::TEX_B: { - UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const TextureType texture_type{instr.tex_b.texture_type}; - const bool is_array = instr.tex_b.array != 0; - const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex_b.GetTextureProcessMode(); - WriteTexInstructionFloat(bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, - is_array, is_aoffi, {instr.gpr20})); - break; - } - case OpCode::Id::TEXS: { - const TextureType texture_type{instr.texs.GetTextureType()}; - const bool is_array{instr.texs.IsArrayTexture()}; - const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.texs.GetTextureProcessMode(); - - const Node4 components = - GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); - - if (instr.texs.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - case OpCode::Id::TLD4_B: { - is_bindless = true; - [[fallthrough]]; - } - case OpCode::Id::TLD4: { - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), - "NDV is not implemented"); - const auto texture_type = instr.tld4.texture_type.Value(); - const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) - : instr.tld4.UsesMiscMode(TextureMiscMode::DC); - const bool is_array = instr.tld4.array != 0; - const bool is_aoffi = is_bindless ? 
instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) - : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) - : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); - WriteTexInstructionFloat(bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, - is_ptp, is_bindless)); - break; - } - case OpCode::Id::TLD4S: { - constexpr std::size_t num_coords = 2; - const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = GetRegister(instr.gpr20); - - // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. - std::vector coords; - std::vector aoffi; - Node depth_compare; - if (is_depth_compare) { - // Note: TLD4S coordinate encoding works just like TEXS's - const Node op_y = GetRegister(instr.gpr8.Value() + 1); - coords.push_back(op_a); - coords.push_back(op_y); - if (is_aoffi) { - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - depth_compare = GetRegister(instr.gpr20.Value() + 1); - } else { - depth_compare = op_b; - } - } else { - // There's no depth compare - coords.push_back(op_a); - if (is_aoffi) { - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - } else { - coords.push_back(op_b); - } - } - const Node component = Immediate(static_cast(instr.tld4s.component)); - - SamplerInfo info; - info.is_shadow = is_depth_compare; - const std::optional sampler = GetSampler(instr.sampler, info); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {}, - {}, {}, component, element, {}}; - values[element] = Operation(OperationCode::TextureGather, meta, coords); - } - - if (instr.tld4s.fp16_flag) { - WriteTexsInstructionHalfFloat(bb, instr, values, true); - 
} else { - WriteTexsInstructionFloat(bb, instr, values, true); - } - break; - } - case OpCode::Id::TXD_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXD: { - UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const bool is_array = instr.txd.is_array != 0; - const auto derivate_reg = instr.gpr20.Value(); - const auto texture_type = instr.txd.texture_type.Value(); - const auto coord_count = GetCoordCount(texture_type); - u64 base_reg = instr.gpr8.Value(); - Node index_var; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - const std::optional sampler = - is_bindless ? GetBindlessSampler(base_reg, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); - WriteTexInstructionFloat(bb, instr, values); - break; - } - - if (is_bindless) { - base_reg++; - } - - std::vector coords; - std::vector derivates; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(base_reg + i)); - const std::size_t derivate = i * 2; - derivates.push_back(GetRegister(derivate_reg + derivate)); - derivates.push_back(GetRegister(derivate_reg + derivate + 1)); - } - - Node array_node = {}; - if (is_array) { - const Node info_reg = GetRegister(base_reg + coord_count); - array_node = BitfieldExtract(info_reg, 0, 16); - } - - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, - {}, {}, {}, element, index_var}; - values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); - } - - WriteTexInstructionFloat(bb, instr, values); - - break; - } - case OpCode::Id::TXQ_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXQ: { - Node index_var; - const std::optional sampler = - is_bindless ? 
GetBindlessSampler(instr.gpr8, {}, index_var) - : GetSampler(instr.sampler, {}); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - u32 indexer = 0; - switch (instr.txq.query_type) { - case Tegra::Shader::TextureQueryType::Dimension: { - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, - GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); - } - break; - } - case OpCode::Id::TMML_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TMML: { - UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), - "NDV is not implemented"); - - const auto texture_type = instr.tmml.texture_type.Value(); - const bool is_array = instr.tmml.array != 0; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - Node index_var; - const std::optional sampler = - is_bindless ? 
GetBindlessSampler(instr.gpr20, info, index_var) - : GetSampler(instr.sampler, info); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - const u64 base_index = is_array ? 1 : 0; - const u64 num_components = [texture_type] { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); - return 2; - } - }(); - // TODO: What's the array component used for? - - std::vector coords; - coords.reserve(num_components); - for (u64 component = 0; component < num_components; ++component) { - coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); - } - - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - Node value = Operation(OperationCode::TextureQueryLod, meta, coords); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::TLD: { - UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); - - WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); - break; - } - case OpCode::Id::TLDS: { - const TextureType texture_type{instr.tlds.GetTextureType()}; - const bool is_array{instr.tlds.IsArrayTexture()}; - - 
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - - const Node4 components = GetTldsCode(instr, texture_type, is_array); - - if (instr.tlds.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( - SamplerInfo info, std::optional sampler) { - if (info.IsComplete()) { - return info; - } - if (!sampler) { - LOG_WARNING(HW_GPU, "Unknown sampler info"); - info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); - info.is_array = info.is_array.value_or(false); - info.is_shadow = info.is_shadow.value_or(false); - info.is_buffer = info.is_buffer.value_or(false); - return info; - } - info.type = info.type.value_or(sampler->texture_type); - info.is_array = info.is_array.value_or(sampler->is_array != 0); - info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0); - info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0); - return info; -} - -std::optional ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, - SamplerInfo sampler_info) { - const u32 offset = static_cast(sampler.index.Value()); - const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); - - // If this sampler has already been used, return the existing mapping. 
- const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [offset](const SamplerEntry& entry) { return entry.offset == offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); -} - -std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, - SamplerInfo info, Node& index_var) { - const Node sampler_register = GetRegister(reg); - const auto [base_node, tracked_sampler_info] = - TrackBindlessSampler(sampler_register, global_code, static_cast(global_code.size())); - if (!base_node) { - UNREACHABLE(); - return std::nullopt; - } - - if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { - const u32 buffer = sampler_info->index; - const u32 offset = sampler_info->offset; - info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); - - // If this sampler has already been used, return the existing mapping. 
- const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer, offset](const SamplerEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); - } - if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { - const std::pair indices = sampler_info->indices; - const std::pair offsets = sampler_info->offsets; - info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); - - // Try to use an already created sampler if it exists - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [indices, offsets](const SamplerEntry& entry) { - return offsets == std::pair{entry.offset, entry.secondary_offset} && - indices == std::pair{entry.buffer, entry.secondary_buffer}; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const u32 next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer); - } - if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { - const u32 base_offset = sampler_info->base_offset / 4; - index_var = GetCustomVariable(sampler_info->bindless_var); - info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); - - // If this sampler has already been used, return the existing mapping. 
- const auto it = std::find_if( - used_samplers.begin(), used_samplers.end(), - [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && - it->is_indexed); - return *it; - } - - uses_indexed_samplers = true; - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, true); - } - return std::nullopt; -} - -void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { - u32 dest_elem = 0; - for (u32 elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - SetTemporary(bb, dest_elem++, components[elem]); - } - // After writing values in temporals, move them to the real registers - for (u32 i = 0; i < dest_elem; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } -} - -void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, - bool ignore_mask) { - // TEXS has two destination registers and a swizzle. 
The first two elements in the swizzle - // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 - - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - SetTemporary(bb, dest_elem++, components[component]); - } - - for (u32 i = 0; i < dest_elem; ++i) { - if (i < 2) { - // Write the first two swizzle components to gpr0 and gpr0+1 - SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); - } else { - ASSERT(instr.texs.HasTwoDestinations()); - // Write the rest of the swizzle components to gpr28 and gpr28+1 - SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); - } - } -} - -void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, - const Node4& components, bool ignore_mask) { - // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half - // float instruction). - - Node4 values; - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - values[dest_elem++] = components[component]; - } - if (dest_elem == 0) - return; - - std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); - - const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); - if (dest_elem <= 2) { - SetRegister(bb, instr.gpr0, first_value); - return; - } - - SetTemporary(bb, 0, first_value); - SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); - - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr28, GetTemporary(1)); -} - -Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, std::vector coords, - Node array, Node depth_compare, u32 bias_offset, - std::vector aoffi, - std::optional bindless_reg) { - const bool is_array = array != nullptr; - const bool is_shadow = 
depth_compare != nullptr; - const bool is_bindless = bindless_reg.has_value(); - - ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, - "Illegal texture type"); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = is_shadow; - info.is_buffer = false; - - Node index_var; - const std::optional sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) - : GetSampler(instr.sampler, info); - if (!sampler) { - return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; - } - - const bool lod_needed = process_mode == TextureProcessMode::LZ || - process_mode == TextureProcessMode::LL || - process_mode == TextureProcessMode::LLA; - const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture; - - Node bias; - Node lod; - switch (process_mode) { - case TextureProcessMode::None: - break; - case TextureProcessMode::LZ: - lod = Immediate(0.0f); - break; - case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register indexed by the gpr20 field with - // an offset depending on the usage of the other registers. 
- bias = GetRegister(instr.gpr20.Value() + bias_offset); - break; - case TextureProcessMode::LL: - lod = GetRegister(instr.gpr20.Value() + bias_offset); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); - break; - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, - lod, {}, element, index_var}; - values[element] = Operation(opcode, meta, coords); - } - - return values; -} - -Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi, std::optional bindless_reg) { - const bool lod_bias_enabled{ - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; - - const bool is_bindless = bindless_reg.has_value(); - - u64 parameter_register = instr.gpr20.Value(); - if (is_bindless) { - ++parameter_register; - } - - const u32 bias_lod_offset = (is_bindless ? 1 : 0); - if (lod_bias_enabled) { - ++parameter_register; - } - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 5); - const auto coord_count = std::get<0>(coord_counts); - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - // 1D.DC in OpenGL the 2nd component is ignored. - if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { - coords.push_back(Immediate(0.0f)); - } - - const Node array = is_array ? 
GetRegister(array_register) : nullptr; - - std::vector aoffi; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); - } - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - dc = GetRegister(parameter_register++); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, - aoffi, bindless_reg); -} - -Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 4); - const auto coord_count = std::get<0>(coord_counts); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - const u64 last_coord_register = - (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) - ? static_cast(instr.gpr20.Value()) - : coord_register + 1; - const u32 bias_offset = coord_count > 2 ? 1 : 0; - - std::vector coords; - for (std::size_t i = 0; i < coord_count; ++i) { - const bool last = (i == (coord_count - 1)) && (coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 
1 : 0); - dc = GetRegister(depth_register); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, - {}); -} - -Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { - ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); - - const std::size_t coord_count = GetCoordCount(texture_type); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - - u64 parameter_register = instr.gpr20.Value(); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = depth_compare; - - Node index_var; - const std::optional sampler = - is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - for (u32 element = 0; element < values.size(); ++element) { - values[element] = Immediate(0); - } - return values; - } - - std::vector aoffi, ptp; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); - } else if (is_ptp) { - ptp = GetPtpCoordinates( - {GetRegister(parameter_register++), GetRegister(parameter_register++)}); - } - - Node dc; - if (depth_compare) { - dc = GetRegister(parameter_register++); - } - - const Node component = is_bindless ? 
Immediate(static_cast(instr.tld4_b.component)) - : Immediate(static_cast(instr.tld4.component)); - - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{ - *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, - index_var}; - values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { - const auto texture_type{instr.tld.texture_type}; - const bool is_array{instr.tld.is_array != 0}; - const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; - const std::size_t coord_count{GetCoordCount(texture_type)}; - - u64 gpr8_cursor{instr.gpr8.Value()}; - const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; - - std::vector coords; - coords.reserve(coord_count); - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(gpr8_cursor++)); - } - - u64 gpr20_cursor{instr.gpr20.Value()}; - // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; - const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; - // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; - // const Node multisample{is_multisample ? 
GetRegister(gpr20_cursor++) : nullptr}; - - const std::optional sampler = GetSampler(instr.sampler, {}); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = false; - const std::optional sampler = GetSampler(instr.sampler, info); - - const std::size_t type_coord_count = GetCoordCount(texture_type); - const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; - const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // if is array gpr20 is used - const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); - - const u64 last_coord_register = - ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array - ? static_cast(instr.gpr20.Value()) - : coord_register + 1; - - std::vector coords; - for (std::size_t i = 0; i < type_coord_count; ++i) { - const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); - coords.push_back( - GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - // When lod is used always is in gpr20 - const Node lod = lod_enabled ? 
GetRegister(instr.gpr20) : Immediate(0); - - std::vector aoffi; - if (aoffi_enabled) { - aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - return values; -} - -std::tuple ShaderIR::ValidateAndGetCoordinateElement( - TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, - std::size_t max_coords, std::size_t max_inputs) { - const std::size_t coord_count = GetCoordCount(texture_type); - - std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); - const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); - if (total_coord_count > max_coords || total_reg_count > max_inputs) { - UNIMPLEMENTED_MSG("Unsupported Texture operation"); - total_coord_count = std::min(total_coord_count, max_coords); - } - // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. - total_coord_count += - (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; - - return {coord_count, total_coord_count}; -} - -std::vector ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, - bool is_tld4) { - const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; - const u32 size = is_tld4 ? 6 : 4; - const s32 wrap_value = is_tld4 ? 32 : 8; - const s32 diff_value = is_tld4 ? 64 : 16; - const u32 mask = (1U << size) - 1; - - std::vector aoffi; - aoffi.reserve(coord_count); - - const auto aoffi_immediate{ - TrackImmediate(aoffi_reg, global_code, static_cast(global_code.size()))}; - if (!aoffi_immediate) { - // Variable access, not supported on AMD. 
- LOG_WARNING(HW_GPU, - "AOFFI constant folding failed, some hardware might have graphical issues"); - for (std::size_t coord = 0; coord < coord_count; ++coord) { - const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); - const Node condition = - Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); - const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); - aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); - } - return aoffi; - } - - for (std::size_t coord = 0; coord < coord_count; ++coord) { - s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; - if (value >= wrap_value) { - value -= diff_value; - } - aoffi.push_back(Immediate(value)); - } - return aoffi; -} - -std::vector ShaderIR::GetPtpCoordinates(std::array ptp_regs) { - static constexpr u32 num_entries = 8; - - std::vector ptp; - ptp.reserve(num_entries); - - const auto global_size = static_cast(global_code.size()); - const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); - const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); - if (!low || !high) { - for (u32 entry = 0; entry < num_entries; ++entry) { - const u32 reg = entry / 4; - const u32 offset = entry % 4; - const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); - const Node condition = - Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); - const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); - ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); - } - return ptp; - } - - const u64 immediate = (static_cast(*high) << 32) | static_cast(*low); - for (u32 entry = 0; entry < num_entries; ++entry) { - s32 value = (immediate >> (entry * 8)) & 0b111111; - if (value >= 32) { - value -= 64; - } - ptp.push_back(Immediate(value)); - } - - return ptp; -} - -} // namespace VideoCommon::Shader diff --git 
a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp deleted file mode 100644 index 1c0957277..000000000 --- a/src/video_core/shader/decode/video.cpp +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::VideoType; -using Tegra::Shader::VmadShr; -using Tegra::Shader::VmnmxOperation; -using Tegra::Shader::VmnmxType; - -u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::VMNMX) { - DecodeVMNMX(bb, instr); - return pc; - } - - const Node op_a = - GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, - instr.video.type_a, instr.video.byte_height_a); - const Node op_b = [this, instr] { - if (instr.video.use_register_b) { - return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, - instr.video.signed_b, instr.video.type_b, - instr.video.byte_height_b); - } - if (instr.video.signed_b) { - const auto imm = static_cast(instr.alu.GetImm20_16()); - return Immediate(static_cast(imm)); - } else { - return Immediate(instr.alu.GetImm20_16()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::VMAD: { - const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node op_c = GetRegister(instr.gpr39); - - Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); - value = SignedOperation(OperationCode::IAdd, 
result_signed, NO_PRECISE, value, op_c); - - if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { - const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); - value = - SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::VSETP: { - // We can't use the constant predicate as destination. - ASSERT(instr.vsetp.pred3 != static_cast(Pred::UnusedIndex)); - - const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); - const Node second_pred = GetPredicate(instr.vsetp.pred39, false); - - const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); - - if (instr.vsetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); - SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, - u64 byte_height) { - if (!is_chunk) { - return BitfieldExtract(op, static_cast(byte_height * 8), 8); - } - - switch (type) { - case VideoType::Size16_Low: - return BitfieldExtract(op, 0, 16); - case VideoType::Size16_High: - return BitfieldExtract(op, 16, 16); - case VideoType::Size32: - // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used - // (1 * 1 + 0 == 0x5b800000). 
Until a better explanation is found: abort. - UNIMPLEMENTED(); - return Immediate(0); - case VideoType::Invalid: - UNREACHABLE_MSG("Invalid instruction encoding"); - return Immediate(0); - default: - UNREACHABLE(); - return Immediate(0); - } -} - -void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { - UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); - UNIMPLEMENTED_IF(instr.vmnmx.sat); - UNIMPLEMENTED_IF(instr.generates_cc); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node op_c = GetRegister(instr.gpr39); - - const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed - const bool is_oper2_signed = instr.vmnmx.is_dest_signed; - - const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin; - Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); - - switch (instr.vmnmx.operation) { - case VmnmxOperation::Mrg_16H: - value = BitfieldInsert(move(op_c), move(value), 16, 16); - break; - case VmnmxOperation::Mrg_16L: - value = BitfieldInsert(move(op_c), move(value), 0, 16); - break; - case VmnmxOperation::Mrg_8B0: - value = BitfieldInsert(move(op_c), move(value), 0, 8); - break; - case VmnmxOperation::Mrg_8B2: - value = BitfieldInsert(move(op_c), move(value), 16, 8); - break; - case VmnmxOperation::Acc: - value = Operation(OperationCode::IAdd, move(value), move(op_c)); - break; - case VmnmxOperation::Min: - value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Max: - value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Nop: - break; - default: - UNREACHABLE(); - break; - } - - SetRegister(bb, 
instr.gpr0, move(value)); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp deleted file mode 100644 index 37433d783..000000000 --- a/src/video_core/shader/decode/warp.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::ShuffleOperation; -using Tegra::Shader::VoteOperation; - -namespace { - -OperationCode GetOperationCode(VoteOperation vote_op) { - switch (vote_op) { - case VoteOperation::All: - return OperationCode::VoteAll; - case VoteOperation::Any: - return OperationCode::VoteAny; - case VoteOperation::Eq: - return OperationCode::VoteEqual; - default: - UNREACHABLE_MSG("Invalid vote operation={}", vote_op); - return OperationCode::VoteAll; - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - // Signal the backend that this shader uses warp instructions. - uses_warps = true; - - switch (opcode->get().GetId()) { - case OpCode::Id::VOTE: { - const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); - const Node active = Operation(OperationCode::BallotThread, value); - const Node vote = Operation(GetOperationCode(instr.vote.operation), value); - SetRegister(bb, instr.gpr0, active); - SetPredicate(bb, instr.vote.dest_pred, vote); - break; - } - case OpCode::Id::SHFL: { - Node mask = instr.shfl.is_mask_imm ? 
Immediate(static_cast(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); - Node index = instr.shfl.is_index_imm ? Immediate(static_cast(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - - Node thread_id = Operation(OperationCode::ThreadId); - Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); - Node seg_mask = BitfieldExtract(mask, 8, 16); - - Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); - Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); - Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, - Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); - - Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { - switch (instr.shfl.operation) { - case ShuffleOperation::Idx: - return Operation(OperationCode::IBitwiseOr, - Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), - min_thread_id); - case ShuffleOperation::Down: - return Operation(OperationCode::IAdd, thread_id, index); - case ShuffleOperation::Up: - return Operation(OperationCode::IAdd, thread_id, - Operation(OperationCode::INegate, index)); - case ShuffleOperation::Bfly: - return Operation(OperationCode::IBitwiseXor, thread_id, index); - } - UNREACHABLE(); - return Immediate(0U); - }(); - - Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); - } else { - return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); - } - }(); - - SetPredicate(bb, instr.shfl.pred48, in_bounds); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); - break; - } - case OpCode::Id::FSWZADD: { - UNIMPLEMENTED_IF(instr.fswzadd.ndv); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node mask = 
Immediate(static_cast(instr.fswzadd.swizzle)); - SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp deleted file mode 100644 index 233b8fa42..000000000 --- a/src/video_core/shader/decode/xmad.cpp +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; - -u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF(instr.xmad.sign_a); - UNIMPLEMENTED_IF(instr.xmad.sign_b); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in XMAD is not implemented"); - - Node op_a = GetRegister(instr.gpr8); - - // TODO(bunnei): Needs to be fixed once op_a or op_b is signed - UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); - const bool is_signed_a = instr.xmad.sign_a == 1; - const bool is_signed_b = instr.xmad.sign_b == 1; - const bool is_signed_c = is_signed_a; - - auto [is_merge, is_psl, is_high_b, mode, op_b_binding, - op_c] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::XMAD_CR: - return {instr.xmad.merge_56, - instr.xmad.product_shift_left_second, - instr.xmad.high_b, - instr.xmad.mode_cbf, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - 
case OpCode::Id::XMAD_RR: - return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, - instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::XMAD_RC: - return {false, - false, - instr.xmad.high_b, - instr.xmad.mode_cbf, - GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::XMAD_IMM: - return {instr.xmad.merge_37, - instr.xmad.product_shift_left, - false, - instr.xmad.mode, - Immediate(static_cast(instr.xmad.imm20_16)), - GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); - return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; - } - }(); - - op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), - instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); - - const Node original_b = op_b_binding; - const Node op_b = - SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding), - is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); - - // we already check sign_a and sign_b is difference or not before so just use one in here. 
- Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); - if (is_psl) { - product = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); - } - SetTemporary(bb, 0, product); - product = GetTemporary(0); - - Node original_c = op_c; - const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error - op_c = [&] { - switch (set_mode) { - case Tegra::Shader::XmadMode::None: - return original_c; - case Tegra::Shader::XmadMode::CLo: - return BitfieldExtract(std::move(original_c), 0, 16); - case Tegra::Shader::XmadMode::CHi: - return BitfieldExtract(std::move(original_c), 16, 16); - case Tegra::Shader::XmadMode::CBcc: { - Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, - original_b, Immediate(16)); - return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), - std::move(shifted_b)); - } - case Tegra::Shader::XmadMode::CSfu: { - const Node comp_a = - GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); - const Node comp_b = - GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); - const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); - - const Node comp_minus_a = GetPredicateComparisonInteger( - PredCondition::NE, is_signed_a, - SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, - Immediate(0x80000000)), - Immediate(0)); - const Node comp_minus_b = GetPredicateComparisonInteger( - PredCondition::NE, is_signed_b, - SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, - Immediate(0x80000000)), - Immediate(0)); - - Node new_c = Operation( - OperationCode::Select, comp_minus_a, - SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), - original_c); - new_c = Operation( - OperationCode::Select, comp_minus_b, - SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), - std::move(new_c)); - - return 
Operation(OperationCode::Select, comp, original_c, std::move(new_c)); - } - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - SetTemporary(bb, 1, op_c); - op_c = GetTemporary(1); - - // TODO(Rodrigo): Use an appropiate sign for this operation - Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); - SetTemporary(bb, 2, sum); - sum = GetTemporary(2); - if (is_merge) { - const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), - Immediate(0), Immediate(16)); - const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, - Immediate(16)); - sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); - } - - SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(sum)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp deleted file mode 100644 index 2647865d4..000000000 --- a/src/video_core/shader/expr.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include - -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { -namespace { -bool ExprIsBoolean(const Expr& expr) { - return std::holds_alternative(*expr); -} - -bool ExprBooleanGet(const Expr& expr) { - return std::get_if(expr.get())->value; -} -} // Anonymous namespace - -bool ExprAnd::operator==(const ExprAnd& b) const { - return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); -} - -bool ExprAnd::operator!=(const ExprAnd& b) const { - return !operator==(b); -} - -bool ExprOr::operator==(const ExprOr& b) const { - return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); -} - -bool ExprOr::operator!=(const ExprOr& b) const { - return !operator==(b); -} - -bool ExprNot::operator==(const ExprNot& b) const { - return *operand1 == *b.operand1; -} - -bool ExprNot::operator!=(const ExprNot& b) const { - return !operator==(b); -} - -Expr MakeExprNot(Expr first) { - if (std::holds_alternative(*first)) { - return std::get_if(first.get())->operand1; - } - return MakeExpr(std::move(first)); -} - -Expr MakeExprAnd(Expr first, Expr second) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first) ? second : first; - } - if (ExprIsBoolean(second)) { - return ExprBooleanGet(second) ? first : second; - } - return MakeExpr(std::move(first), std::move(second)); -} - -Expr MakeExprOr(Expr first, Expr second) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first) ? first : second; - } - if (ExprIsBoolean(second)) { - return ExprBooleanGet(second) ? 
second : first; - } - return MakeExpr(std::move(first), std::move(second)); -} - -bool ExprAreEqual(const Expr& first, const Expr& second) { - return (*first) == (*second); -} - -bool ExprAreOpposite(const Expr& first, const Expr& second) { - if (std::holds_alternative(*first)) { - return ExprAreEqual(std::get_if(first.get())->operand1, second); - } - if (std::holds_alternative(*second)) { - return ExprAreEqual(std::get_if(second.get())->operand1, first); - } - return false; -} - -bool ExprIsTrue(const Expr& first) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first); - } - return false; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h deleted file mode 100644 index cda284c72..000000000 --- a/src/video_core/shader/expr.h +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -class ExprAnd; -class ExprBoolean; -class ExprCondCode; -class ExprGprEqual; -class ExprNot; -class ExprOr; -class ExprPredicate; -class ExprVar; - -using ExprData = std::variant; -using Expr = std::shared_ptr; - -class ExprAnd final { -public: - explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} - - bool operator==(const ExprAnd& b) const; - bool operator!=(const ExprAnd& b) const; - - Expr operand1; - Expr operand2; -}; - -class ExprOr final { -public: - explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} - - bool operator==(const ExprOr& b) const; - bool operator!=(const ExprOr& b) const; - - Expr operand1; - Expr operand2; -}; - -class ExprNot final { -public: - explicit ExprNot(Expr a) : operand1{std::move(a)} {} - - bool operator==(const ExprNot& b) 
const; - bool operator!=(const ExprNot& b) const; - - Expr operand1; -}; - -class ExprVar final { -public: - explicit ExprVar(u32 index) : var_index{index} {} - - bool operator==(const ExprVar& b) const { - return var_index == b.var_index; - } - - bool operator!=(const ExprVar& b) const { - return !operator==(b); - } - - u32 var_index; -}; - -class ExprPredicate final { -public: - explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {} - - bool operator==(const ExprPredicate& b) const { - return predicate == b.predicate; - } - - bool operator!=(const ExprPredicate& b) const { - return !operator==(b); - } - - u32 predicate; -}; - -class ExprCondCode final { -public: - explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {} - - bool operator==(const ExprCondCode& b) const { - return cc == b.cc; - } - - bool operator!=(const ExprCondCode& b) const { - return !operator==(b); - } - - ConditionCode cc; -}; - -class ExprBoolean final { -public: - explicit ExprBoolean(bool val) : value{val} {} - - bool operator==(const ExprBoolean& b) const { - return value == b.value; - } - - bool operator!=(const ExprBoolean& b) const { - return !operator==(b); - } - - bool value; -}; - -class ExprGprEqual final { -public: - explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {} - - bool operator==(const ExprGprEqual& b) const { - return gpr == b.gpr && value == b.value; - } - - bool operator!=(const ExprGprEqual& b) const { - return !operator==(b); - } - - u32 gpr; - u32 value; -}; - -template -Expr MakeExpr(Args&&... 
args) { - static_assert(std::is_convertible_v); - return std::make_shared(T(std::forward(args)...)); -} - -bool ExprAreEqual(const Expr& first, const Expr& second); - -bool ExprAreOpposite(const Expr& first, const Expr& second); - -Expr MakeExprNot(Expr first); - -Expr MakeExprAnd(Expr first, Expr second); - -Expr MakeExprOr(Expr first, Expr second); - -bool ExprIsTrue(const Expr& first); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp deleted file mode 100644 index e18ccba8e..000000000 --- a/src/video_core/shader/memory_util.cpp +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include - -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { - const auto& shader_config{maxwell3d.regs.shader_config[static_cast(program)]}; - return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; -} - -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { - // Sched instructions appear once every 4 instructions. - constexpr std::size_t SchedPeriod = 4; - const std::size_t absolute_offset = offset - main_offset; - return (absolute_offset % SchedPeriod) == 0; -} - -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { - // This is the encoded version of BRA that jumps to itself. All Nvidia - // shaders end with one. 
- static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; - static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL; - - const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - std::size_t offset = start_offset; - while (offset < program.size()) { - const u64 instruction = program[offset]; - if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & MASK) == SELF_JUMPING_BRANCH) { - // End on Maxwell's "nop" instruction - break; - } - if (instruction == 0) { - break; - } - } - ++offset; - } - // The last instruction is included in the program size - return std::min(offset + 1, program.size()); -} - -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute) { - ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); - ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; }); - memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); - code.resize(CalculateProgramSize(code, is_compute)); - return code; -} - -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b) { - size_t unique_identifier = boost::hash_value(code); - if (is_a) { - // VertexA programs include two programs - boost::hash_combine(unique_identifier, boost::hash_value(code_b)); - } - return static_cast(unique_identifier); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h deleted file mode 100644 index 4624d38e6..000000000 --- a/src/video_core/shader/memory_util.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" - -namespace Tegra { -class MemoryManager; -} - -namespace VideoCommon::Shader { - -using ProgramCode = std::vector; - -constexpr u32 STAGE_MAIN_OFFSET = 10; -constexpr u32 KERNEL_MAIN_OFFSET = 0; - -/// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); - -/// Gets if the current instruction offset is a scheduler instruction -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); - -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); - -/// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute); - -/// Hashes one (or two) program streams -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b = {}); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h deleted file mode 100644 index b54d33763..000000000 --- a/src/video_core/shader/node.h +++ /dev/null @@ -1,701 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class OperationCode { - Assign, /// (float& dest, float src) -> void - - Select, /// (MetaArithmetic, bool pred, float a, float b) -> float - - FAdd, /// (MetaArithmetic, float a, float b) -> float - FMul, /// (MetaArithmetic, float a, float b) -> float - FDiv, /// (MetaArithmetic, float a, float b) -> float - FFma, /// (MetaArithmetic, float a, float b, float c) -> float - FNegate, /// (MetaArithmetic, float a) -> float - FAbsolute, /// (MetaArithmetic, float a) -> float - FClamp, /// (MetaArithmetic, float value, float min, float max) -> float - FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float - FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float - FMin, /// (MetaArithmetic, float a, float b) -> float - FMax, /// (MetaArithmetic, float a, float b) -> float - FCos, /// (MetaArithmetic, float a) -> float - FSin, /// (MetaArithmetic, float a) -> float - FExp2, /// (MetaArithmetic, float a) -> float - FLog2, /// (MetaArithmetic, float a) -> float - FInverseSqrt, /// (MetaArithmetic, float a) -> float - FSqrt, /// (MetaArithmetic, float a) -> float - FRoundEven, /// (MetaArithmetic, float a) -> float - FFloor, /// (MetaArithmetic, float a) -> float - FCeil, /// (MetaArithmetic, float a) -> float - FTrunc, /// (MetaArithmetic, float a) -> float - FCastInteger, /// (MetaArithmetic, int a) -> float - FCastUInteger, /// (MetaArithmetic, uint a) -> float - FSwizzleAdd, /// (float a, float b, uint mask) -> float - - IAdd, /// (MetaArithmetic, int a, int b) -> int - IMul, /// (MetaArithmetic, int a, int b) -> int - IDiv, /// (MetaArithmetic, int a, int b) -> int - INegate, /// (MetaArithmetic, int a) -> int - IAbsolute, /// (MetaArithmetic, int a) -> int - IMin, /// (MetaArithmetic, int a, int b) -> int - IMax, /// (MetaArithmetic, int a, 
int b) -> int - ICastFloat, /// (MetaArithmetic, float a) -> int - ICastUnsigned, /// (MetaArithmetic, uint a) -> int - ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int - ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int - IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int - IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int - IBitwiseNot, /// (MetaArithmetic, int a) -> int - IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int - IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int - IBitCount, /// (MetaArithmetic, int) -> int - IBitMSB, /// (MetaArithmetic, int) -> int - - UAdd, /// (MetaArithmetic, uint a, uint b) -> uint - UMul, /// (MetaArithmetic, uint a, uint b) -> uint - UDiv, /// (MetaArithmetic, uint a, uint b) -> uint - UMin, /// (MetaArithmetic, uint a, uint b) -> uint - UMax, /// (MetaArithmetic, uint a, uint b) -> uint - UCastFloat, /// (MetaArithmetic, float a) -> uint - UCastSigned, /// (MetaArithmetic, int a) -> uint - ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint - ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseNot, /// (MetaArithmetic, uint a) -> uint - UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint - UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint - UBitCount, /// (MetaArithmetic, uint) -> uint - UBitMSB, /// (MetaArithmetic, uint) -> uint - - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> 
f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 - - LogicalAssign, /// (bool& dst, bool src) -> void - LogicalAnd, /// (bool a, bool b) -> bool - LogicalOr, /// (bool a, bool b) -> bool - LogicalXor, /// (bool a, bool b) -> bool - LogicalNegate, /// (bool a) -> bool - LogicalPick2, /// (bool2 pair, uint index) -> bool - LogicalAnd2, /// (bool2 a) -> bool - - LogicalFOrdLessThan, /// (float a, float b) -> bool - LogicalFOrdEqual, /// (float a, float b) -> bool - LogicalFOrdLessEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterThan, /// (float a, float b) -> bool - LogicalFOrdNotEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterEqual, /// (float a, float b) -> bool - LogicalFOrdered, /// (float a, float b) -> bool - LogicalFUnordered, /// (float a, float b) -> bool - LogicalFUnordLessThan, /// (float a, float b) -> bool - LogicalFUnordEqual, /// (float a, float b) -> bool - LogicalFUnordLessEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterThan, /// (float a, float b) -> bool - LogicalFUnordNotEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterEqual, /// (float a, float b) -> bool - - LogicalILessThan, /// (int a, int b) -> bool - LogicalIEqual, /// (int a, int b) -> bool - LogicalILessEqual, /// (int a, int b) -> bool - LogicalIGreaterThan, /// (int a, int b) -> bool - LogicalINotEqual, /// (int a, int b) -> bool - LogicalIGreaterEqual, /// (int a, int b) -> bool - - LogicalULessThan, /// (uint a, uint b) -> bool - LogicalUEqual, /// 
(uint a, uint b) -> bool - LogicalULessEqual, /// (uint a, uint b) -> bool - LogicalUGreaterThan, /// (uint a, uint b) -> bool - LogicalUNotEqual, /// (uint a, uint b) -> bool - LogicalUGreaterEqual, /// (uint a, uint b) -> bool - - LogicalAddCarry, /// (uint a, uint b) -> bool - - Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - - Texture, /// (MetaTexture, float[N] coords) -> float4 - TextureLod, /// (MetaTexture, float[N] coords) -> float4 - TextureGather, /// (MetaTexture, float[N] coords) -> float4 - TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 - TexelFetch, /// (MetaTexture, int[N], int) -> float4 - TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4 - - ImageLoad, /// (MetaImage, int[N] coords) -> void - ImageStore, /// (MetaImage, int[N] coords) -> void - - AtomicImageAdd, /// (MetaImage, int[N] coords) -> void - AtomicImageAnd, /// (MetaImage, int[N] coords) -> void - AtomicImageOr, /// (MetaImage, int[N] coords) -> void - AtomicImageXor, /// (MetaImage, int[N] 
coords) -> void - AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - - AtomicUExchange, /// (memory, uint) -> uint - AtomicUAdd, /// (memory, uint) -> uint - AtomicUMin, /// (memory, uint) -> uint - AtomicUMax, /// (memory, uint) -> uint - AtomicUAnd, /// (memory, uint) -> uint - AtomicUOr, /// (memory, uint) -> uint - AtomicUXor, /// (memory, uint) -> uint - - AtomicIExchange, /// (memory, int) -> int - AtomicIAdd, /// (memory, int) -> int - AtomicIMin, /// (memory, int) -> int - AtomicIMax, /// (memory, int) -> int - AtomicIAnd, /// (memory, int) -> int - AtomicIOr, /// (memory, int) -> int - AtomicIXor, /// (memory, int) -> int - - ReduceUAdd, /// (memory, uint) -> void - ReduceUMin, /// (memory, uint) -> void - ReduceUMax, /// (memory, uint) -> void - ReduceUAnd, /// (memory, uint) -> void - ReduceUOr, /// (memory, uint) -> void - ReduceUXor, /// (memory, uint) -> void - - ReduceIAdd, /// (memory, int) -> void - ReduceIMin, /// (memory, int) -> void - ReduceIMax, /// (memory, int) -> void - ReduceIAnd, /// (memory, int) -> void - ReduceIOr, /// (memory, int) -> void - ReduceIXor, /// (memory, int) -> void - - Branch, /// (uint branch_target) -> void - BranchIndirect, /// (uint branch_target) -> void - PushFlowStack, /// (uint branch_target) -> void - PopFlowStack, /// () -> void - Exit, /// () -> void - Discard, /// () -> void - - EmitVertex, /// () -> void - EndPrimitive, /// () -> void - - InvocationId, /// () -> int - YNegate, /// () -> float - LocalInvocationIdX, /// () -> uint - LocalInvocationIdY, /// () -> uint - LocalInvocationIdZ, /// () -> uint - WorkGroupIdX, /// () -> uint - WorkGroupIdY, /// () -> uint - WorkGroupIdZ, /// () -> uint - - BallotThread, /// (bool) -> uint - VoteAll, /// (bool) -> bool - VoteAny, /// (bool) -> bool - VoteEqual, /// (bool) -> bool - - ThreadId, /// () -> uint - ThreadEqMask, /// () -> uint - ThreadGeMask, /// () -> uint - ThreadGtMask, /// () -> uint - ThreadLeMask, /// () -> uint - ThreadLtMask, /// () -> 
uint - ShuffleIndexed, /// (uint value, uint index) -> uint - - Barrier, /// () -> void - MemoryBarrierGroup, /// () -> void - MemoryBarrierGlobal, /// () -> void - - Amount, -}; - -enum class InternalFlag { - Zero = 0, - Sign = 1, - Carry = 2, - Overflow = 3, - Amount = 4, -}; - -enum class MetaStackClass { - Ssy, - Pbk, -}; - -class OperationNode; -class ConditionalNode; -class GprNode; -class CustomVarNode; -class ImmediateNode; -class InternalFlagNode; -class PredicateNode; -class AbufNode; -class CbufNode; -class LmemNode; -class PatchNode; -class SmemNode; -class GmemNode; -class CommentNode; - -using NodeData = std::variant; -using Node = std::shared_ptr; -using Node4 = std::array; -using NodeBlock = std::vector; - -struct ArraySamplerNode; -struct BindlessSamplerNode; -struct SeparateSamplerNode; - -using TrackSamplerData = std::variant; -using TrackSampler = std::shared_ptr; - -struct SamplerEntry { - /// Bound samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, - bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, - is_buffer{is_buffer_}, is_indexed{is_indexed_} {} - - /// Separate sampler constructor - explicit SamplerEntry(u32 index_, std::pair offsets, std::pair buffers, - Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, - bool is_buffer_) - : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, - buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} - - /// Bindless samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, - bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, - 
is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { - } - - u32 index = 0; ///< Emulated index given for the this sampler. - u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. - u32 secondary_offset = 0; ///< Secondary offset in the const buffer. - u32 buffer = 0; ///< Buffer where the bindless sampler is read. - u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. - u32 size = 1; ///< Size of the sampler. - - Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. - bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. - bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. - bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. - bool is_separated = false; ///< Whether the image and sampler is separated or not. 
-}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct ArraySamplerNode { - u32 index; - u32 base_offset; - u32 bindless_var; -}; - -/// Represents a tracked separate sampler image pair that was folded statically -struct SeparateSamplerNode { - std::pair indices; - std::pair offsets; -}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct BindlessSamplerNode { - u32 index; - u32 offset; -}; - -struct ImageEntry { -public: - /// Bound images constructor - explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, type{type_} {} - - /// Bindless samplers constructor - explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} - - void MarkWrite() { - is_written = true; - } - - void MarkRead() { - is_read = true; - } - - void MarkAtomic() { - MarkWrite(); - MarkRead(); - is_atomic = true; - } - - u32 index = 0; - u32 offset = 0; - u32 buffer = 0; - - Tegra::Shader::ImageType type{}; - bool is_bindless = false; - bool is_written = false; - bool is_read = false; - bool is_atomic = false; -}; - -struct GlobalMemoryBase { - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - - [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { - return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); - } -}; - -/// Parameters describing an arithmetic operation -struct MetaArithmetic { - bool precise{}; ///< Whether the operation can be constraint or not -}; - -/// Parameters describing a texture sampler -struct MetaTexture { - SamplerEntry sampler; - Node array; - Node depth_compare; - std::vector aoffi; - std::vector ptp; - std::vector derivates; - Node bias; - Node lod; - Node component; - u32 element{}; - Node index; -}; - -struct MetaImage { - const ImageEntry& image; - std::vector values; - u32 element{}; -}; - -/// 
Parameters that modify an operation but are not part of any particular operand -using Meta = - std::variant; - -class AmendNode { -public: - [[nodiscard]] std::optional GetAmendIndex() const { - if (amend_index == amend_null_index) { - return std::nullopt; - } - return {amend_index}; - } - - void SetAmendIndex(std::size_t index) { - amend_index = index; - } - - void ClearAmend() { - amend_index = amend_null_index; - } - -private: - static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; - std::size_t amend_index{amend_null_index}; -}; - -/// Holds any kind of operation that can be done in the IR -class OperationNode final : public AmendNode { -public: - explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} - - explicit OperationNode(OperationCode code_, Meta meta_) - : OperationNode(code_, std::move(meta_), std::vector{}) {} - - explicit OperationNode(OperationCode code_, std::vector operands_) - : OperationNode(code_, Meta{}, std::move(operands_)) {} - - explicit OperationNode(OperationCode code_, Meta meta_, std::vector operands_) - : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} - - template - explicit OperationNode(OperationCode code_, Meta meta_, Args&&... 
operands_) - : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} - - [[nodiscard]] OperationCode GetCode() const { - return code; - } - - [[nodiscard]] const Meta& GetMeta() const { - return meta; - } - - [[nodiscard]] std::size_t GetOperandsCount() const { - return operands.size(); - } - - [[nodiscard]] const Node& operator[](std::size_t operand_index) const { - return operands.at(operand_index); - } - -private: - OperationCode code{}; - Meta meta{}; - std::vector operands; -}; - -/// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final : public AmendNode { -public: - explicit ConditionalNode(Node condition_, std::vector&& code_) - : condition{std::move(condition_)}, code{std::move(code_)} {} - - [[nodiscard]] const Node& GetCondition() const { - return condition; - } - - [[nodiscard]] const std::vector& GetCode() const { - return code; - } - -private: - Node condition; ///< Condition to be satisfied - std::vector code; ///< Code to execute -}; - -/// A general purpose register -class GprNode final { -public: - explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return static_cast(index); - } - -private: - Tegra::Shader::Register index{}; -}; - -/// A custom variable -class CustomVarNode final { -public: - explicit constexpr CustomVarNode(u32 index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -/// A 32-bits value that represents an immediate value -class ImmediateNode final { -public: - explicit constexpr ImmediateNode(u32 value_) : value{value_} {} - - [[nodiscard]] constexpr u32 GetValue() const { - return value; - } - -private: - u32 value{}; -}; - -/// One of Maxwell's internal flags -class InternalFlagNode final { -public: - explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} - - [[nodiscard]] constexpr 
InternalFlag GetFlag() const { - return flag; - } - -private: - InternalFlag flag{}; -}; - -/// A predicate register, it can be negated without additional nodes -class PredicateNode final { -public: - explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) - : index{index_}, negated{negated_} {} - - [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { - return index; - } - - [[nodiscard]] constexpr bool IsNegated() const { - return negated; - } - -private: - Tegra::Shader::Pred index{}; - bool negated{}; -}; - -/// Attribute buffer memory (known as attributes or varyings in GLSL terms) -class AbufNode final { -public: - // Initialize for standard attributes (index is explicit). - explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) - : buffer{std::move(buffer_)}, index{index_}, element{element_} {} - - // Initialize for physical attributes (index is a variable value). - explicit AbufNode(Node physical_address_, Node buffer_ = {}) - : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} - - [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { - return index; - } - - [[nodiscard]] u32 GetElement() const { - return element; - } - - [[nodiscard]] const Node& GetBuffer() const { - return buffer; - } - - [[nodiscard]] bool IsPhysicalBuffer() const { - return static_cast(physical_address); - } - - [[nodiscard]] const Node& GetPhysicalAddress() const { - return physical_address; - } - -private: - Node physical_address; - Node buffer; - Tegra::Shader::Attribute::Index index{}; - u32 element{}; -}; - -/// Patch memory (used to communicate tessellation stages). 
-class PatchNode final { -public: - explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} - - [[nodiscard]] constexpr u32 GetOffset() const { - return offset; - } - -private: - u32 offset{}; -}; - -/// Constant buffer node, usually mapped to uniform buffers in GLSL -class CbufNode final { -public: - explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} - - [[nodiscard]] u32 GetIndex() const { - return index; - } - - [[nodiscard]] const Node& GetOffset() const { - return offset; - } - -private: - u32 index{}; - Node offset; -}; - -/// Local memory node -class LmemNode final { -public: - explicit LmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Shared memory node -class SmemNode final { -public: - explicit SmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Global memory node -class GmemNode final { -public: - explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) - : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, - descriptor{descriptor_} {} - - [[nodiscard]] const Node& GetRealAddress() const { - return real_address; - } - - [[nodiscard]] const Node& GetBaseAddress() const { - return base_address; - } - - [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { - return descriptor; - } - -private: - Node real_address; - Node base_address; - GlobalMemoryBase descriptor; -}; - -/// Commentary, can be dropped -class CommentNode final { -public: - explicit CommentNode(std::string text_) : text{std::move(text_)} {} - - [[nodiscard]] const std::string& GetText() const { - return text; - } - -private: - std::string text; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.cpp 
b/src/video_core/shader/node_helper.cpp deleted file mode 100644 index 6a5b6940d..000000000 --- a/src/video_core/shader/node_helper.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "common/common_types.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -Node Conditional(Node condition, std::vector code) { - return MakeNode(std::move(condition), std::move(code)); -} - -Node Comment(std::string text) { - return MakeNode(std::move(text)); -} - -Node Immediate(u32 value) { - return MakeNode(value); -} - -Node Immediate(s32 value) { - return Immediate(static_cast(value)); -} - -Node Immediate(f32 value) { - u32 integral; - std::memcpy(&integral, &value, sizeof(u32)); - return Immediate(integral); -} - -OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { - if (is_signed) { - return operation_code; - } - switch (operation_code) { - case OperationCode::FCastInteger: - return OperationCode::FCastUInteger; - case OperationCode::IAdd: - return OperationCode::UAdd; - case OperationCode::IMul: - return OperationCode::UMul; - case OperationCode::IDiv: - return OperationCode::UDiv; - case OperationCode::IMin: - return OperationCode::UMin; - case OperationCode::IMax: - return OperationCode::UMax; - case OperationCode::ICastFloat: - return OperationCode::UCastFloat; - case OperationCode::ICastUnsigned: - return OperationCode::UCastSigned; - case OperationCode::ILogicalShiftLeft: - return OperationCode::ULogicalShiftLeft; - case OperationCode::ILogicalShiftRight: - return OperationCode::ULogicalShiftRight; - case OperationCode::IArithmeticShiftRight: - return OperationCode::UArithmeticShiftRight; - case OperationCode::IBitwiseAnd: - return OperationCode::UBitwiseAnd; - case OperationCode::IBitwiseOr: - return 
OperationCode::UBitwiseOr; - case OperationCode::IBitwiseXor: - return OperationCode::UBitwiseXor; - case OperationCode::IBitwiseNot: - return OperationCode::UBitwiseNot; - case OperationCode::IBitfieldExtract: - return OperationCode::UBitfieldExtract; - case OperationCode::IBitfieldInsert: - return OperationCode::UBitfieldInsert; - case OperationCode::IBitCount: - return OperationCode::UBitCount; - case OperationCode::LogicalILessThan: - return OperationCode::LogicalULessThan; - case OperationCode::LogicalIEqual: - return OperationCode::LogicalUEqual; - case OperationCode::LogicalILessEqual: - return OperationCode::LogicalULessEqual; - case OperationCode::LogicalIGreaterThan: - return OperationCode::LogicalUGreaterThan; - case OperationCode::LogicalINotEqual: - return OperationCode::LogicalUNotEqual; - case OperationCode::LogicalIGreaterEqual: - return OperationCode::LogicalUGreaterEqual; - case OperationCode::AtomicIExchange: - return OperationCode::AtomicUExchange; - case OperationCode::AtomicIAdd: - return OperationCode::AtomicUAdd; - case OperationCode::AtomicIMin: - return OperationCode::AtomicUMin; - case OperationCode::AtomicIMax: - return OperationCode::AtomicUMax; - case OperationCode::AtomicIAnd: - return OperationCode::AtomicUAnd; - case OperationCode::AtomicIOr: - return OperationCode::AtomicUOr; - case OperationCode::AtomicIXor: - return OperationCode::AtomicUXor; - case OperationCode::INegate: - UNREACHABLE_MSG("Can't negate an unsigned integer"); - return {}; - case OperationCode::IAbsolute: - UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); - return {}; - default: - UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code); - return {}; - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h deleted file mode 100644 index 1e0886185..000000000 --- a/src/video_core/shader/node_helper.h +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2019 yuzu 
Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/shader/node.h" - -namespace VideoCommon::Shader { - -/// This arithmetic operation cannot be constraint -inline constexpr MetaArithmetic PRECISE = {true}; -/// This arithmetic operation can be optimized away -inline constexpr MetaArithmetic NO_PRECISE = {false}; - -/// Creates a conditional node -Node Conditional(Node condition, std::vector code); - -/// Creates a commentary node -Node Comment(std::string text); - -/// Creates an u32 immediate -Node Immediate(u32 value); - -/// Creates a s32 immediate -Node Immediate(s32 value); - -/// Creates a f32 immediate -Node Immediate(f32 value); - -/// Converts an signed operation code to an unsigned operation code -OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); - -template -Node MakeNode(Args&&... args) { - static_assert(std::is_convertible_v); - return std::make_shared(T(std::forward(args)...)); -} - -template -TrackSampler MakeTrackSampler(Args&&... args) { - static_assert(std::is_convertible_v); - return std::make_shared(T{std::forward(args)...}); -} - -template -Node Operation(OperationCode code, Args&&... args) { - if constexpr (sizeof...(args) == 0) { - return MakeNode(code); - } else if constexpr (std::is_convertible_v>, - Meta>) { - return MakeNode(code, std::forward(args)...); - } else { - return MakeNode(code, Meta{}, std::forward(args)...); - } -} - -template -Node SignedOperation(OperationCode code, bool is_signed, Args&&... 
args) { - return Operation(SignedToUnsignedCode(code, is_signed), std::forward(args)...); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp deleted file mode 100644 index 148d91fcb..000000000 --- a/src/video_core/shader/registry.cpp +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/kepler_compute.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -using Tegra::Engines::ConstBufferEngineInterface; -using Tegra::Engines::SamplerDescriptor; -using Tegra::Engines::ShaderType; - -namespace { - -GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage == ShaderType::Compute) { - return {}; - } - - auto& graphics = dynamic_cast(engine); - - return { - .tfb_layouts = graphics.regs.tfb_layouts, - .tfb_varying_locs = graphics.regs.tfb_varying_locs, - .primitive_topology = graphics.regs.draw.topology, - .tessellation_primitive = graphics.regs.tess_mode.prim, - .tessellation_spacing = graphics.regs.tess_mode.spacing, - .tfb_enabled = graphics.regs.tfb_enabled != 0, - .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, - }; -} - -ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage != ShaderType::Compute) { - return {}; - } - - auto& compute = dynamic_cast(engine); - const auto& launch = compute.launch_description; - - return { - .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, - .shared_memory_size_in_words = launch.shared_alloc, - .local_memory_size_in_words = launch.local_pos_alloc, - }; -} - -} // 
Anonymous namespace - -Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) - : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, - bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} - -Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) - : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, - graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( - shader_stage, engine_)} {} - -Registry::~Registry() = default; - -std::optional Registry::ObtainKey(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = keys.find(key); - if (iter != keys.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); - keys.emplace(key, value); - return value; -} - -std::optional Registry::ObtainBoundSampler(u32 offset) { - const u32 key = offset; - const auto iter = bound_samplers.find(key); - if (iter != bound_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); - bound_samplers.emplace(key, value); - return value; -} - -std::optional Registry::ObtainSeparateSampler( - std::pair buffers, std::pair offsets) { - SeparateSamplerKey key; - key.buffers = buffers; - key.offsets = offsets; - const auto iter = separate_samplers.find(key); - if (iter != separate_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - - const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); - const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); - const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); - separate_samplers.emplace(key, value); - return value; -} - 
-std::optional Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = bindless_samplers.find(key); - if (iter != bindless_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); - bindless_samplers.emplace(key, value); - return value; -} - -void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { - keys.insert_or_assign({buffer, offset}, value); -} - -void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { - bound_samplers.insert_or_assign(offset, sampler); -} - -void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { - bindless_samplers.insert_or_assign({buffer, offset}, sampler); -} - -bool Registry::IsConsistent() const { - if (!engine) { - return true; - } - return std::all_of(keys.begin(), keys.end(), - [this](const auto& pair) { - const auto [cbuf, offset] = pair.first; - const auto value = pair.second; - return value == engine->AccessConstBuffer32(stage, cbuf, offset); - }) && - std::all_of(bound_samplers.begin(), bound_samplers.end(), - [this](const auto& sampler) { - const auto [key, value] = sampler; - return value == engine->AccessBoundSampler(stage, key); - }) && - std::all_of(bindless_samplers.begin(), bindless_samplers.end(), - [this](const auto& sampler) { - const auto [cbuf, offset] = sampler.first; - const auto value = sampler.second; - return value == engine->AccessBindlessSampler(stage, cbuf, offset); - }); -} - -bool Registry::HasEqualKeys(const Registry& rhs) const { - return std::tie(keys, bound_samplers, bindless_samplers) == - std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); -} - -const GraphicsInfo& Registry::GetGraphicsInfo() const { - ASSERT(stage != Tegra::Engines::ShaderType::Compute); - return graphics_info; -} - -const ComputeInfo& Registry::GetComputeInfo() const { - ASSERT(stage 
== Tegra::Engines::ShaderType::Compute); - return compute_info; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h deleted file mode 100644 index 4bebefdde..000000000 --- a/src/video_core/shader/registry.h +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "common/hash.h" -#include "video_core/engines/const_buffer_engine_interface.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" - -namespace VideoCommon::Shader { - -struct SeparateSamplerKey { - std::pair buffers; - std::pair offsets; -}; - -} // namespace VideoCommon::Shader - -namespace std { - -template <> -struct hash { - std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { - return std::hash{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ - key.offsets.second); - } -}; - -template <> -struct equal_to { - bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, - const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { - return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; - } -}; - -} // namespace std - -namespace VideoCommon::Shader { - -using KeyMap = std::unordered_map, u32, Common::PairHash>; -using BoundSamplerMap = std::unordered_map; -using SeparateSamplerMap = - std::unordered_map; -using BindlessSamplerMap = - std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; - -struct GraphicsInfo { - using Maxwell = Tegra::Engines::Maxwell3D::Regs; - - std::array - tfb_layouts{}; - std::array, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; - Maxwell::PrimitiveTopology primitive_topology{}; - Maxwell::TessellationPrimitive 
tessellation_primitive{}; - Maxwell::TessellationSpacing tessellation_spacing{}; - bool tfb_enabled = false; - bool tessellation_clockwise = false; -}; -static_assert(std::is_trivially_copyable_v && - std::is_standard_layout_v); - -struct ComputeInfo { - std::array workgroup_size{}; - u32 shared_memory_size_in_words = 0; - u32 local_memory_size_in_words = 0; -}; -static_assert(std::is_trivially_copyable_v && std::is_standard_layout_v); - -struct SerializedRegistryInfo { - VideoCore::GuestDriverProfile guest_driver_profile; - u32 bound_buffer = 0; - GraphicsInfo graphics; - ComputeInfo compute; -}; - -/** - * The Registry is a class use to interface the 3D and compute engines with the shader compiler. - * With it, the shader can obtain required data from GPU state and store it for disk shader - * compilation. - */ -class Registry { -public: - explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); - - explicit Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine_); - - ~Registry(); - - /// Retrieves a key from the registry, if it's registered, it will give the registered value, if - /// not it will obtain it from maxwell3d and register it. - std::optional ObtainKey(u32 buffer, u32 offset); - - std::optional ObtainBoundSampler(u32 offset); - - std::optional ObtainSeparateSampler( - std::pair buffers, std::pair offsets); - - std::optional ObtainBindlessSampler(u32 buffer, u32 offset); - - /// Inserts a key. - void InsertKey(u32 buffer, u32 offset, u32 value); - - /// Inserts a bound sampler key. - void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Inserts a bindless sampler key. - void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Checks keys and samplers against engine's current const buffers. - /// Returns true if they are the same value, false otherwise. 
- bool IsConsistent() const; - - /// Returns true if the keys are equal to the other ones in the registry. - bool HasEqualKeys(const Registry& rhs) const; - - /// Returns graphics information from this shader - const GraphicsInfo& GetGraphicsInfo() const; - - /// Returns compute information from this shader - const ComputeInfo& GetComputeInfo() const; - - /// Gives an getter to the const buffer keys in the database. - const KeyMap& GetKeys() const { - return keys; - } - - /// Gets samplers database. - const BoundSamplerMap& GetBoundSamplers() const { - return bound_samplers; - } - - /// Gets bindless samplers database. - const BindlessSamplerMap& GetBindlessSamplers() const { - return bindless_samplers; - } - - /// Gets bound buffer used on this shader - u32 GetBoundBuffer() const { - return bound_buffer; - } - - /// Obtains access to the guest driver's profile. - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { - return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; - } - -private: - const Tegra::Engines::ShaderType stage; - VideoCore::GuestDriverProfile stored_guest_driver_profile; - Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; - KeyMap keys; - BoundSamplerMap bound_samplers; - SeparateSamplerMap separate_samplers; - BindlessSamplerMap bindless_samplers; - u32 bound_buffer; - GraphicsInfo graphics_info; - ComputeInfo compute_info; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp deleted file mode 100644 index a4987ffc6..000000000 --- a/src/video_core/shader/shader_ir.cpp +++ /dev/null @@ -1,464 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Attribute; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaMode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredCondition; -using Tegra::Shader::PredOperation; -using Tegra::Shader::Register; - -ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, - Registry& registry_) - : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ - registry_} { - Decode(); - PostDecode(); -} - -ShaderIR::~ShaderIR() = default; - -Node ShaderIR::GetRegister(Register reg) { - if (reg != Register::ZeroIndex) { - used_registers.insert(static_cast(reg)); - } - return MakeNode(reg); -} - -Node ShaderIR::GetCustomVariable(u32 id) { - return MakeNode(id); -} - -Node ShaderIR::GetImmediate19(Instruction instr) { - return Immediate(instr.alu.GetImm20_19()); -} - -Node ShaderIR::GetImmediate32(Instruction instr) { - return Immediate(instr.alu.GetImm20_32()); -} - -Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { - const auto index = static_cast(index_); - const auto offset = static_cast(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); - - return MakeNode(index, Immediate(offset)); -} - -Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { - const auto index = static_cast(index_); - const auto offset = static_cast(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); - - Node final_offset = [&] { - // Attempt to inline constant buffer without a variable offset. This is done to allow - // tracking LDC calls. 
- if (const auto gpr = std::get_if(&*node)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - return Immediate(offset); - } - } - return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); - }(); - return MakeNode(index, std::move(final_offset)); -} - -Node ShaderIR::GetPredicate(u64 pred_, bool negated) { - const auto pred = static_cast(pred_); - if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { - used_predicates.insert(pred); - } - - return MakeNode(pred, negated); -} - -Node ShaderIR::GetPredicate(bool immediate) { - return GetPredicate(static_cast(immediate ? Pred::UnusedIndex : Pred::NeverExecute)); -} - -Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_input_attributes.emplace(index); - return MakeNode(index, static_cast(element), std::move(buffer)); -} - -Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { - uses_physical_attributes = true; - return MakeNode(GetRegister(physical_address), buffer); -} - -Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_output_attributes.insert(index); - return MakeNode(index, static_cast(element), std::move(buffer)); -} - -Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { - Node node = MakeNode(flag); - if (negated) { - return Operation(OperationCode::LogicalNegate, std::move(node)); - } - return node; -} - -Node ShaderIR::GetLocalMemory(Node address) { - return MakeNode(std::move(address)); -} - -Node ShaderIR::GetSharedMemory(Node address) { - return MakeNode(std::move(address)); -} - -Node ShaderIR::GetTemporary(u32 id) { - return GetRegister(Register::ZeroIndex + 1 + id); -} - -Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); - } 
- if (negate) { - value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { - switch (size) { - case Register::Size::Byte: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - return value; - case Register::Size::Short: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - return value; - case Register::Size::Word: - // Default - do nothing - return value; - default: - UNREACHABLE_MSG("Unimplemented conversion size: {}", size); - return value; - } -} - -Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { - if (!is_signed) { - // Absolute or negate on an unsigned is pointless - return value; - } - if (absolute) { - value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { - Node value = Immediate(instr.half_imm.PackImmediates()); - if (!has_negation) { - return value; - } - - Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); - Node second_negate = 
GetPredicate(instr.half_imm.second_negate != 0); - - return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), - std::move(second_negate)); -} - -Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { - return Operation(OperationCode::HUnpack, type, std::move(value)); -} - -Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { - switch (merge) { - case Tegra::Shader::HalfMerge::H0_H1: - return src; - case Tegra::Shader::HalfMerge::F32: - return Operation(OperationCode::HMergeF32, std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H0: - return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H1: - return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); - } - UNREACHABLE(); - return src; -} - -Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), - GetPredicate(true)); - } - return value; -} - -Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { - if (condition == PredCondition::T) { - return GetPredicate(true); - } else if (condition == PredCondition::F) { - return GetPredicate(false); - } - - static constexpr std::array comparison_table{ - OperationCode(0), - OperationCode::LogicalFOrdLessThan, // LT - OperationCode::LogicalFOrdEqual, // EQ - OperationCode::LogicalFOrdLessEqual, // LE - 
OperationCode::LogicalFOrdGreaterThan, // GT - OperationCode::LogicalFOrdNotEqual, // NE - OperationCode::LogicalFOrdGreaterEqual, // GE - OperationCode::LogicalFOrdered, // NUM - OperationCode::LogicalFUnordered, // NAN - OperationCode::LogicalFUnordLessThan, // LTU - OperationCode::LogicalFUnordEqual, // EQU - OperationCode::LogicalFUnordLessEqual, // LEU - OperationCode::LogicalFUnordGreaterThan, // GTU - OperationCode::LogicalFUnordNotEqual, // NEU - OperationCode::LogicalFUnordGreaterEqual, // GEU - }; - const std::size_t index = static_cast(condition); - ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); - - return Operation(comparison_table[index], op_a, op_b); -} - -Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, - std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, - std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, - std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, - std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, - std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), - std::move(op_b)); -} - -Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, - std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, - std::pair{PredCondition::LE, 
OperationCode::Logical2HLessEqual}, - std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, - std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, - std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, - std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, - std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, - std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, - std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, - std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); -} - -OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { - static constexpr std::array operation_table{ - OperationCode::LogicalAnd, - OperationCode::LogicalOr, - OperationCode::LogicalXor, - }; - - const auto index = static_cast(operation); - if (index >= operation_table.size()) { - UNIMPLEMENTED_MSG("Unknown predicate operation."); - return {}; - } - - return operation_table[index]; -} - -Node ShaderIR::GetConditionCode(ConditionCode cc) const { - switch (cc) { - case ConditionCode::NEU: - return GetInternalFlag(InternalFlag::Zero, true); - case ConditionCode::FCSM_TR: - UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); - return MakeNode(Pred::NeverExecute, false); - default: - UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc); - return MakeNode(Pred::NeverExecute, false); - } -} - -void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { - bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); -} - -void 
ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); -} - -void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); -} - -void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { - SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); -} - -void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), - Immediate(offset), Immediate(bits)); -} - -Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), - Immediate(bits)); -} - -void ShaderIR::MarkAttributeUsage(Attribute::Index index, 
u64 element) { - switch (index) { - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - break; - case 1: - uses_layer = true; - break; - case 2: - uses_viewport_index = true; - break; - case 3: - uses_point_size = true; - break; - } - break; - case Attribute::Index::TessCoordInstanceIDVertexID: - switch (element) { - case 2: - uses_instance_id = true; - break; - case 3: - uses_vertex_id = true; - break; - } - break; - case Attribute::Index::ClipDistances0123: - case Attribute::Index::ClipDistances4567: { - const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element; - used_clip_distances.at(clip_index) = true; - break; - } - case Attribute::Index::FrontColor: - case Attribute::Index::FrontSecondaryColor: - case Attribute::Index::BackColor: - case Attribute::Index::BackSecondaryColor: - uses_legacy_varyings = true; - break; - default: - if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) { - uses_legacy_varyings = true; - } - break; - } -} - -std::size_t ShaderIR::DeclareAmend(Node new_amend) { - const auto id = amend_code.size(); - amend_code.push_back(std::move(new_amend)); - return id; -} - -u32 ShaderIR::NewCustomVariable() { - return num_custom_variables++; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h deleted file mode 100644 index 1cd7c14d7..000000000 --- a/src/video_core/shader/shader_ir.h +++ /dev/null @@ -1,479 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct ShaderBlock; - -constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; - -struct ConstBuffer { - constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) - : max_offset{max_offset_}, is_indirect{is_indirect_} {} - - constexpr ConstBuffer() = default; - - void MarkAsUsed(u64 offset) { - max_offset = std::max(max_offset, static_cast(offset)); - } - - void MarkAsUsedIndirect() { - is_indirect = true; - } - - bool IsIndirect() const { - return is_indirect; - } - - u32 GetSize() const { - return max_offset + static_cast(sizeof(float)); - } - - u32 GetMaxOffset() const { - return max_offset; - } - -private: - u32 max_offset = 0; - bool is_indirect = false; -}; - -struct GlobalMemoryUsage { - bool is_read{}; - bool is_written{}; -}; - -class ShaderIR final { -public: - explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, - CompilerSettings settings_, Registry& registry_); - ~ShaderIR(); - - const std::map& GetBasicBlocks() const { - return basic_blocks; - } - - const std::set& GetRegisters() const { - return used_registers; - } - - const std::set& GetPredicates() const { - return used_predicates; - } - - const std::set& GetInputAttributes() const { - return used_input_attributes; - } - - const std::set& GetOutputAttributes() const { - return used_output_attributes; - } - - const std::map& GetConstantBuffers() const { - return used_cbufs; - } - - const std::list& GetSamplers() const { - return used_samplers; - } - - const std::list& GetImages() const 
{ - return used_images; - } - - const std::array& GetClipDistances() - const { - return used_clip_distances; - } - - const std::map& GetGlobalMemory() const { - return used_global_memory; - } - - std::size_t GetLength() const { - return static_cast(coverage_end * sizeof(u64)); - } - - bool UsesLayer() const { - return uses_layer; - } - - bool UsesViewportIndex() const { - return uses_viewport_index; - } - - bool UsesPointSize() const { - return uses_point_size; - } - - bool UsesInstanceId() const { - return uses_instance_id; - } - - bool UsesVertexId() const { - return uses_vertex_id; - } - - bool UsesLegacyVaryings() const { - return uses_legacy_varyings; - } - - bool UsesYNegate() const { - return uses_y_negate; - } - - bool UsesWarps() const { - return uses_warps; - } - - bool HasPhysicalAttributes() const { - return uses_physical_attributes; - } - - const Tegra::Shader::Header& GetHeader() const { - return header; - } - - bool IsFlowStackDisabled() const { - return disable_flow_stack; - } - - bool IsDecompiled() const { - return decompiled; - } - - const ASTManager& GetASTManager() const { - return program_manager; - } - - ASTNode GetASTProgram() const { - return program_manager.GetProgram(); - } - - u32 GetASTNumVariables() const { - return program_manager.GetVariables(); - } - - u32 ConvertAddressToNvidiaSpace(u32 address) const { - return (address - main_offset) * static_cast(sizeof(Tegra::Shader::Instruction)); - } - - /// Returns a condition code evaluated from internal flags - Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; - - const Node& GetAmendNode(std::size_t index) const { - return amend_code[index]; - } - - u32 GetNumCustomVariables() const { - return num_custom_variables; - } - -private: - friend class ASTDecoder; - - struct SamplerInfo { - std::optional type; - std::optional is_array; - std::optional is_shadow; - std::optional is_buffer; - - constexpr bool IsComplete() const noexcept { - return type && is_array && is_shadow && 
is_buffer; - } - }; - - void Decode(); - void PostDecode(); - - NodeBlock DecodeRange(u32 begin, u32 end); - void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); - void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); - - /** - * Decodes a single instruction from Tegra to IR. - * @param bb Basic block where the nodes will be written to. - * @param pc Program counter. Offset to decode. - * @return Next address to decode. - */ - u32 DecodeInstr(NodeBlock& bb, u32 pc); - - u32 DecodeArithmetic(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); - u32 DecodeBfe(NodeBlock& bb, u32 pc); - u32 DecodeBfi(NodeBlock& bb, u32 pc); - u32 DecodeShift(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); - u32 DecodeFfma(NodeBlock& bb, u32 pc); - u32 DecodeHfma2(NodeBlock& bb, u32 pc); - u32 DecodeConversion(NodeBlock& bb, u32 pc); - u32 DecodeWarp(NodeBlock& bb, u32 pc); - u32 DecodeMemory(NodeBlock& bb, u32 pc); - u32 DecodeTexture(NodeBlock& bb, u32 pc); - u32 DecodeImage(NodeBlock& bb, u32 pc); - u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeFloatSet(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); - u32 DecodeHalfSet(NodeBlock& bb, u32 pc); - u32 DecodeVideo(NodeBlock& bb, u32 pc); - u32 DecodeXmad(NodeBlock& bb, u32 pc); - u32 DecodeOther(NodeBlock& bb, u32 pc); - - /// Generates a node for a passed register. 
- Node GetRegister(Tegra::Shader::Register reg); - /// Generates a node for a custom variable - Node GetCustomVariable(u32 id); - /// Generates a node representing a 19-bit immediate value - Node GetImmediate19(Tegra::Shader::Instruction instr); - /// Generates a node representing a 32-bit immediate value - Node GetImmediate32(Tegra::Shader::Instruction instr); - /// Generates a node representing a constant buffer - Node GetConstBuffer(u64 index, u64 offset); - /// Generates a node representing a constant buffer with a variadic offset - Node GetConstBufferIndirect(u64 index, u64 offset, Node node); - /// Generates a node for a passed predicate. It can be optionally negated - Node GetPredicate(u64 pred, bool negated = false); - /// Generates a predicate node for an immediate true or false value - Node GetPredicate(bool immediate); - /// Generates a node representing an input attribute. Keeps track of used attributes. - Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); - /// Generates a node representing a physical input attribute. - Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); - /// Generates a node representing an output attribute. Keeps track of used attributes. - Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); - /// Generates a node representing an internal flag - Node GetInternalFlag(InternalFlag flag, bool negated = false) const; - /// Generates a node representing a local memory address - Node GetLocalMemory(Node address); - /// Generates a node representing a shared memory address - Node GetSharedMemory(Node address); - /// Generates a temporary, internally it uses a post-RZ register - Node GetTemporary(u32 id); - - /// Sets a register. src value must be a number-evaluated node. - void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); - /// Sets a predicate. 
src value must be a bool-evaluated node - void SetPredicate(NodeBlock& bb, u64 dest, Node src); - /// Sets an internal flag. src value must be a bool-evaluated node - void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); - /// Sets a local memory address with a value. - void SetLocalMemory(NodeBlock& bb, Node address, Node value); - /// Sets a shared memory address with a value. - void SetSharedMemory(NodeBlock& bb, Node address, Node value); - /// Sets a temporary. Internally it uses a post-RZ register - void SetTemporary(NodeBlock& bb, u32 id, Node value); - - /// Sets internal flags from a float - void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); - /// Sets internal flags from an integer - void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); - - /// Conditionally absolute/negated float. Absolute is applied first - Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); - /// Conditionally saturates a float - Node GetSaturatedFloat(Node value, bool saturate = true); - - /// Converts an integer to different sizes. - Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); - /// Conditionally absolute/negated integer. Absolute is applied first - Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); - - /// Unpacks a half immediate from an instruction - Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); - /// Unpacks a binary value into a half float pair with a type format - Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); - /// Merges a half pair into another value - Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); - /// Conditionally absolute/negated half float pair. 
Absolute is applied first - Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); - /// Conditionally saturates a half float pair - Node GetSaturatedHalfFloat(Node value, bool saturate = true); - - /// Get image component value by type and size - std::pair GetComponentValue(Tegra::Texture::ComponentType component_type, - u32 component_size, Node original_value); - - /// Returns a predicate comparing two floats - Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - /// Returns a predicate comparing two integers - Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, - Node op_a, Node op_b); - /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared - Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - - /// Returns a predicate combiner operation - OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); - - /// Queries the missing sampler info from the execution context. - SamplerInfo GetSamplerInfo(SamplerInfo info, - std::optional sampler); - - /// Accesses a texture sampler. - std::optional GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); - - /// Accesses a texture sampler for a bindless texture. - std::optional GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var); - - /// Accesses an image. - ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); - - /// Access a bindless image sampler. - ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); - - /// Extracts a sequence of bits from a node - Node BitfieldExtract(Node value, u32 offset, u32 bits); - - /// Inserts a sequence of bits from a node - Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); - - /// Marks the usage of a input or output attribute. 
- void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); - - /// Decodes VMNMX instruction and inserts its code into the passed basic block. - void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); - - void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components); - - void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - - Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, bool is_aoffi, - std::optional bindless_reg); - - Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array); - - Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, - bool is_bindless); - - Node4 GetTldCode(Tegra::Shader::Instruction instr); - - Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool is_array); - - std::tuple ValidateAndGetCoordinateElement( - Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, - bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); - - std::vector GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); - - std::vector GetPtpCoordinates(std::array ptp_regs); - - Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, std::vector coords, - Node array, Node depth_compare, u32 bias_offset, std::vector aoffi, - std::optional 
bindless_reg); - - Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, - u64 byte_height); - - void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, - Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, - Tegra::Shader::PredicateResultMode predicate_mode, - Tegra::Shader::Pred predicate, bool sets_cc); - void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, - Node op_c, Node imm_lut, bool sets_cc); - - std::tuple TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor); - - std::pair HandleBindlessIndirectRead(const CbufNode& cbuf, - const OperationNode& operation, - Node gpr, Node base_offset, - Node tracked, const NodeBlock& code, - s64 cursor); - - std::optional TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const; - - std::tuple TrackGlobalMemory(NodeBlock& bb, - Tegra::Shader::Instruction instr, - bool is_read, bool is_write); - - /// Register new amending code and obtain the reference id. 
- std::size_t DeclareAmend(Node new_amend); - - u32 NewCustomVariable(); - - const ProgramCode& program_code; - const u32 main_offset; - const CompilerSettings settings; - Registry& registry; - - bool decompiled{}; - bool disable_flow_stack{}; - - u32 coverage_begin{}; - u32 coverage_end{}; - - std::map basic_blocks; - NodeBlock global_code; - ASTManager program_manager{true, true}; - std::vector amend_code; - u32 num_custom_variables{}; - - std::set used_registers; - std::set used_predicates; - std::set used_input_attributes; - std::set used_output_attributes; - std::map used_cbufs; - std::list used_samplers; - std::list used_images; - std::array used_clip_distances{}; - std::map used_global_memory; - bool uses_layer{}; - bool uses_viewport_index{}; - bool uses_point_size{}; - bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes - bool uses_instance_id{}; - bool uses_vertex_id{}; - bool uses_legacy_varyings{}; - bool uses_y_negate{}; - bool uses_warps{}; - bool uses_indexed_samplers{}; - - Tegra::Shader::Header header; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp deleted file mode 100644 index 6be3ea92b..000000000 --- a/src/video_core/shader/track.cpp +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -std::pair FindOperation(const NodeBlock& code, s64 cursor, - OperationCode operation_code) { - for (; cursor >= 0; --cursor) { - Node node = code.at(cursor); - - if (const auto operation = std::get_if(&*node)) { - if (operation->GetCode() == operation_code) { - return {std::move(node), cursor}; - } - } - - if (const auto conditional = std::get_if(&*node)) { - const auto& conditional_code = conditional->GetCode(); - auto result = FindOperation( - conditional_code, static_cast(conditional_code.size() - 1), operation_code); - auto& found = result.first; - if (found) { - return {std::move(found), cursor}; - } - } - } - return {}; -} - -std::optional> DecoupleIndirectRead(const OperationNode& operation) { - if (operation.GetCode() != OperationCode::UAdd) { - return std::nullopt; - } - Node gpr; - Node offset; - ASSERT(operation.GetOperandsCount() == 2); - for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { - Node operand = operation[i]; - if (std::holds_alternative(*operand)) { - offset = operation[i]; - } else if (std::holds_alternative(*operand)) { - gpr = operation[i]; - } - } - if (offset && gpr) { - return std::make_pair(gpr, offset); - } - return std::nullopt; -} - -bool AmendNodeCv(std::size_t amend_index, Node node) { - if (const auto operation = std::get_if(&*node)) { - operation->SetAmendIndex(amend_index); - return true; - } - if (const auto conditional = std::get_if(&*node)) { - conditional->SetAmendIndex(amend_index); - return true; - } - return false; -} - -} // Anonymous namespace - -std::pair ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor) { - if (const auto cbuf = std::get_if(&*tracked)) { - const u32 cbuf_index = cbuf->GetIndex(); - - // Constant buffer found, test 
if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if(&*offset)) { - auto track = MakeTrackSampler(cbuf_index, immediate->GetValue()); - return {tracked, track}; - } - if (const auto operation = std::get_if(&*offset)) { - const u32 bound_buffer = registry.GetBoundBuffer(); - if (bound_buffer != cbuf_index) { - return {}; - } - if (const std::optional pair = DecoupleIndirectRead(*operation)) { - auto [gpr, base_offset] = *pair; - return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, - code, cursor); - } - } - return {}; - } - if (const auto gpr = std::get_if(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackBindlessSampler(source, code, new_cursor); - } - if (const auto operation = std::get_if(&*tracked)) { - const OperationNode& op = *operation; - - const OperationCode opcode = operation->GetCode(); - if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { - ASSERT(op.GetOperandsCount() == 2); - auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); - auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); - if (node_a && node_b) { - auto track = MakeTrackSampler(std::pair{index_a, index_b}, - std::pair{offset_a, offset_b}); - return {tracked, std::move(track)}; - } - } - std::size_t i = op.GetOperandsCount(); - while (i--) { - if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) { - // Constant buffer found in operand. 
- return found; - } - } - return {}; - } - if (const auto conditional = std::get_if(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackBindlessSampler(tracked, conditional_code, - static_cast(conditional_code.size())); - } - return {}; -} - -std::pair ShaderIR::HandleBindlessIndirectRead( - const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, - const NodeBlock& code, s64 cursor) { - const auto offset_imm = std::get(*base_offset); - const auto& gpu_driver = registry.AccessGuestDriverProfile(); - const u32 bindless_cv = NewCustomVariable(); - const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); - Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); - - Node cv_node = GetCustomVariable(bindless_cv); - Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); - const std::size_t amend_index = DeclareAmend(std::move(amend_op)); - AmendNodeCv(amend_index, code[cursor]); - - // TODO: Implement bindless index custom variable - auto track = - MakeTrackSampler(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); - return {tracked, track}; -} - -std::tuple ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, - s64 cursor) const { - if (const auto cbuf = std::get_if(&*tracked)) { - // Constant buffer found, test if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if(&*offset)) { - return {tracked, cbuf->GetIndex(), immediate->GetValue()}; - } - return {}; - } - if (const auto gpr = std::get_if(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackCbuf(source, code, new_cursor); - } - if (const 
auto operation = std::get_if(&*tracked)) { - for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { - if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { - // Cbuf found in operand. - return found; - } - } - return {}; - } - if (const auto conditional = std::get_if(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackCbuf(tracked, conditional_code, static_cast(conditional_code.size())); - } - return {}; -} - -std::optional ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register - // that it uses as operand - const auto result = TrackRegister(&std::get(*tracked), code, cursor - 1); - const auto& found = result.first; - if (!found) { - return std::nullopt; - } - if (const auto immediate = std::get_if(&*found)) { - return immediate->GetValue(); - } - return std::nullopt; -} - -std::pair ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const { - for (; cursor >= 0; --cursor) { - const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); - if (!found_node) { - return {}; - } - const auto operation = std::get_if(&*found_node); - ASSERT(operation); - - const auto& target = (*operation)[0]; - if (const auto gpr_target = std::get_if(&*target)) { - if (gpr_target->GetIndex() == tracked->GetIndex()) { - return {(*operation)[1], new_cursor}; - } - } - } - return {}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp deleted file mode 100644 index 22a933761..000000000 --- a/src/video_core/shader/transform_feedback.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/transform_feedback.h" - -namespace VideoCommon::Shader { - -namespace { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 - -/// Attribute offsets that describe a vector -constexpr std::array VECTORS = { - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] -}; -} // namespace - -std::unordered_map BuildTransformFeedback(const GraphicsInfo& info) { - - std::unordered_map tfb; - - for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = info.tfb_varying_locs[buffer]; - const auto& layout = info.tfb_layouts[buffer]; - const std::size_t varying_count = layout.varying_count; - - std::size_t highest = 0; - - for (std::size_t offset = 0; offset < varying_count; ++offset) { - const std::size_t base_offset = offset; - const u8 
location = locations[offset]; - - VaryingTFB varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * sizeof(u32); - varying.components = 1; - - if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - - [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; - UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); - - highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); - } - - UNIMPLEMENTED_IF(highest != layout.stride); - } - return tfb; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h deleted file mode 100644 index 77d05f64c..000000000 --- a/src/video_core/shader/transform_feedback.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "common/common_types.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct VaryingTFB { - std::size_t buffer; - std::size_t stride; - std::size_t offset; - std::size_t components; -}; - -std::unordered_map BuildTransformFeedback(const GraphicsInfo& info); - -} // namespace VideoCommon::Shader -- cgit v1.2.3 From 85cce78583bc2232428a8fb39e43182877c8d5ad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Feb 2021 00:59:28 -0300 Subject: shader: Primitive Vulkan integration --- src/video_core/CMakeLists.txt | 6 +- src/video_core/engines/kepler_compute.h | 1 - src/video_core/engines/shader_bytecode.h | 2298 -------------------- src/video_core/engines/shader_header.h | 158 -- .../renderer_vulkan/vk_compute_pipeline.cpp | 140 +- .../renderer_vulkan/vk_compute_pipeline.h | 43 +- .../renderer_vulkan/vk_descriptor_pool.cpp | 6 +- .../renderer_vulkan/vk_descriptor_pool.h | 10 +- src/video_core/renderer_vulkan/vk_pipeline.h | 36 + .../renderer_vulkan/vk_pipeline_cache.cpp | 190 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 23 +- src/video_core/renderer_vulkan/vk_rasterizer.h | 3 - .../renderer_vulkan/vk_resource_pool.cpp | 12 +- src/video_core/renderer_vulkan/vk_resource_pool.h | 12 +- 15 files changed, 430 insertions(+), 2538 deletions(-) delete mode 100644 src/video_core/engines/shader_bytecode.h delete mode 100644 src/video_core/engines/shader_header.h create mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c5ce71706..3323e6916 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -43,9 +43,6 @@ add_library(video_core STATIC engines/maxwell_3d.h engines/maxwell_dma.cpp engines/maxwell_dma.h - engines/shader_bytecode.h - engines/shader_header.h - engines/shader_type.h 
framebuffer_config.h macro/macro.cpp macro/macro.h @@ -123,6 +120,7 @@ add_library(video_core STATIC renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_pipeline.h renderer_vulkan/vk_query_cache.cpp renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp @@ -201,7 +199,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad xbyak) +target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 0d7683c2d..f8b8d06ac 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -12,7 +12,6 @@ #include "common/common_types.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h deleted file mode 100644 index 8b45f1b62..000000000 --- a/src/video_core/engines/shader_bytecode.h +++ /dev/null @@ -1,2298 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -struct Register { - /// Number of registers - static constexpr std::size_t NumRegisters = 256; - - /// Register 255 is special cased to always be 0 - static constexpr std::size_t ZeroIndex = 255; - - enum class Size : u64 { - Byte = 0, - Short = 1, - Word = 2, - Long = 3, - }; - - constexpr Register() = default; - - constexpr Register(u64 value_) : value(value_) {} - - [[nodiscard]] constexpr operator u64() const { - return value; - } - - template - [[nodiscard]] constexpr u64 operator-(const T& oth) const { - return value - oth; - } - - template - [[nodiscard]] constexpr u64 operator&(const T& oth) const { - return value & oth; - } - - [[nodiscard]] constexpr u64 operator&(const Register& oth) const { - return value & oth.value; - } - - [[nodiscard]] constexpr u64 operator~() const { - return ~value; - } - - [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const { - elem = (value + elem) & 3; - return (value & ~3) + elem; - } - -private: - u64 value{}; -}; - -enum class AttributeSize : u64 { - Word = 0, - DoubleWord = 1, - TripleWord = 2, - QuadWord = 3, -}; - -union Attribute { - Attribute() = default; - - constexpr explicit Attribute(u64 value_) : value(value_) {} - - enum class Index : u64 { - LayerViewportPointSize = 6, - Position = 7, - Attribute_0 = 8, - Attribute_31 = 39, - FrontColor = 40, - FrontSecondaryColor = 41, - BackColor = 42, - BackSecondaryColor = 43, - ClipDistances0123 = 44, - ClipDistances4567 = 45, - PointCoord = 46, - // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex - // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval - // shader. 
- TessCoordInstanceIDVertexID = 47, - TexCoord_0 = 48, - TexCoord_7 = 55, - // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment - // shader. It is unknown what the other values contain. - FrontFacing = 63, - }; - - union { - BitField<20, 10, u64> immediate; - BitField<22, 2, u64> element; - BitField<24, 6, Index> index; - BitField<31, 1, u64> patch; - BitField<47, 3, AttributeSize> size; - - [[nodiscard]] bool IsPhysical() const { - return patch == 0 && element == 0 && static_cast(index.Value()) == 0; - } - } fmt20; - - union { - BitField<30, 2, u64> element; - BitField<32, 6, Index> index; - } fmt28; - - BitField<39, 8, u64> reg; - u64 value{}; -}; - -union Sampler { - Sampler() = default; - - constexpr explicit Sampler(u64 value_) : value(value_) {} - - enum class Index : u64 { - Sampler_0 = 8, - }; - - BitField<36, 13, Index> index; - u64 value{}; -}; - -union Image { - Image() = default; - - constexpr explicit Image(u64 value_) : value{value_} {} - - BitField<36, 13, u64> index; - u64 value; -}; - -} // namespace Tegra::Shader - -namespace std { - -// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330. 
-template <> -struct make_unsigned { - using type = Tegra::Shader::Attribute; -}; - -template <> -struct make_unsigned { - using type = Tegra::Shader::Register; -}; - -} // namespace std - -namespace Tegra::Shader { - -enum class Pred : u64 { - UnusedIndex = 0x7, - NeverExecute = 0xF, -}; - -enum class PredCondition : u64 { - F = 0, // Always false - LT = 1, // Ordered less than - EQ = 2, // Ordered equal - LE = 3, // Ordered less than or equal - GT = 4, // Ordered greater than - NE = 5, // Ordered not equal - GE = 6, // Ordered greater than or equal - NUM = 7, // Ordered - NAN_ = 8, // Unordered - LTU = 9, // Unordered less than - EQU = 10, // Unordered equal - LEU = 11, // Unordered less than or equal - GTU = 12, // Unordered greater than - NEU = 13, // Unordered not equal - GEU = 14, // Unordered greater than or equal - T = 15, // Always true -}; - -enum class PredOperation : u64 { - And = 0, - Or = 1, - Xor = 2, -}; - -enum class LogicOperation : u64 { - And = 0, - Or = 1, - Xor = 2, - PassB = 3, -}; - -enum class SubOp : u64 { - Cos = 0x0, - Sin = 0x1, - Ex2 = 0x2, - Lg2 = 0x3, - Rcp = 0x4, - Rsq = 0x5, - Sqrt = 0x8, -}; - -enum class F2iRoundingOp : u64 { - RoundEven = 0, - Floor = 1, - Ceil = 2, - Trunc = 3, -}; - -enum class F2fRoundingOp : u64 { - None = 0, - Pass = 3, - Round = 8, - Floor = 9, - Ceil = 10, - Trunc = 11, -}; - -enum class AtomicOp : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, - SafeAdd = 10, -}; - -enum class GlobalAtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32_FTZ_RN = 3, - F16x2_FTZ_RN = 4, - S64 = 5, -}; - -enum class UniformType : u64 { - UnsignedByte = 0, - SignedByte = 1, - UnsignedShort = 2, - SignedShort = 3, - Single = 4, - Double = 5, - Quad = 6, - UnsignedQuad = 7, -}; - -enum class StoreType : u64 { - Unsigned8 = 0, - Signed8 = 1, - Unsigned16 = 2, - Signed16 = 3, - Bits32 = 4, - Bits64 = 5, - Bits128 = 6, -}; - -enum class AtomicType : u64 { - U32 = 0, - 
S32 = 1, - U64 = 2, - S64 = 3, -}; - -enum class IMinMaxExchange : u64 { - None = 0, - XLo = 1, - XMed = 2, - XHi = 3, -}; - -enum class VideoType : u64 { - Size16_Low = 0, - Size16_High = 1, - Size32 = 2, - Invalid = 3, -}; - -enum class VmadShr : u64 { - Shr7 = 1, - Shr15 = 2, -}; - -enum class VmnmxType : u64 { - Bits8, - Bits16, - Bits32, -}; - -enum class VmnmxOperation : u64 { - Mrg_16H = 0, - Mrg_16L = 1, - Mrg_8B0 = 2, - Mrg_8B2 = 3, - Acc = 4, - Min = 5, - Max = 6, - Nop = 7, -}; - -enum class XmadMode : u64 { - None = 0, - CLo = 1, - CHi = 2, - CSfu = 3, - CBcc = 4, -}; - -enum class IAdd3Mode : u64 { - None = 0, - RightShift = 1, - LeftShift = 2, -}; - -enum class IAdd3Height : u64 { - None = 0, - LowerHalfWord = 1, - UpperHalfWord = 2, -}; - -enum class FlowCondition : u64 { - Always = 0xF, - Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? -}; - -enum class ConditionCode : u64 { - F = 0, - LT = 1, - EQ = 2, - LE = 3, - GT = 4, - NE = 5, - GE = 6, - Num = 7, - Nan = 8, - LTU = 9, - EQU = 10, - LEU = 11, - GTU = 12, - NEU = 13, - GEU = 14, - T = 15, - OFF = 16, - LO = 17, - SFF = 18, - LS = 19, - HI = 20, - SFT = 21, - HS = 22, - OFT = 23, - CSM_TA = 24, - CSM_TR = 25, - CSM_MX = 26, - FCSM_TA = 27, - FCSM_TR = 28, - FCSM_MX = 29, - RLE = 30, - RGT = 31, -}; - -enum class PredicateResultMode : u64 { - None = 0x0, - NotZero = 0x3, -}; - -enum class TextureType : u64 { - Texture1D = 0, - Texture2D = 1, - Texture3D = 2, - TextureCube = 3, -}; - -enum class TextureQueryType : u64 { - Dimension = 1, - TextureType = 2, - SamplePosition = 5, - Filter = 16, - LevelOfDetail = 18, - Wrap = 20, - BorderColor = 22, -}; - -enum class TextureProcessMode : u64 { - None = 0, - LZ = 1, // Load LOD of zero. - LB = 2, // Load Bias. - LL = 3, // Load LOD. - LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB. - LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL. 
-}; - -enum class TextureMiscMode : u64 { - DC, - AOFFI, // Uses Offset - NDV, - NODEP, - MZ, - PTP, -}; - -enum class SurfaceDataMode : u64 { - P = 0, - D_BA = 1, -}; - -enum class OutOfBoundsStore : u64 { - Ignore = 0, - Clamp = 1, - Trap = 2, -}; - -enum class ImageType : u64 { - Texture1D = 0, - TextureBuffer = 1, - Texture1DArray = 2, - Texture2D = 3, - Texture2DArray = 4, - Texture3D = 5, -}; - -enum class IsberdMode : u64 { - None = 0, - Patch = 1, - Prim = 2, - Attr = 3, -}; - -enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; - -enum class MembarType : u64 { - CTA = 0, - GL = 1, - SYS = 2, - VC = 3, -}; - -enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 }; - -enum class HalfType : u64 { - H0_H1 = 0, - F32 = 1, - H0_H0 = 2, - H1_H1 = 3, -}; - -enum class HalfMerge : u64 { - H0_H1 = 0, - F32 = 1, - Mrg_H0 = 2, - Mrg_H1 = 3, -}; - -enum class HalfPrecision : u64 { - None = 0, - FTZ = 1, - FMZ = 2, -}; - -enum class R2pMode : u64 { - Pr = 0, - Cc = 1, -}; - -enum class IpaInterpMode : u64 { - Pass = 0, - Multiply = 1, - Constant = 2, - Sc = 3, -}; - -enum class IpaSampleMode : u64 { - Default = 0, - Centroid = 1, - Offset = 2, -}; - -enum class LmemLoadCacheManagement : u64 { - Default = 0, - LU = 1, - CI = 2, - CV = 3, -}; - -enum class StoreCacheManagement : u64 { - Default = 0, - CG = 1, - CS = 2, - WT = 3, -}; - -struct IpaMode { - IpaInterpMode interpolation_mode; - IpaSampleMode sampling_mode; - - [[nodiscard]] bool operator==(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) == - std::tie(a.interpolation_mode, a.sampling_mode); - } - [[nodiscard]] bool operator!=(const IpaMode& a) const { - return !operator==(a); - } - [[nodiscard]] bool operator<(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) < - std::tie(a.interpolation_mode, a.sampling_mode); - } -}; - -enum class SystemVariable : u64 { - LaneId = 0x00, - VirtCfg = 0x02, - VirtId = 0x03, - Pm0 = 
0x04, - Pm1 = 0x05, - Pm2 = 0x06, - Pm3 = 0x07, - Pm4 = 0x08, - Pm5 = 0x09, - Pm6 = 0x0a, - Pm7 = 0x0b, - OrderingTicket = 0x0f, - PrimType = 0x10, - InvocationId = 0x11, - Ydirection = 0x12, - ThreadKill = 0x13, - ShaderType = 0x14, - DirectBeWriteAddressLow = 0x15, - DirectBeWriteAddressHigh = 0x16, - DirectBeWriteEnabled = 0x17, - MachineId0 = 0x18, - MachineId1 = 0x19, - MachineId2 = 0x1a, - MachineId3 = 0x1b, - Affinity = 0x1c, - InvocationInfo = 0x1d, - WscaleFactorXY = 0x1e, - WscaleFactorZ = 0x1f, - Tid = 0x20, - TidX = 0x21, - TidY = 0x22, - TidZ = 0x23, - CtaParam = 0x24, - CtaIdX = 0x25, - CtaIdY = 0x26, - CtaIdZ = 0x27, - NtId = 0x28, - CirQueueIncrMinusOne = 0x29, - Nlatc = 0x2a, - SmSpaVersion = 0x2c, - MultiPassShaderInfo = 0x2d, - LwinHi = 0x2e, - SwinHi = 0x2f, - SwinLo = 0x30, - SwinSz = 0x31, - SmemSz = 0x32, - SmemBanks = 0x33, - LwinLo = 0x34, - LwinSz = 0x35, - LmemLosz = 0x36, - LmemHioff = 0x37, - EqMask = 0x38, - LtMask = 0x39, - LeMask = 0x3a, - GtMask = 0x3b, - GeMask = 0x3c, - RegAlloc = 0x3d, - CtxAddr = 0x3e, // .fmask = F_SM50 - BarrierAlloc = 0x3e, // .fmask = F_SM60 - GlobalErrorStatus = 0x40, - WarpErrorStatus = 0x42, - WarpErrorStatusClear = 0x43, - PmHi0 = 0x48, - PmHi1 = 0x49, - PmHi2 = 0x4a, - PmHi3 = 0x4b, - PmHi4 = 0x4c, - PmHi5 = 0x4d, - PmHi6 = 0x4e, - PmHi7 = 0x4f, - ClockLo = 0x50, - ClockHi = 0x51, - GlobalTimerLo = 0x52, - GlobalTimerHi = 0x53, - HwTaskId = 0x60, - CircularQueueEntryIndex = 0x61, - CircularQueueEntryAddressLow = 0x62, - CircularQueueEntryAddressHigh = 0x63, -}; - -enum class PhysicalAttributeDirection : u64 { - Input = 0, - Output = 1, -}; - -enum class VoteOperation : u64 { - All = 0, // allThreadsNV - Any = 1, // anyThreadNV - Eq = 2, // allThreadsEqualNV -}; - -enum class ImageAtomicOperationType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32 = 3, - S64 = 5, - SD32 = 6, - SD64 = 7, -}; - -enum class ImageAtomicOperation : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 
6, - Xor = 7, - Exch = 8, -}; - -enum class ShuffleOperation : u64 { - Idx = 0, // shuffleNV - Up = 1, // shuffleUpNV - Down = 2, // shuffleDownNV - Bfly = 3, // shuffleXorNV -}; - -enum class ShfType : u64 { - Bits32 = 0, - U64 = 2, - S64 = 3, -}; - -enum class ShfXmode : u64 { - None = 0, - HI = 1, - X = 2, - XHI = 3, -}; - -union Instruction { - constexpr Instruction& operator=(const Instruction& instr) { - value = instr.value; - return *this; - } - - constexpr Instruction(u64 value_) : value{value_} {} - constexpr Instruction(const Instruction& instr) : value(instr.value) {} - - [[nodiscard]] constexpr bool Bit(u64 offset) const { - return ((value >> offset) & 1) != 0; - } - - BitField<0, 8, Register> gpr0; - BitField<8, 8, Register> gpr8; - union { - BitField<16, 4, Pred> full_pred; - BitField<16, 3, u64> pred_index; - } pred; - BitField<19, 1, u64> negate_pred; - BitField<20, 8, Register> gpr20; - BitField<20, 4, SubOp> sub_op; - BitField<28, 8, Register> gpr28; - BitField<39, 8, Register> gpr39; - BitField<48, 16, u64> opcode; - - union { - BitField<8, 5, ConditionCode> cc; - BitField<13, 1, u64> trigger; - } nop; - - union { - BitField<48, 2, VoteOperation> operation; - BitField<45, 3, u64> dest_pred; - BitField<39, 3, u64> value; - BitField<42, 1, u64> negate_value; - } vote; - - union { - BitField<30, 2, ShuffleOperation> operation; - BitField<48, 3, u64> pred48; - BitField<28, 1, u64> is_index_imm; - BitField<29, 1, u64> is_mask_imm; - BitField<20, 5, u64> index_imm; - BitField<34, 13, u64> mask_imm; - } shfl; - - union { - BitField<44, 1, u64> ftz; - BitField<39, 2, u64> tab5cb8_2; - BitField<38, 1, u64> ndv; - BitField<47, 1, u64> cc; - BitField<28, 8, u64> swizzle; - } fswzadd; - - union { - BitField<8, 8, Register> gpr; - BitField<20, 24, s64> offset; - } gmem; - - union { - BitField<20, 16, u64> imm20_16; - BitField<20, 19, u64> imm20_19; - BitField<20, 32, s64> imm20_32; - BitField<45, 1, u64> negate_b; - BitField<46, 1, u64> abs_a; - BitField<48, 
1, u64> negate_a; - BitField<49, 1, u64> abs_b; - BitField<50, 1, u64> saturate_d; - BitField<56, 1, u64> negate_imm; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - } fmnmx; - - union { - BitField<39, 1, u64> invert_a; - BitField<40, 1, u64> invert_b; - BitField<41, 2, LogicOperation> operation; - BitField<44, 2, PredicateResultMode> pred_result_mode; - BitField<48, 3, Pred> pred48; - } lop; - - union { - BitField<53, 2, LogicOperation> operation; - BitField<55, 1, u64> invert_a; - BitField<56, 1, u64> invert_b; - } lop32i; - - union { - BitField<28, 8, u64> imm_lut28; - BitField<48, 8, u64> imm_lut48; - - [[nodiscard]] u32 GetImmLut28() const { - return static_cast(imm_lut28); - } - - [[nodiscard]] u32 GetImmLut48() const { - return static_cast(imm_lut48); - } - } lop3; - - [[nodiscard]] u16 GetImm20_16() const { - return static_cast(imm20_16); - } - - [[nodiscard]] u32 GetImm20_19() const { - u32 imm{static_cast(imm20_19)}; - imm <<= 12; - imm |= negate_imm ? 0x80000000 : 0; - return imm; - } - - [[nodiscard]] u32 GetImm20_32() const { - return static_cast(imm20_32); - } - - [[nodiscard]] s32 GetSignedImm20_20() const { - const auto immediate = static_cast(imm20_19 | (negate_imm << 19)); - // Sign extend the 20-bit value. 
- const auto mask = 1U << (20 - 1); - return static_cast((immediate ^ mask) - mask); - } - } alu; - - union { - BitField<38, 1, u64> idx; - BitField<51, 1, u64> saturate; - BitField<52, 2, IpaSampleMode> sample_mode; - BitField<54, 2, IpaInterpMode> interp_mode; - } ipa; - - union { - BitField<39, 2, u64> tab5cb8_2; - BitField<41, 3, u64> postfactor; - BitField<44, 2, u64> tab5c68_0; - BitField<48, 1, u64> negate_b; - } fmul; - - union { - BitField<55, 1, u64> saturate; - } fmul32; - - union { - BitField<52, 1, u64> generates_cc; - } op_32; - - union { - BitField<48, 1, u64> is_signed; - } shift; - - union { - BitField<39, 1, u64> wrap; - } shr; - - union { - BitField<37, 2, ShfType> type; - BitField<48, 2, ShfXmode> xmode; - BitField<50, 1, u64> wrap; - BitField<20, 6, u64> immediate; - } shf; - - union { - BitField<39, 5, u64> shift_amount; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_a; - } alu_integer; - - union { - BitField<43, 1, u64> x; - } iadd; - - union { - BitField<39, 1, u64> ftz; - BitField<32, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - - BitField<35, 2, HalfType> type_c; - } alu_half; - - union { - BitField<39, 2, HalfPrecision> precision; - BitField<39, 1, u64> ftz; - BitField<52, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - } alu_half_imm; - - union { - BitField<29, 1, u64> first_negate; - BitField<20, 9, u64> first; - - BitField<56, 1, u64> second_negate; - BitField<30, 9, u64> second; - - [[nodiscard]] u32 PackImmediates() const { - // Immediates are half floats shifted. 
- constexpr u32 imm_shift = 6; - return static_cast((first << imm_shift) | (second << (16 + imm_shift))); - } - } half_imm; - - union { - union { - BitField<37, 2, HalfPrecision> precision; - BitField<32, 1, u64> saturate; - - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> negate_c; - BitField<35, 2, HalfType> type_c; - } rr; - - BitField<57, 2, HalfPrecision> precision; - BitField<52, 1, u64> saturate; - - BitField<49, 2, HalfMerge> merge; - - BitField<47, 2, HalfType> type_a; - - BitField<56, 1, u64> negate_b; - BitField<28, 2, HalfType> type_b; - - BitField<51, 1, u64> negate_c; - BitField<53, 2, HalfType> type_reg39; - } hfma2; - - union { - BitField<40, 1, u64> invert; - } popc; - - union { - BitField<41, 1, u64> sh; - BitField<40, 1, u64> invert; - BitField<48, 1, u64> is_signed; - } flo; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> neg_pred; - } sel; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - BitField<43, 2, IMinMaxExchange> exchange; - BitField<48, 1, u64> is_signed; - } imnmx; - - union { - BitField<31, 2, IAdd3Height> height_c; - BitField<33, 2, IAdd3Height> height_b; - BitField<35, 2, IAdd3Height> height_a; - BitField<37, 2, IAdd3Mode> mode; - BitField<49, 1, u64> neg_c; - BitField<50, 1, u64> neg_b; - BitField<51, 1, u64> neg_a; - } iadd3; - - union { - BitField<54, 1, u64> saturate; - BitField<56, 1, u64> negate_a; - } iadd32i; - - union { - BitField<53, 1, u64> negate_b; - BitField<54, 1, u64> abs_a; - BitField<56, 1, u64> negate_a; - BitField<57, 1, u64> abs_b; - } fadd32i; - - union { - BitField<40, 1, u64> brev; - BitField<47, 1, u64> rd_cc; - BitField<48, 1, u64> is_signed; - } bfe; - - union { - BitField<48, 3, u64> pred48; - - union { - BitField<20, 20, u64> entry_a; - BitField<39, 5, u64> entry_b; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } imm; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<56, 1, u64> neg; - 
BitField<57, 1, u64> uses_cc; - } hi; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } rz; - - union { - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } r1; - - union { - BitField<28, 8, u64> entry_a; - BitField<37, 1, u64> neg; - BitField<38, 1, u64> uses_cc; - } r2; - - } lea; - - union { - BitField<0, 5, FlowCondition> cond; - } flow; - - union { - BitField<47, 1, u64> cc; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_c; - BitField<51, 2, u64> tab5980_1; - BitField<53, 2, u64> tab5980_0; - } ffma; - - union { - BitField<48, 3, UniformType> type; - BitField<44, 2, u64> unknown; - } ld_c; - - union { - BitField<48, 3, StoreType> type; - } ldst_sl; - - union { - BitField<44, 2, u64> unknown; - } ld_l; - - union { - BitField<44, 2, StoreCacheManagement> cache_management; - } st_l; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } ldg; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } stg; - - union { - BitField<23, 3, AtomicOp> operation; - BitField<48, 1, u64> extended; - BitField<20, 3, GlobalAtomicType> type; - } red; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<49, 3, GlobalAtomicType> type; - BitField<28, 20, s64> offset; - } atom; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<28, 2, AtomicType> type; - BitField<30, 22, s64> offset; - - [[nodiscard]] s32 GetImmediateOffset() const { - return static_cast(offset << 2); - } - } atoms; - - union { - BitField<32, 1, PhysicalAttributeDirection> direction; - BitField<47, 3, AttributeSize> size; - BitField<20, 11, u64> address; - } al2p; - - union { - BitField<53, 3, UniformType> type; - BitField<52, 1, u64> extended; - } generic; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<6, 1, u64> 
neg_b; - BitField<7, 1, u64> abs_a; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fsetp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } isetp; - - union { - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } icmp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 2, PredOperation> op; - } psetp; - - union { - BitField<43, 4, PredCondition> cond; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<39, 3, u64> pred39; - } vsetp; - - union { - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - } pset; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<8, 5, ConditionCode> cc; // flag in cc - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 4, PredOperation> op; // op with pred39 - } csetp; - - union { - BitField<6, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - union { - 
BitField<35, 4, PredCondition> cond; - BitField<49, 1, u64> h_and; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - } reg; - union { - BitField<56, 1, u64> negate_b; - BitField<54, 1, u64> abs_b; - } cbuf; - union { - BitField<49, 4, PredCondition> cond; - BitField<53, 1, u64> h_and; - } cbuf_and_imm; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hsetp2; - - union { - BitField<40, 1, R2pMode> mode; - BitField<41, 2, u64> byte; - BitField<20, 7, u64> immediate_mask; - } p2r_r2p; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<48, 4, PredCondition> cond; - BitField<52, 1, u64> bf; - BitField<53, 1, u64> neg_b; - BitField<54, 1, u64> abs_a; - BitField<55, 1, u64> ftz; - } fset; - - union { - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fcmp; - - union { - BitField<49, 1, u64> bf; - BitField<35, 3, PredCondition> cond; - BitField<50, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hset2; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } iset; - - union { - BitField<45, 1, u64> negate_a; - BitField<49, 1, u64> abs_a; - BitField<10, 2, Register::Size> src_size; - BitField<13, 1, u64> is_input_signed; - BitField<8, 2, Register::Size> dst_size; - BitField<12, 1, u64> is_output_signed; - - union { - BitField<39, 2, u64> tab5cb8_2; - } i2f; - - union { - BitField<39, 2, F2iRoundingOp> rounding; - } f2i; - - union { - 
BitField<39, 4, u64> rounding; - // H0, H1 extract for F16 missing - BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value - [[nodiscard]] F2fRoundingOp GetRoundingMode() const { - constexpr u64 rounding_mask = 0x0B; - return static_cast(rounding.Value() & rounding_mask); - } - } f2f; - - union { - BitField<41, 2, u64> selector; - } int_src; - - union { - BitField<41, 1, u64> selector; - } float_src; - } conversion; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 1, u64> aoffi_flag; - BitField<55, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<36, 1, u64> aoffi_flag; - BitField<37, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - 
return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex_b; - - union { - BitField<22, 6, TextureQueryType> query_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - } txq; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return (ndv_flag != 0); - case TextureMiscMode::NODEP: - return (nodep_flag != 0); - default: - break; - } - return false; - } - } tmml; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 2, u64> offset_mode; - BitField<56, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4; - - union { - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<33, 2, u64> offset_mode; - BitField<37, 2, u64> component; - - [[nodiscard]] bool 
UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4_b; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<51, 1, u64> aoffi_flag; - BitField<52, 2, u64> component; - BitField<55, 1, u64> fp16_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tld4s; - - union { - BitField<0, 8, Register> gpr0; - BitField<28, 8, Register> gpr28; - BitField<49, 1, u64> nodep_flag; - BitField<50, 3, u64> component_mask_selector; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TEXS instruction has a weird encoding for the texture type. 
- if (texture_info == 0) { - return TextureType::Texture1D; - } - if (texture_info >= 1 && texture_info <= 9) { - return TextureType::Texture2D; - } - if (texture_info >= 10 && texture_info <= 11) { - return TextureType::Texture3D; - } - if (texture_info >= 12 && texture_info <= 13) { - return TextureType::TextureCube; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - switch (texture_info) { - case 0: - case 2: - case 6: - case 8: - case 9: - case 11: - return TextureProcessMode::LZ; - case 3: - case 5: - case 13: - return TextureProcessMode::LL; - default: - break; - } - return TextureProcessMode::None; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info >= 7 && texture_info <= 9; - } - - [[nodiscard]] bool HasTwoDestinations() const { - return gpr28.Value() != Register::ZeroIndex; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - static constexpr std::array, 4> mask_lut{{ - {}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x7, 0xb, 0xd, 0xe, 0xf}, - }}; - - std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; - index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; - - u32 mask = mask_lut[index][component_mask_selector]; - // A mask of 0 means this instruction uses an unimplemented mask. 
- ASSERT(mask != 0); - return ((1ull << component) & mask) != 0; - } - } texs; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> ms; // Multisample? - BitField<54, 1, u64> cl; - BitField<55, 1, u64> process_mode; - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; - } - } tld; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TLDS instruction has a weird encoding for the texture type. - if (texture_info <= 1) { - return TextureType::Texture1D; - } - if (texture_info == 2 || texture_info == 8 || texture_info == 12 || - (texture_info >= 4 && texture_info <= 6)) { - return TextureType::Texture2D; - } - if (texture_info == 7) { - return TextureType::Texture3D; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - if (texture_info == 1 || texture_info == 5 || texture_info == 12) { - return TextureProcessMode::LL; - } - return TextureProcessMode::LZ; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return texture_info == 12 || texture_info == 4; - case TextureMiscMode::MZ: - return texture_info == 5; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. 
- return texture_info == 8; - } - } tlds; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - } txd; - - union { - BitField<24, 2, StoreCacheManagement> cache_management; - BitField<33, 3, ImageType> image_type; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - BitField<51, 1, u64> is_immediate; - BitField<52, 1, SurfaceDataMode> mode; - - BitField<20, 3, StoreType> store_data_layout; - BitField<20, 4, u64> component_mask_selector; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - ASSERT(mode == SurfaceDataMode::P); - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array mask = { - 0, (R), (G), (R | G), (B), (R | B), - (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), - (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask_selector)}.test(component); - } - - [[nodiscard]] StoreType GetStoreDataLayout() const { - ASSERT(mode == SurfaceDataMode::D_BA); - return store_data_layout; - } - } suldst; - - union { - BitField<28, 1, u64> is_ba; - BitField<51, 3, ImageAtomicOperationType> operation_type; - BitField<33, 3, ImageType> image_type; - BitField<29, 4, ImageAtomicOperation> operation; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - } suatom_d; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchTarget() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast(target); - constexpr auto instruction_size = 
static_cast(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast((target_value ^ mask) - mask) / instruction_size + 1; - } - } bra; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchExtend() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast(target); - constexpr auto instruction_size = static_cast(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast((target_value ^ mask) - mask) / instruction_size + 1; - } - } brx; - - union { - BitField<39, 1, u64> emit; // EmitVertex - BitField<40, 1, u64> cut; // EndPrimitive - } out; - - union { - BitField<31, 1, u64> skew; - BitField<32, 1, u64> o; - BitField<33, 2, IsberdMode> mode; - BitField<47, 2, IsberdShift> shift; - } isberd; - - union { - BitField<8, 2, MembarType> type; - BitField<0, 2, MembarUnknown> unknown; - } membar; - - union { - BitField<48, 1, u64> signed_a; - BitField<38, 1, u64> is_byte_chunk_a; - BitField<36, 2, VideoType> type_a; - BitField<36, 2, u64> byte_height_a; - - BitField<49, 1, u64> signed_b; - BitField<50, 1, u64> use_register_b; - BitField<30, 1, u64> is_byte_chunk_b; - BitField<28, 2, VideoType> type_b; - BitField<28, 2, u64> byte_height_b; - } video; - - union { - BitField<51, 2, VmadShr> shr; - BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) - BitField<47, 1, u64> cc; - } vmad; - - union { - BitField<54, 1, u64> is_dest_signed; - BitField<48, 1, u64> is_src_a_signed; - BitField<49, 1, u64> is_src_b_signed; - BitField<37, 2, u64> src_format_a; - BitField<29, 2, u64> src_format_b; - BitField<56, 1, u64> mx; - BitField<55, 1, u64> sat; - BitField<36, 2, u64> 
selector_a; - BitField<28, 2, u64> selector_b; - BitField<50, 1, u64> is_op_b_register; - BitField<51, 3, VmnmxOperation> operation; - - [[nodiscard]] VmnmxType SourceFormatA() const { - switch (src_format_a) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - - [[nodiscard]] VmnmxType SourceFormatB() const { - switch (src_format_b) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - } vmnmx; - - union { - BitField<20, 16, u64> imm20_16; - BitField<35, 1, u64> high_b_rr; // used on RR - BitField<36, 1, u64> product_shift_left; - BitField<37, 1, u64> merge_37; - BitField<48, 1, u64> sign_a; - BitField<49, 1, u64> sign_b; - BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC - BitField<50, 3, XmadMode> mode; - BitField<52, 1, u64> high_b; - BitField<53, 1, u64> high_a; - BitField<55, 1, u64> product_shift_left_second; // used on CR - BitField<56, 1, u64> merge_56; - } xmad; - - union { - BitField<20, 14, u64> shifted_offset; - BitField<34, 5, u64> index; - - [[nodiscard]] u64 GetOffset() const { - return shifted_offset * 4; - } - } cbuf34; - - union { - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - - [[nodiscard]] s64 GetOffset() const { - return offset; - } - } cbuf36; - - // Unsure about the size of this one. - // It's always used with a gpr0, so any size should be fine. 
- BitField<20, 8, SystemVariable> sys20; - - BitField<47, 1, u64> generates_cc; - BitField<61, 1, u64> is_b_imm; - BitField<60, 1, u64> is_b_gpr; - BitField<59, 1, u64> is_c_gpr; - BitField<20, 24, s64> smem_imm; - BitField<0, 5, ConditionCode> flow_condition_code; - - Attribute attribute; - Sampler sampler; - Image image; - - u64 value; -}; -static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); -static_assert(std::is_standard_layout_v, "Instruction is not standard layout"); - -class OpCode { -public: - enum class Id { - KIL, - SSY, - SYNC, - BRK, - DEPBAR, - VOTE, - VOTE_VTG, - SHFL, - FSWZADD, - BFE_C, - BFE_R, - BFE_IMM, - BFI_RC, - BFI_IMM_R, - BRA, - BRX, - PBK, - LD_A, - LD_L, - LD_S, - LD_C, - LD, // Load from generic memory - LDG, // Load from global memory - ST_A, - ST_L, - ST_S, - ST, // Store in generic memory - STG, // Store in global memory - RED, // Reduction operation - ATOM, // Atomic operation on global memory - ATOMS, // Atomic operation on shared memory - AL2P, // Transforms attribute memory into physical memory - TEX, - TEX_B, // Texture Load Bindless - TXQ, // Texture Query - TXQ_B, // Texture Query Bindless - TEXS, // Texture Fetch with scalar/non-vec4 source/destinations - TLD, // Texture Load - TLDS, // Texture Load with scalar/non-vec4 source/destinations - TLD4, // Texture Gather 4 - TLD4_B, // Texture Gather 4 Bindless - TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations - TMML_B, // Texture Mip Map Level - TMML, // Texture Mip Map Level - TXD, // Texture Gradient/Load with Derivates - TXD_B, // Texture Gradient/Load with Derivates Bindless - SUST, // Surface Store - SULD, // Surface Load - SUATOM, // Surface Atomic Operation - EXIT, - NOP, - IPA, - OUT_R, // Emit vertex/primitive - ISBERD, - BAR, - MEMBAR, - VMAD, - VSETP, - VMNMX, - FFMA_IMM, // Fused Multiply and Add - FFMA_CR, - FFMA_RC, - FFMA_RR, - FADD_C, - FADD_R, - FADD_IMM, - FADD32I, - FMUL_C, - FMUL_R, - FMUL_IMM, - FMUL32_IMM, - IADD_C, - 
IADD_R, - IADD_IMM, - IADD3_C, // Add 3 Integers - IADD3_R, - IADD3_IMM, - IADD32I, - ISCADD_C, // Scale and Add - ISCADD_R, - ISCADD_IMM, - FLO_R, - FLO_C, - FLO_IMM, - LEA_R1, - LEA_R2, - LEA_RZ, - LEA_IMM, - LEA_HI, - HADD2_C, - HADD2_R, - HADD2_IMM, - HMUL2_C, - HMUL2_R, - HMUL2_IMM, - HFMA2_CR, - HFMA2_RC, - HFMA2_RR, - HFMA2_IMM_R, - HSETP2_C, - HSETP2_R, - HSETP2_IMM, - HSET2_C, - HSET2_R, - HSET2_IMM, - POPC_C, - POPC_R, - POPC_IMM, - SEL_C, - SEL_R, - SEL_IMM, - ICMP_RC, - ICMP_R, - ICMP_CR, - ICMP_IMM, - FCMP_RR, - FCMP_RC, - FCMP_IMMR, - MUFU, // Multi-Function Operator - RRO_C, // Range Reduction Operator - RRO_R, - RRO_IMM, - F2F_C, - F2F_R, - F2F_IMM, - F2I_C, - F2I_R, - F2I_IMM, - I2F_C, - I2F_R, - I2F_IMM, - I2I_C, - I2I_R, - I2I_IMM, - LOP_C, - LOP_R, - LOP_IMM, - LOP32I, - LOP3_C, - LOP3_R, - LOP3_IMM, - MOV_C, - MOV_R, - MOV_IMM, - S2R, - MOV32_IMM, - SHL_C, - SHL_R, - SHL_IMM, - SHR_C, - SHR_R, - SHR_IMM, - SHF_RIGHT_R, - SHF_RIGHT_IMM, - SHF_LEFT_R, - SHF_LEFT_IMM, - FMNMX_C, - FMNMX_R, - FMNMX_IMM, - IMNMX_C, - IMNMX_R, - IMNMX_IMM, - FSETP_C, // Set Predicate - FSETP_R, - FSETP_IMM, - FSET_C, - FSET_R, - FSET_IMM, - ISETP_C, - ISETP_IMM, - ISETP_R, - ISET_R, - ISET_C, - ISET_IMM, - PSETP, - PSET, - CSETP, - R2P_IMM, - P2R_IMM, - XMAD_IMM, - XMAD_CR, - XMAD_RC, - XMAD_RR, - }; - - enum class Type { - Trivial, - Arithmetic, - ArithmeticImmediate, - ArithmeticInteger, - ArithmeticIntegerImmediate, - ArithmeticHalf, - ArithmeticHalfImmediate, - Bfe, - Bfi, - Shift, - Ffma, - Hfma2, - Flow, - Synch, - Warp, - Memory, - Texture, - Image, - FloatSet, - FloatSetPredicate, - IntegerSet, - IntegerSetPredicate, - HalfSet, - HalfSetPredicate, - PredicateSetPredicate, - PredicateSetRegister, - RegisterSetPredicate, - Conversion, - Video, - Xmad, - Unknown, - }; - - /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be - /// conditionally executed). 
- [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) { - // TODO(Subv): Add the rest of unpredicated instructions. - return opcode != Id::SSY && opcode != Id::PBK; - } - - class Matcher { - public: - constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) - : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} - - [[nodiscard]] constexpr const char* GetName() const { - return name; - } - - [[nodiscard]] constexpr u16 GetMask() const { - return mask; - } - - [[nodiscard]] constexpr Id GetId() const { - return id; - } - - [[nodiscard]] constexpr Type GetType() const { - return type; - } - - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - [[nodiscard]] constexpr bool Matches(u16 instruction) const { - return (instruction & mask) == expected; - } - - private: - const char* name; - u16 mask; - u16 expected; - Id id; - Type type; - }; - - using DecodeResult = std::optional>; - [[nodiscard]] static DecodeResult Decode(Instruction instr) { - static const auto table{GetDecodeTable()}; - - const auto matches_instruction = [instr](const auto& matcher) { - return matcher.Matches(static_cast(instr.opcode)); - }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); - return iter != table.end() ? std::optional>(*iter) - : std::nullopt; - } - -private: - struct Detail { - private: - static constexpr std::size_t opcode_bitsize = 16; - - /** - * Generates the mask and the expected value after masking from a given bitstring. - * A '0' in a bitstring indicates that a zero must be present at that bit position. - * A '1' in a bitstring indicates that a one must be present at that bit position. 
- */ - [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) { - u16 mask = 0, expect = 0; - for (std::size_t i = 0; i < opcode_bitsize; i++) { - const std::size_t bit_position = opcode_bitsize - i - 1; - switch (bitstring[i]) { - case '0': - mask |= static_cast(1U << bit_position); - break; - case '1': - expect |= static_cast(1U << bit_position); - mask |= static_cast(1U << bit_position); - break; - default: - // Ignore - break; - } - } - return std::make_pair(mask, expect); - } - - public: - /// Creates a matcher that can match and parse instructions based on bitstring. - [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op, - Type type, const char* const name) { - const auto [mask, expected] = GetMaskAndExpect(bitstring); - return Matcher(name, mask, expected, op, type); - } - }; - - [[nodiscard]] static std::vector GetDecodeTable() { - std::vector table = { -#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name) - INST("111000110011----", Id::KIL, Type::Flow, "KIL"), - INST("111000101001----", Id::SSY, Type::Flow, "SSY"), - INST("111000101010----", Id::PBK, Type::Flow, "PBK"), - INST("111000100100----", Id::BRA, Type::Flow, "BRA"), - INST("111000100101----", Id::BRX, Type::Flow, "BRX"), - INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), - INST("111000110100----", Id::BRK, Type::Flow, "BRK"), - INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), - INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), - INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), - INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), - INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), - INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), - INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), - INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), - INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), - INST("1110111110010---", Id::LD_C, 
Type::Memory, "LD_C"), - INST("100-------------", Id::LD, Type::Memory, "LD"), - INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), - INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), - INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), - INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), - INST("101-------------", Id::ST, Type::Memory, "ST"), - INST("1110111011011---", Id::STG, Type::Memory, "STG"), - INST("1110101111111---", Id::RED, Type::Memory, "RED"), - INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), - INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), - INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), - INST("110000----111---", Id::TEX, Type::Texture, "TEX"), - INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), - INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), - INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), - INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), - INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), - INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), - INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), - INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), - INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"), - INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), - INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), - INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), - INST("11011110001110--", Id::TXD, Type::Texture, "TXD"), - INST("11101011001-----", Id::SUST, Type::Image, "SUST"), - INST("11101011000-----", Id::SULD, Type::Image, "SULD"), - INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), - INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), - INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), - INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), - INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), - INST("1111000010101---", Id::BAR, 
Type::Trivial, "BAR"), - INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), - INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), - INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), - INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"), - INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), - INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), - INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), - INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"), - INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"), - INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"), - INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"), - INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"), - INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), - INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), - INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), - INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"), - INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"), - INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"), - INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"), - INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"), - INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"), - INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"), - INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"), - INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"), - INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"), - INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"), - INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"), - INST("0101110000001---", 
Id::POPC_R, Type::ArithmeticInteger, "POPC_R"), - INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"), - INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"), - INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"), - INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"), - INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"), - INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"), - INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"), - INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"), - INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"), - INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"), - INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"), - INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"), - INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"), - INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), - INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), - INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), - INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"), - INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"), - INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"), - INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), - INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), - INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"), - INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"), - INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), - INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), - INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, 
"HFMA2_R_IMM"), - INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), - INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), - INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), - INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"), - INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), - INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), - INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), - INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), - INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), - INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), - INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), - INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), - INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"), - INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), - INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), - INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), - INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"), - INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"), - INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"), - INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), - INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), - INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), - INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"), - INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), - INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), - INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), - INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), - INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"), - 
INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"), - INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"), - INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), - INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), - INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), - INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"), - INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), - INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), - INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), - INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), - INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"), - INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"), - INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"), - INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"), - INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), - INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), - INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), - INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"), - INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"), - INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), - INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"), - INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"), - INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"), - INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), - INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), - INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), - INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), - INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), - INST("0101110010111---", Id::I2F_R, Type::Conversion, 
"I2F_R"), - INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), - INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"), - INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"), - INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"), - INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"), - INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"), - INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"), - INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"), - INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), - INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), - INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"), - INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), - INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), - INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), - INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), - INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), - INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"), - INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"), - INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), - INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), - INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), - INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), - }; -#undef INST - std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { - // If a matcher has more bits in its mask it is more specific, so it - // should come first. 
- return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count(); - }); - - return table; - } -}; - -} // namespace Tegra::Shader diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h deleted file mode 100644 index e0d7b89c5..000000000 --- a/src/video_core/engines/shader_header.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/bit_field.h" -#include "common/common_funcs.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -enum class OutputTopology : u32 { - PointList = 1, - LineStrip = 6, - TriangleStrip = 7, -}; - -enum class PixelImap : u8 { - Unused = 0, - Constant = 1, - Perspective = 2, - ScreenLinear = 3, -}; - -// Documentation in: -// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html -struct Header { - union { - BitField<0, 5, u32> sph_type; - BitField<5, 5, u32> version; - BitField<10, 4, u32> shader_type; - BitField<14, 1, u32> mrt_enable; - BitField<15, 1, u32> kills_pixels; - BitField<16, 1, u32> does_global_store; - BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; - BitField<26, 1, u32> does_load_or_store; - BitField<27, 1, u32> does_fp64; - BitField<28, 4, u32> stream_out_mask; - } common0; - - union { - BitField<0, 24, u32> shader_local_memory_low_size; - BitField<24, 8, u32> per_patch_attribute_count; - } common1; - - union { - BitField<0, 24, u32> shader_local_memory_high_size; - BitField<24, 8, u32> threads_per_input_primitive; - } common2; - - union { - BitField<0, 24, u32> shader_local_memory_crs_size; - BitField<24, 4, OutputTopology> output_topology; - BitField<28, 4, u32> reserved; - } common3; - - union { - BitField<0, 12, u32> max_output_vertices; - BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. 
- BitField<20, 4, u32> reserved; - BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. - } common4; - - union { - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - union { - BitField<0, 8, u16> clip_distances; - BitField<8, 1, u16> point_sprite_s; - BitField<9, 1, u16> point_sprite_t; - BitField<10, 1, u16> fog_coordinate; - BitField<12, 1, u16> tessellation_eval_point_u; - BitField<13, 1, u16> tessellation_eval_point_v; - BitField<14, 1, u16> instance_id; - BitField<15, 1, u16> vertex_id; - }; - INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved - INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // OmapColor - INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - } vtg; - - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - - union { - BitField<0, 2, PixelImap> x; - BitField<2, 2, PixelImap> y; - BitField<4, 2, PixelImap> z; - BitField<6, 2, PixelImap> w; - u8 raw; - } imap_generic_vector[32]; - - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved - - struct { - u32 target; - union { - BitField<0, 1, u32> sample_mask; - BitField<1, 1, u32> depth; - BitField<2, 30, u32> reserved; - }; - } omap; - - bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { - const u32 bit = render_target * 4 + component; - 
return omap.target & (1 << bit); - } - - PixelImap GetPixelImap(u32 attribute) const { - const auto get_index = [this, attribute](u32 index) { - return static_cast( - (imap_generic_vector[attribute].raw >> (index * 2)) & 3); - }; - - std::optional result; - for (u32 component = 0; component < 4; ++component) { - const PixelImap index = get_index(component); - if (index == PixelImap::Unused) { - continue; - } - if (result && result != index) { - LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode"); - } - result = index; - } - return result.value_or(PixelImap::Unused); - } - } ps; - - std::array raw; - }; - - u64 GetLocalMemorySize() const { - return (common1.shader_local_memory_low_size | - (common2.shader_local_memory_high_size << 24)); - } -}; -static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); - -} // namespace Tegra::Shader diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 7a3660496..588ce6139 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -4,6 +4,9 @@ #include +#include + +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" @@ -13,9 +16,142 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +namespace { +vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { + boost::container::small_vector bindings; + u32 binding{}; + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } + 
for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } + return device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); +} + +vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( + const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, + VkPipelineLayout pipeline_layout) { + boost::container::small_vector entries; + size_t offset{}; + u32 binding{}; + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, + .descriptorSetLayout = descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, + 
.pipelineLayout = pipeline_layout, + .set = 0, + }); +} +} // Anonymous namespace + +ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + const Shader::Info& info_, vk::ShaderModule spv_module_) + : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, + spv_module(std::move(spv_module_)), + descriptor_set_layout(CreateDescriptorSetLayout(device, info)), + descriptor_allocator(descriptor_pool, *descriptor_set_layout), + pipeline_layout{device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + })}, + descriptor_update_template{ + CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, + pipeline{device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + })} {} + +void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { + u32 enabled_uniforms{}; + for (const auto& desc : info.constant_buffer_descriptors) { + enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; + } + buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); -ComputePipeline::ComputePipeline() = default; + buffer_cache.UnbindComputeStorageBuffers(); + size_t index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, 
true); + ++index; + } + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); +} -ComputePipeline::~ComputePipeline() = default; +VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + return descriptor_set; +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 433d8bb3d..dc045d524 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -5,19 +5,52 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_pipeline.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKScheduler; -class VKUpdateDescriptorQueue; -class ComputePipeline { +class ComputePipeline : public Pipeline { public: - explicit ComputePipeline(); - ~ComputePipeline(); + explicit ComputePipeline() = default; + explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + const Shader::Info& info, vk::ShaderModule spv_module); + + ComputePipeline& operator=(ComputePipeline&&) noexcept = default; + ComputePipeline(ComputePipeline&&) noexcept = default; + + ComputePipeline& operator=(const ComputePipeline&) = delete; + ComputePipeline(const ComputePipeline&) = delete; + + void ConfigureBufferCache(BufferCache& buffer_cache); + + [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); + + [[nodiscard]] VkPipeline Handle() const noexcept { + return *pipeline; + } + + [[nodiscard]] VkPipelineLayout 
PipelineLayout() const noexcept { + return *pipeline_layout; + } + +private: + VKUpdateDescriptorQueue* update_descriptor_queue; + Shader::Info info; + + vk::ShaderModule spv_module; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index ef9fb5910..3bea1ff44 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -19,9 +19,7 @@ constexpr std::size_t SETS_GROW_RATE = 0x20; DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, VkDescriptorSetLayout layout_) : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{descriptor_pool_}, layout{layout_} {} - -DescriptorAllocator::~DescriptorAllocator() = default; + descriptor_pool{&descriptor_pool_}, layout{layout_} {} VkDescriptorSet DescriptorAllocator::Commit() { const std::size_t index = CommitResource(); @@ -29,7 +27,7 @@ VkDescriptorSet DescriptorAllocator::Commit() { } void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); + descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin)); } VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index f892be7be..2501f9967 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -17,8 +17,12 @@ class VKScheduler; class DescriptorAllocator final : public ResourcePool { public: + 
explicit DescriptorAllocator() = default; explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); - ~DescriptorAllocator() override; + ~DescriptorAllocator() override = default; + + DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; + DescriptorAllocator(DescriptorAllocator&&) noexcept = default; DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; DescriptorAllocator(const DescriptorAllocator&) = delete; @@ -29,8 +33,8 @@ protected: void Allocate(std::size_t begin, std::size_t end) override; private: - VKDescriptorPool& descriptor_pool; - const VkDescriptorSetLayout layout; + VKDescriptorPool* descriptor_pool{}; + VkDescriptorSetLayout layout{}; std::vector descriptors_allocations; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h new file mode 100644 index 000000000..b06288403 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline.h @@ -0,0 +1,36 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Pipeline { +public: + /// Add a reference count to the pipeline + void AddRef() noexcept { + ++ref_count; + } + + [[nodiscard]] bool RemoveRef() noexcept { + --ref_count; + return ref_count == 0; + } + + [[nodiscard]] u64 UsageTick() const noexcept { + return usage_tick; + } + +protected: + u64 usage_tick{}; + +private: + size_t ref_count{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7d0ba1180..4bf3e4819 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,6 +12,8 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/recompiler.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -22,43 +24,105 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; namespace { -size_t StageFromProgram(size_t program) { - return program == 0 ? 
0 : program - 1; -} +class Environment final : public Shader::Environment { +public: + explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} + + ~Environment() override = default; + + [[nodiscard]] std::optional Analyze(u32 start_address) { + const std::optional size{TryFindSize(start_address)}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast(*size); + return Common::CityHash128(reinterpret_cast(code.data()), code.size()); + } -ShaderType StageFromProgram(Maxwell::ShaderProgram program) { - return static_cast(StageFromProgram(static_cast(program))); -} + [[nodiscard]] size_t ShaderSize() const noexcept { + return read_highest - read_lowest + INST_SIZE; + } -ShaderType GetShaderType(Maxwell::ShaderProgram program) { - switch (program) { - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - default: - UNIMPLEMENTED_MSG("program={}", program); - return ShaderType::Vertex; + [[nodiscard]] u128 ComputeHash() const { + const size_t size{ShaderSize()}; + auto data = std::make_unique(size); + gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); + return Common::CityHash128(reinterpret_cast(data.get()), size); } -} + + u64 ReadInstruction(u32 address) override { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[address / INST_SIZE]; + } + return 
gpu_memory.Read(program_base + address); + } + + std::array WorkgroupSize() override { + const auto& qmd{kepler_compute.launch_description}; + return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + } + +private: + static constexpr size_t INST_SIZE = sizeof(u64); + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + std::optional TryFindSize(u32 start_address) { + GPUVAddr guest_addr = program_base + start_address; + size_t offset = 0; + size_t size = BLOCK_SIZE; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset / INST_SIZE; + gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { + const u64 inst = data[i / INST_SIZE]; + if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + i; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; + } + + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + GPUVAddr program_base; + + u32 read_lowest = 0; + u32 read_highest = 0; + + std::vector code; + u32 cached_lowest = std::numeric_limits::max(); + u32 cached_highest = 0; +}; } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -70,35 +134,91 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader() = default; - -Shader::~Shader() = default; - PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& 
update_descriptor_queue_) - : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, + : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ update_descriptor_queue_} {} PipelineCache::~PipelineCache() = default; -ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { +ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); - auto& entry = pair->second; - if (!is_cache_miss) { - return *entry; + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + qmd.program_start}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + return nullptr; + } + ShaderInfo* const shader{TryGet(*cpu_shader_addr)}; + if (!shader) { + return CreateComputePipelineWithoutShader(*cpu_shader_addr); + } + const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)}; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return &pipeline; + } + pipeline = CreateComputePipeline(shader); + shader->compute_users.push_back(key); + return &pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + Environment env{kepler_compute, gpu_memory, program_base}; + if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { + // TODO: Load from cache } - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - throw "Bad"; + const auto [info, 
code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + shader_info->unique_hash = env.ComputeHash(); + shader_info->size_bytes = env.ShaderSize(); + return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, + BuildShader(device, code)}; } -void PipelineCache::OnShaderRemoval(Shader*) {} +ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) { + ShaderInfo shader; + ComputePipeline pipeline{CreateComputePipeline(&shader)}; + const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; + shader.compute_users.push_back(key); + pipeline.AddRef(); + + const size_t size_bytes{shader.size_bytes}; + Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); + return &compute_cache.emplace(key, std::move(pipeline)).first->second; +} + +ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const { + const auto& qmd{kepler_compute.launch_description}; + return { + .unique_hash = unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; +} + +void PipelineCache::OnShaderRemoval(ShaderInfo* shader) { + for (const ComputePipelineCacheKey& key : shader->compute_users) { + const auto it = compute_cache.find(key); + ASSERT(it != compute_cache.end()); + + Pipeline& pipeline = it->second; + if (pipeline.RemoveRef()) { + // Wait for the pipeline to be free of GPU usage before destroying it + scheduler.Wait(pipeline.UsageTick()); + compute_cache.erase(it); + } + } +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e3e63340d..eb35abc27 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -36,7 +36,7 @@ class VKUpdateDescriptorQueue; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - GPUVAddr shader; + 
u128 unique_hash; u32 shared_memory_size; std::array workgroup_size; @@ -67,13 +67,13 @@ struct hash { namespace Vulkan { -class Shader { -public: - explicit Shader(); - ~Shader(); +struct ShaderInfo { + u128 unique_hash{}; + size_t size_bytes{}; + std::vector compute_users; }; -class PipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache final : public VideoCommon::ShaderCache { public: explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, @@ -83,12 +83,18 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue); ~PipelineCache() override; - ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); protected: - void OnShaderRemoval(Shader* shader) final; + void OnShaderRemoval(ShaderInfo* shader) override; private: + ComputePipeline CreateComputePipeline(ShaderInfo* shader); + + ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); + + ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const; + Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -99,13 +105,7 @@ private: VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; - std::unique_ptr null_shader; - std::unique_ptr null_kernel; - - std::array last_shaders{}; - - std::mutex pipeline_cache; - std::unordered_map> compute_cache; + std::unordered_map compute_cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f152297d9..b757454c4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,6 +36,8 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { using Maxwell = 
Tegra::Engines::Maxwell3D::Regs; @@ -237,7 +239,26 @@ void RasterizerVulkan::Clear() { } void RasterizerVulkan::DispatchCompute() { - UNREACHABLE_MSG("Not implemented"); + ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; + if (!pipeline) { + return; + } + std::scoped_lock lock{buffer_cache.mutex}; + update_descriptor_queue.Acquire(); + pipeline->ConfigureBufferCache(buffer_cache); + const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; + + const auto& qmd{kepler_compute.launch_description}; + const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; + const VkPipeline pipeline_handle{pipeline->Handle()}; + const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()}; + scheduler.Record( + [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, + descriptor_set, nullptr); + cmdbuf.Dispatch(dim[0], dim[1], dim[2]); + }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 31017dc2b..3fd03b915 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -21,7 +21,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -150,8 +149,6 @@ private: BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; - GraphicsPipelineCacheKey graphics_key; - TextureCacheRuntime texture_cache_runtime; TextureCache 
texture_cache; BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index a8bf7bda8..2dd514968 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -10,18 +10,16 @@ namespace Vulkan { ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) - : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} - -ResourcePool::~ResourcePool() = default; + : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {} size_t ResourcePool::CommitResource() { // Refresh semaphore to query updated results - master_semaphore.Refresh(); - const u64 gpu_tick = master_semaphore.KnownGpuTick(); + master_semaphore->Refresh(); + const u64 gpu_tick = master_semaphore->KnownGpuTick(); const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional { for (size_t iterator = begin; iterator < end; ++iterator) { if (gpu_tick >= ticks[iterator]) { - ticks[iterator] = master_semaphore.CurrentTick(); + ticks[iterator] = master_semaphore->CurrentTick(); return iterator; } } @@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() { // Both searches failed, the pool is full; handle it. 
const size_t free_resource = ManageOverflow(); - ticks[free_resource] = master_semaphore.CurrentTick(); + ticks[free_resource] = master_semaphore->CurrentTick(); found = free_resource; } } diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 9d0bb3b4d..f0b80ad59 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -18,8 +18,16 @@ class MasterSemaphore; */ class ResourcePool { public: + explicit ResourcePool() = default; explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); - virtual ~ResourcePool(); + + virtual ~ResourcePool() = default; + + ResourcePool& operator=(ResourcePool&&) noexcept = default; + ResourcePool(ResourcePool&&) noexcept = default; + + ResourcePool& operator=(const ResourcePool&) = default; + ResourcePool(const ResourcePool&) = default; protected: size_t CommitResource(); @@ -34,7 +42,7 @@ private: /// Allocates a new page of resources. 
void Grow(); - MasterSemaphore& master_semaphore; + MasterSemaphore* master_semaphore{}; size_t grow_step = 0; ///< Number of new resources created after an overflow size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found std::vector ticks; ///< Ticks for each resource -- cgit v1.2.3 From 6db69990da9f232e6d982cdcb69c2e27d93075cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Feb 2021 18:10:18 -0300 Subject: spirv: Add lower fp16 to fp32 pass --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 8 ++++++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 -- src/video_core/vulkan_common/vulkan_device.cpp | 10 +++++----- 4 files changed, 14 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 4181d83ee..a444d55d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,6 +206,8 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); + /* + FIXME pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, @@ -224,6 +226,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); + */ } VKComputePass::~VKComputePass() = default; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4bf3e4819..c2a41a360 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -31,8 +31,6 @@ #include "video_core/vulkan_common/vulkan_device.h" #include 
"video_core/vulkan_common/vulkan_wrapper.h" -#pragma optimize("", off) - namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); @@ -180,6 +178,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { // TODO: Load from cache } const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + std::system("spirv-dis D:\\shader.spv"); + shader_info->unique_hash = env.ComputeHash(); shader_info->size_bytes = env.ShaderSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b757454c4..1b662f9f3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,8 +36,6 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -#pragma optimize("", off) - namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f214510da..85f903125 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -247,9 +247,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderStorageImageArrayDynamicIndexing = false, .shaderClipDistance = false, .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, + .shaderFloat64 = true, + .shaderInt64 = true, + .shaderInt16 = true, .shaderResourceResidency = false, .shaderResourceMinLod = false, .sparseBinding = false, @@ -420,8 +420,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_float16_supported && driver_id == 
VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. - LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - is_float16_supported = false; + // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); + // is_float16_supported = false; } graphics_queue = logical.GetQueue(graphics_family); -- cgit v1.2.3 From e2bc05b17d91854cbb9c0ce3647141bf7d33143e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Feb 2021 03:30:13 -0300 Subject: shader: Add denorm flush support --- .../renderer_vulkan/vk_compute_pipeline.cpp | 7 +---- .../renderer_vulkan/vk_pipeline_cache.cpp | 15 +++++++++- src/video_core/vulkan_common/vulkan_device.cpp | 26 +++++++++++------ src/video_core/vulkan_common/vulkan_device.h | 33 +++++++++++----------- src/video_core/vulkan_common/vulkan_wrapper.cpp | 2 -- 5 files changed, 50 insertions(+), 33 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 588ce6139..a658a3276 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -131,12 +131,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip })} {} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { - u32 enabled_uniforms{}; - for (const auto& desc : info.constant_buffer_descriptors) { - enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; - } - buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); - + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); size_t index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp 
b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c2a41a360..49ff911d6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -177,7 +177,20 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { // TODO: Load from cache } - const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + const auto& float_control{device.FloatControlProperties()}; + const Shader::Profile profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + }; + const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 85f903125..4887d6fd9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -43,6 +43,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, 
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -200,6 +201,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); + SetupProperties(); const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(surface != nullptr); @@ -426,8 +428,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); - - use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); } Device::~Device() = default; @@ -600,7 +600,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - VkPhysicalDeviceFeatures2 features2{}; + VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; features2.pNext = &robustness2; @@ -684,7 +684,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { true); } } - VkPhysicalDeviceFeatures2KHR features; + VkPhysicalDeviceFeatures2KHR features{}; features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; VkPhysicalDeviceProperties2KHR physical_properties; @@ -806,11 +806,21 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { } void Device::SetupFeatures() { - const auto supported_features{physical.GetFeatures()}; - is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; - is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; + const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; + is_shader_storage_image_multisample = features.shaderStorageImageMultisample; 
is_blit_depth_stencil_supported = TestDepthStencilBlits(); - is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); + is_optimal_astc_supported = IsOptimalAstcSupported(features); +} + +void Device::SetupProperties() { + float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; + + VkPhysicalDeviceProperties2KHR properties2{}; + properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + properties2.pNext = &float_controls; + + physical.GetProperties2KHR(properties2); } void Device::CollectTelemetryParameters() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 96c0f8c60..82bccc8f0 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -128,6 +128,11 @@ public: return properties.limits.maxComputeSharedMemorySize; } + /// Returns float control properties of the device. + const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { + return float_controls; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -223,11 +228,6 @@ public: return reported_extensions; } - /// Returns true if the setting for async shader compilation is enabled. - bool UseAsynchronousShaders() const { - return use_asynchronous_shaders; - } - u64 GetDeviceLocalMemory() const { return device_access_memory; } @@ -245,6 +245,9 @@ private: /// Sets up device features. void SetupFeatures(); + /// Sets up device properties. + void SetupProperties(); + /// Collects telemetry information from the device. void CollectTelemetryParameters(); @@ -267,14 +270,15 @@ private: bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. 
- vk::PhysicalDevice physical; ///< Physical device. - VkPhysicalDeviceProperties properties; ///< Device properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + VkPhysicalDeviceProperties properties; ///< Device properties. + VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan onstance version. u32 graphics_family{}; ///< Main graphics queue family index. u32 present_family{}; ///< Main present queue family index. VkDriverIdKHR driver_id{}; ///< Driver ID. @@ -301,9 +305,6 @@ private: bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - // Asynchronous Graphics Pipeline setting - bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline - // Telemetry parameters std::string vendor_name; ///< Device's driver name. std::vector reported_extensions; ///< Reported Vulkan extensions. 
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2aa0ffbe6..33fb74bfb 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -311,8 +311,6 @@ const char* ToString(VkResult result) noexcept { return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; case VkResult::VK_ERROR_UNKNOWN: return "VK_ERROR_UNKNOWN"; - case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: - return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; case VkResult::VK_THREAD_IDLE_KHR: return "VK_THREAD_IDLE_KHR"; case VkResult::VK_THREAD_DONE_KHR: -- cgit v1.2.3 From 704c6f353f68745168902c6c66c04bb730bd30e6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 17:50:14 -0300 Subject: shader: Rename, implement FADD.SAT and P2R (imm) --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 49ff911d6..b25af6cd3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -191,12 +191,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; - + /* FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); fclose(file); std::system("spirv-dis D:\\shader.spv"); - + */ shader_info->unique_hash = env.ComputeHash(); shader_info->size_bytes = env.ShaderSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, -- cgit v1.2.3 From 274897dfd59b4d08029ab7e93be4f84654abcdc8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 23:42:38 -0300 
Subject: spirv: Fixes and Intel specific workarounds --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b25af6cd3..2497c2385 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -189,6 +189,7 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .has_broken_spirv_clamp = true, // TODO: is_intel }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; /* -- cgit v1.2.3 From ab463712474de5f99eec137a9c6233e55fe184f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Mar 2021 18:31:53 -0300 Subject: shader: Initial support for textures and TEX --- .../renderer_vulkan/vk_compute_pipeline.cpp | 101 +++++++++++++++++++++ .../renderer_vulkan/vk_compute_pipeline.h | 4 + .../renderer_vulkan/vk_pipeline_cache.cpp | 4 + src/video_core/renderer_vulkan/vk_rasterizer.cpp | 3 +- 4 files changed, 111 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index a658a3276..ef8bef6ff 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -40,6 +40,16 @@ vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Sh }); ++binding; } + for (const auto& desc : info.texture_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = 
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } return device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -79,6 +89,18 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( ++binding; offset += sizeof(DescriptorUpdateEntry); } + for (const auto& desc : info.texture_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, .pNext = nullptr, @@ -92,6 +114,44 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( .set = 0, }); } + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? 
image : handle.tsc_id.Value(); + } + + u32 image; + u32 sampler; +}; + +VideoCommon::ImageViewType CastType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Shadow1D: + return VideoCommon::ImageViewType::e1D; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ShadowArray1D: + return VideoCommon::ImageViewType::e1DArray; + case Shader::TextureType::Color2D: + case Shader::TextureType::Shadow2D: + return VideoCommon::ImageViewType::e2D; + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::ShadowArray2D: + return VideoCommon::ImageViewType::e2DArray; + case Shader::TextureType::Color3D: + case Shader::TextureType::Shadow3D: + return VideoCommon::ImageViewType::e3D; + case Shader::TextureType::ColorCube: + case Shader::TextureType::ShadowCube: + return VideoCommon::ImageViewType::Cube; + case Shader::TextureType::ColorArrayCube: + case Shader::TextureType::ShadowArrayCube: + return VideoCommon::ImageViewType::CubeArray; + } + UNREACHABLE_MSG("Invalid texture type {}", type); +} } // Anonymous namespace ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, @@ -143,6 +203,47 @@ void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { buffer_cache.BindHostComputeBuffers(); } +void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, + TextureCache& texture_cache) { + texture_cache.SynchronizeComputeDescriptors(); + + static constexpr size_t max_elements = 64; + std::array image_view_ids; + boost::container::static_vector image_view_indices; + boost::container::static_vector sampler_handles; + + const auto& launch_desc{kepler_compute.launch_description}; + const auto& cbufs{launch_desc.const_buffer_config}; + const bool via_header_index{launch_desc.linked_tsc}; + for (const auto& desc : info.texture_descriptors) { + const u32 cbuf_index{desc.cbuf_index}; + 
const u32 cbuf_offset{desc.cbuf_offset}; + ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); + + const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; + const u32 raw_handle{gpu_memory.Read(addr)}; + + const TextureHandle handle(raw_handle, via_header_index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + } + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + size_t index{}; + for (const auto& desc : info.texture_descriptors) { + const VkSampler vk_sampler{sampler_handles[index]}; + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); + ++index; + } +} + VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index dc045d524..08d73a2a4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -6,9 +6,11 @@ #include "common/common_types.h" #include "shader_recompiler/shader_info.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -30,6 +32,8 @@ public: ComputePipeline(const 
ComputePipeline&) = delete; void ConfigureBufferCache(BufferCache& buffer_cache); + void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache); [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2497c2385..bcb7dd2eb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -76,6 +76,10 @@ public: return gpu_memory.Read(program_base + address); } + u32 TextureBoundBuffer() override { + return kepler_compute.regs.tex_cb_index; + } + std::array WorkgroupSize() override { const auto& qmd{kepler_compute.launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1b662f9f3..c94419d29 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -241,9 +241,10 @@ void RasterizerVulkan::DispatchCompute() { if (!pipeline) { return; } - std::scoped_lock lock{buffer_cache.mutex}; + std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; update_descriptor_queue.Acquire(); pipeline->ConfigureBufferCache(buffer_cache); + pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache); const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; const auto& qmd{kepler_compute.launch_description}; -- cgit v1.2.3 From b9f7bf4472b8e0a5aad1aec3a5ff5bb56470bfff Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 14 Mar 2021 01:51:40 -0500 Subject: spirv: Add SignedZeroInfNanPreserve logic --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core') diff --git 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index bcb7dd2eb..5477a2903 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -193,6 +193,10 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .support_fp16_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, + .support_fp32_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .has_broken_spirv_clamp = true, // TODO: is_intel }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; -- cgit v1.2.3 From 260743f371236f7c57b01334b1c3474b15a47c39 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Mar 2021 19:28:31 -0300 Subject: shader: Add partial rasterizer integration --- src/video_core/CMakeLists.txt | 6 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 4 + .../renderer_vulkan/fixed_pipeline_state.h | 9 +- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 24 ++ src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 + src/video_core/renderer_vulkan/pipeline_helper.h | 162 ++++++++ .../renderer_vulkan/vk_compute_pipeline.cpp | 209 ++-------- .../renderer_vulkan/vk_compute_pipeline.h | 3 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 445 +++++++++++++++++++++ .../renderer_vulkan/vk_graphics_pipeline.h | 66 +++ src/video_core/renderer_vulkan/vk_pipeline.h | 36 -- .../renderer_vulkan/vk_pipeline_cache.cpp | 346 ++++++++++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 82 +++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 47 ++- 
src/video_core/renderer_vulkan/vk_rasterizer.h | 2 + .../renderer_vulkan/vk_render_pass_cache.cpp | 100 +++++ .../renderer_vulkan/vk_render_pass_cache.h | 53 +++ .../renderer_vulkan/vk_texture_cache.cpp | 68 +--- src/video_core/renderer_vulkan/vk_texture_cache.h | 29 +- src/video_core/vulkan_common/vulkan_device.cpp | 15 + 20 files changed, 1298 insertions(+), 410 deletions(-) create mode 100644 src/video_core/renderer_vulkan/pipeline_helper.h create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h delete mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3323e6916..71b07c194 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -100,6 +100,7 @@ add_library(video_core STATIC renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h + renderer_vulkan/pipeline_helper.h renderer_vulkan/renderer_vulkan.h renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/vk_blit_screen.cpp @@ -116,15 +117,18 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h + renderer_vulkan/vk_graphics_pipeline.cpp + renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h - renderer_vulkan/vk_pipeline.h renderer_vulkan/vk_query_cache.cpp renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_render_pass_cache.cpp + renderer_vulkan/vk_render_pass_cache.h renderer_vulkan/vk_resource_pool.cpp 
renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_scheduler.cpp diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 362278f01..d8f683907 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -72,6 +72,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); + depth_format.Assign(static_cast(regs.zeta.format)); + std::ranges::transform(regs.rt, color_formats.begin(), + [](const auto& rt) { return static_cast(rt.format); }); alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index a0eb83a68..348f1d6ce 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -60,7 +60,7 @@ struct FixedPipelineState { void Refresh(const Maxwell& regs, size_t index); - constexpr std::array Mask() const noexcept { + std::array Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; } @@ -97,11 +97,11 @@ struct FixedPipelineState { BitField<20, 3, u32> type; BitField<23, 6, u32> size; - constexpr Maxwell::VertexAttribute::Type Type() const noexcept { + Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast(type.Value()); } - constexpr Maxwell::VertexAttribute::Size Size() const noexcept { + Maxwell::VertexAttribute::Size Size() const noexcept { return static_cast(size.Value()); } }; @@ -187,7 +187,10 @@ struct FixedPipelineState { u32 raw2; BitField<0, 3, u32> 
alpha_test_func; BitField<3, 1, u32> early_z; + BitField<4, 1, u32> depth_enabled; + BitField<5, 5, u32> depth_format; }; + std::array color_formats; u32 alpha_test_ref; u32 point_size; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index f088447e9..dc4ff0da2 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -741,4 +741,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; } +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) { + switch (msaa_mode) { + case Tegra::Texture::MsaaMode::Msaa1x1: + return VK_SAMPLE_COUNT_1_BIT; + case Tegra::Texture::MsaaMode::Msaa2x1: + case Tegra::Texture::MsaaMode::Msaa2x1_D3D: + return VK_SAMPLE_COUNT_2_BIT; + case Tegra::Texture::MsaaMode::Msaa2x2: + case Tegra::Texture::MsaaMode::Msaa2x2_VC4: + case Tegra::Texture::MsaaMode::Msaa2x2_VC12: + return VK_SAMPLE_COUNT_4_BIT; + case Tegra::Texture::MsaaMode::Msaa4x2: + case Tegra::Texture::MsaaMode::Msaa4x2_D3D: + case Tegra::Texture::MsaaMode::Msaa4x2_VC8: + case Tegra::Texture::MsaaMode::Msaa4x2_VC24: + return VK_SAMPLE_COUNT_8_BIT; + case Tegra::Texture::MsaaMode::Msaa4x4: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast(msaa_mode)); + return VK_SAMPLE_COUNT_1_BIT; + } +} + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index e3e06ba38..9f78e15b6 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -71,4 +71,6 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode); 
+ } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h new file mode 100644 index 000000000..0a59aa659 --- /dev/null +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -0,0 +1,162 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace Vulkan { + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + [[likely]] if (via_header_index) { + image = data; + sampler = data; + } else { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); + } + } + + u32 image; + u32 sampler; +}; + +struct DescriptorLayoutTuple { + vk::DescriptorSetLayout descriptor_set_layout; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; +}; + +class DescriptorLayoutBuilder { +public: + DescriptorLayoutTuple Create(const vk::Device& device) { + DescriptorLayoutTuple result; + if (!bindings.empty()) { + result.descriptor_set_layout = device.CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); + } + result.pipeline_layout = device.CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = result.descriptor_set_layout ? 
1U : 0U, + .pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); + if (!entries.empty()) { + result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *result.descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *result.pipeline_layout, + .set = 0, + }); + } + return result; + } + + void Add(const Shader::Info& info, VkShaderStageFlags stage) { + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage); + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); + } + for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); + } + } + +private: + void Add(VkDescriptorType type, VkShaderStageFlags stage) { + bindings.push_back({ + .binding = binding, + .descriptorType = type, + .descriptorCount = 1, + .stageFlags = stage, + .pImmutableSamplers = nullptr, + }); + entries.push_back(VkDescriptorUpdateTemplateEntryKHR{ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + + boost::container::small_vector bindings; + boost::container::small_vector entries; + u32 binding{}; + size_t offset{}; +}; + +inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + 
case Shader::TextureType::Shadow1D: + return VideoCommon::ImageViewType::e1D; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ShadowArray1D: + return VideoCommon::ImageViewType::e1DArray; + case Shader::TextureType::Color2D: + case Shader::TextureType::Shadow2D: + return VideoCommon::ImageViewType::e2D; + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::ShadowArray2D: + return VideoCommon::ImageViewType::e2DArray; + case Shader::TextureType::Color3D: + case Shader::TextureType::Shadow3D: + return VideoCommon::ImageViewType::e3D; + case Shader::TextureType::ColorCube: + case Shader::TextureType::ShadowCube: + return VideoCommon::ImageViewType::Cube; + case Shader::TextureType::ColorArrayCube: + case Shader::TextureType::ShadowArrayCube: + return VideoCommon::ImageViewType::CubeArray; + } + UNREACHABLE_MSG("Invalid texture type {}", type); + return {}; +} + +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers, + const ImageId* image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) { + for (const auto& desc : info.texture_descriptors) { + const VkSampler sampler{samplers[index]}; + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + ++index; + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ef8bef6ff..6684d37a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -6,6 +6,7 @@ #include +#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include 
"video_core/renderer_vulkan/vk_descriptor_pool.h" @@ -17,140 +18,10 @@ namespace Vulkan { namespace { -vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { - boost::container::small_vector bindings; - u32 binding{}; - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - for (const auto& desc : info.texture_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, - VkPipelineLayout pipeline_layout) { - boost::container::small_vector entries; - size_t offset{}; - u32 binding{}; - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset 
+= sizeof(DescriptorUpdateEntry); - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - for (const auto& desc : info.texture_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(entries.size()), - .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, - .descriptorSetLayout = descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, - .pipelineLayout = pipeline_layout, - .set = 0, - }); -} - -struct TextureHandle { - explicit TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? 
image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -VideoCommon::ImageViewType CastType(Shader::TextureType type) { - switch (type) { - case Shader::TextureType::Color1D: - case Shader::TextureType::Shadow1D: - return VideoCommon::ImageViewType::e1D; - case Shader::TextureType::ColorArray1D: - case Shader::TextureType::ShadowArray1D: - return VideoCommon::ImageViewType::e1DArray; - case Shader::TextureType::Color2D: - case Shader::TextureType::Shadow2D: - return VideoCommon::ImageViewType::e2D; - case Shader::TextureType::ColorArray2D: - case Shader::TextureType::ShadowArray2D: - return VideoCommon::ImageViewType::e2DArray; - case Shader::TextureType::Color3D: - case Shader::TextureType::Shadow3D: - return VideoCommon::ImageViewType::e3D; - case Shader::TextureType::ColorCube: - case Shader::TextureType::ShadowCube: - return VideoCommon::ImageViewType::Cube; - case Shader::TextureType::ColorArrayCube: - case Shader::TextureType::ShadowArrayCube: - return VideoCommon::ImageViewType::CubeArray; - } - UNREACHABLE_MSG("Invalid texture type {}", type); +DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) { + DescriptorLayoutBuilder builder; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + return builder.Create(device.GetLogical()); } } // Anonymous namespace @@ -158,37 +29,31 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip VKUpdateDescriptorQueue& update_descriptor_queue_, const Shader::Info& info_, vk::ShaderModule spv_module_) : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, - spv_module(std::move(spv_module_)), - descriptor_set_layout(CreateDescriptorSetLayout(device, info)), - descriptor_allocator(descriptor_pool, *descriptor_set_layout), - pipeline_layout{device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), 
- .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - })}, - descriptor_update_template{ - CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, - pipeline{device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - })} {} + spv_module(std::move(spv_module_)) { + DescriptorLayoutTuple tuple{CreateLayout(device, info)}; + descriptor_set_layout = std::move(tuple.descriptor_set_layout); + pipeline_layout = std::move(tuple.pipeline_layout); + descriptor_update_template = std::move(tuple.descriptor_update_template); + descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); +} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); @@ -211,7 +76,7 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple static constexpr size_t max_elements = 64; std::array image_view_ids; boost::container::static_vector image_view_indices; - boost::container::static_vector sampler_handles; + boost::container::static_vector samplers; const auto& 
launch_desc{kepler_compute.launch_description}; const auto& cbufs{launch_desc.const_buffer_config}; @@ -228,20 +93,14 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); + samplers.push_back(sampler->Handle()); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); size_t index{}; - for (const auto& desc : info.texture_descriptors) { - const VkSampler vk_sampler{sampler_handles[index]}; - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; - const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; - update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); - ++index; - } + PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, + *update_descriptor_queue, index); } VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 08d73a2a4..e82e5816b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -9,7 +9,6 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_pipeline.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -18,7 +17,7 @@ namespace Vulkan { class Device; -class ComputePipeline : public Pipeline { +class ComputePipeline { public: explicit ComputePipeline() = default; explicit ComputePipeline(const Device& device, 
VKDescriptorPool& descriptor_pool, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp new file mode 100644 index 000000000..a2ec418b1 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -0,0 +1,445 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include +#include + +#include "common/bit_field.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { +namespace { +using boost::container::small_vector; +using boost::container::static_vector; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; + +DescriptorLayoutTuple CreateLayout(const Device& device, std::span infos) { + DescriptorLayoutBuilder builder; + for (size_t index = 0; index < infos.size(); ++index) { + static constexpr std::array stages{ + VK_SHADER_STAGE_VERTEX_BIT, + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, + VK_SHADER_STAGE_GEOMETRY_BIT, + VK_SHADER_STAGE_FRAGMENT_BIT, + }; + builder.Add(infos[index], stages.at(index)); + } + return builder.Create(device.GetLogical()); +} + +template +VkStencilOpState GetStencilFaceState(const StencilFace& face) { + return { + .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), + .passOp = 
MaxwellToVK::StencilOp(face.ActionDepthPass()), + .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), + .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), + .compareMask = 0, + .writeMask = 0, + .reference = 0, + }; +} + +bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { + static constexpr std::array unsupported_topologies{ + VK_PRIMITIVE_TOPOLOGY_POINT_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT, + }; + return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); +} + +VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { + union Swizzle { + u32 raw; + BitField<0, 3, Maxwell::ViewportSwizzle> x; + BitField<4, 3, Maxwell::ViewportSwizzle> y; + BitField<8, 3, Maxwell::ViewportSwizzle> z; + BitField<12, 3, Maxwell::ViewportSwizzle> w; + }; + const Swizzle unpacked{swizzle}; + return VkViewportSwizzleNV{ + .x = MaxwellToVK::ViewportSwizzle(unpacked.x), + .y = MaxwellToVK::ViewportSwizzle(unpacked.y), + .z = MaxwellToVK::ViewportSwizzle(unpacked.z), + .w = MaxwellToVK::ViewportSwizzle(unpacked.w), + }; +} + +PixelFormat DecodeFormat(u8 encoded_format) { + const auto format{static_cast(encoded_format)}; + if (format == Tegra::RenderTargetFormat::NONE) { + return PixelFormat::Invalid; + } + return PixelFormatFromRenderTargetFormat(format); +} + +RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { + RenderPassKey key; + std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat); + if (state.depth_enabled != 0) { + const auto depth_format{static_cast(state.depth_format.Value())}; + key.depth_format = PixelFormatFromDepthFormat(depth_format); + } else { + key.depth_format = PixelFormat::Invalid; + } + key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); + 
return key; +} +} // Anonymous namespace + +GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, + BufferCache& buffer_cache_, TextureCache& texture_cache_, + const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache, + const FixedPipelineState& state, + std::array stages, + const std::array& infos) + : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_}, + buffer_cache{&buffer_cache_}, scheduler{&scheduler_}, + update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + + DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)}; + descriptor_set_layout = std::move(tuple.descriptor_set_layout); + pipeline_layout = std::move(tuple.pipeline_layout); + descriptor_update_template = std::move(tuple.descriptor_update_template); + descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + MakePipeline(device, state, render_pass); +} + +void GraphicsPipeline::Configure(bool is_indexed) { + static constexpr size_t max_images_elements = 64; + std::array image_view_ids; + static_vector image_view_indices; + static_vector samplers; + + texture_cache->SynchronizeGraphicsDescriptors(); + texture_cache->UpdateRenderTargets(false); + + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache->UnbindGraphicsStorageBuffers(stage); + size_t 
index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, + true); + ++index; + } + const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + for (const auto& desc : info.texture_descriptors) { + const u32 cbuf_index{desc.cbuf_index}; + const u32 cbuf_offset{desc.cbuf_offset}; + ASSERT(cbufs[cbuf_index].enabled); + const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; + const u32 raw_handle{gpu_memory->Read(addr)}; + + const TextureHandle handle(raw_handle, via_header_index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)}; + samplers.push_back(sampler->Handle()); + } + } + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + buffer_cache->UpdateGraphicsBuffers(is_indexed); + texture_cache->FillGraphicsImageViews(indices_span, image_view_ids); + + buffer_cache->BindHostGeometryBuffers(is_indexed); + + size_t index{}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + buffer_cache->BindHostStageBuffers(stage); + PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), + *texture_cache, *update_descriptor_queue, index); + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + + scheduler->BindGraphicsPipeline(*pipeline); + scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + }); +} + +void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state, + VkRenderPass render_pass) { + FixedPipelineState::DynamicState dynamic{}; + if (!device.IsExtExtendedDynamicStateSupported()) { + dynamic = 
state.dynamic_state; + } + static_vector vertex_bindings; + static_vector vertex_binding_divisors; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = state.binding_divisors[index] != 0; + const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ + .binding = static_cast(index), + .stride = dynamic.vertex_strides[index], + .inputRate = rate, + }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast(index), + .divisor = state.binding_divisors[index], + }); + } + } + static_vector vertex_attributes; + const auto& input_attributes = stage_infos[0].loads_generics; + for (size_t index = 0; index < state.attributes.size(); ++index) { + const auto& attribute = state.attributes[index]; + if (!attribute.enabled || !input_attributes[index]) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); + } + VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), + .pVertexBindingDescriptions = vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), + .pVertexAttributeDescriptions = vertex_attributes.data(), + }; + const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .vertexBindingDivisorCount = static_cast(vertex_binding_divisors.size()), + .pVertexBindingDivisors = vertex_binding_divisors.data(), + }; + if (!vertex_binding_divisors.empty()) { + vertex_input_ci.pNext = &input_divisor_ci; + } + const auto input_assembly_topology = 
MaxwellToVK::PrimitiveTopology(device, state.topology); + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), + .primitiveRestartEnable = state.primitive_restart_enable != 0 && + SupportsPrimitiveRestart(input_assembly_topology), + }; + const VkPipelineTessellationStateCreateInfo tessellation_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + }; + VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; + std::array swizzles; + std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); + VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewportSwizzles = swizzles.data(), + }; + if (device.IsNvViewportSwizzleSupported()) { + viewport_ci.pNext = &swizzle_ci; + } + + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = + static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + .rasterizerDiscardEnable = + static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = static_cast( + dynamic.cull_enable ? 
MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), + .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), + .depthBiasEnable = state.depth_bias_enable, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + const VkPipelineMultisampleStateCreateInfo multisample_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthTestEnable = dynamic.depth_test_enable, + .depthWriteEnable = dynamic.depth_write_enable, + .depthCompareOp = dynamic.depth_test_enable + ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) + : VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .stencilTestEnable = dynamic.stencil_enable, + .front = GetStencilFaceState(dynamic.front), + .back = GetStencilFaceState(dynamic.back), + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, + }; + static_vector cb_attachments; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + static constexpr std::array mask_table{ + VK_COLOR_COMPONENT_R_BIT, + VK_COLOR_COMPONENT_G_BIT, + VK_COLOR_COMPONENT_B_BIT, + VK_COLOR_COMPONENT_A_BIT, + }; + const auto format{static_cast(state.color_formats[index])}; + if (format == Tegra::RenderTargetFormat::NONE) { + continue; + } + const auto& blend{state.attachments[index]}; + const std::array mask{blend.Mask()}; + VkColorComponentFlags write_mask{}; + for (size_t i = 0; i < mask_table.size(); ++i) { + write_mask |= mask[i] ? 
mask_table[i] : 0; + } + cb_attachments.push_back({ + .blendEnable = blend.enable != 0, + .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), + .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), + .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), + .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), + .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), + .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), + .colorWriteMask = write_mask, + }); + } + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = static_cast(cb_attachments.size()), + .pAttachments = cb_attachments.data(), + .blendConstants = {}, + }; + static_vector dynamic_states{ + VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }; + if (device.IsExtExtendedDynamicStateSupported()) { + static constexpr std::array extended{ + VK_DYNAMIC_STATE_CULL_MODE_EXT, + VK_DYNAMIC_STATE_FRONT_FACE_EXT, + VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, + VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, + VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_OP_EXT, + }; + dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); + } + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast(dynamic_states.size()), 
+ .pDynamicStates = dynamic_states.data(), + }; + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + static_vector shader_stages; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + if (!spv_modules[stage]) { + continue; + } + [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); + /* + if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { + stage_ci.pNext = &subgroup_size_ci; + } + */ + } + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = &tessellation_ci, + .pViewportState = &viewport_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisample_ci, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = render_pass, + .subpass = 0, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h new file mode 100644 index 000000000..ba1d34a83 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project 
+// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Device; +class RenderPassCache; +class VKScheduler; +class VKUpdateDescriptorQueue; + +class GraphicsPipeline { + static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; + +public: + explicit GraphicsPipeline() = default; + explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, + TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, const FixedPipelineState& state, + std::array stages, + const std::array& infos); + + void Configure(bool is_indexed); + + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default; + GraphicsPipeline(GraphicsPipeline&&) noexcept = default; + + GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; + GraphicsPipeline(const GraphicsPipeline&) = delete; + +private: + void MakePipeline(const Device& device, const FixedPipelineState& state, + VkRenderPass render_pass); + + Tegra::Engines::Maxwell3D* maxwell3d{}; + Tegra::MemoryManager* gpu_memory{}; + TextureCache* texture_cache{}; + BufferCache* buffer_cache{}; + VKScheduler* scheduler{}; + VKUpdateDescriptorQueue* update_descriptor_queue{}; + + std::array spv_modules; + std::array stage_infos; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout 
pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h deleted file mode 100644 index b06288403..000000000 --- a/src/video_core/renderer_vulkan/vk_pipeline.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -class Pipeline { -public: - /// Add a reference count to the pipeline - void AddRef() noexcept { - ++ref_count; - } - - [[nodiscard]] bool RemoveRef() noexcept { - --ref_count; - return ref_count == 0; - } - - [[nodiscard]] u64 UsageTick() const noexcept { - return usage_tick; - } - -protected: - u64 usage_tick{}; - -private: - size_t ref_count{}; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 5477a2903..c9da2080d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,8 +12,11 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/environment.h" -#include "shader_recompiler/recompiler.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/program_header.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -34,18 +37,18 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -using Tegra::Engines::ShaderType; - namespace { -class Environment final : public Shader::Environment { +using 
Shader::Backend::SPIRV::EmitSPIRV; + +class GenericEnvironment : public Shader::Environment { public: - explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} {} - ~Environment() override = default; + ~GenericEnvironment() override = default; - [[nodiscard]] std::optional Analyze(u32 start_address) { + std::optional Analyze(u32 start_address) { const std::optional size{TryFindSize(start_address)}; if (!size) { return std::nullopt; @@ -55,52 +58,47 @@ public: return Common::CityHash128(reinterpret_cast(code.data()), code.size()); } - [[nodiscard]] size_t ShaderSize() const noexcept { + [[nodiscard]] size_t CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; + } + + [[nodiscard]] size_t ReadSize() const noexcept { return read_highest - read_lowest + INST_SIZE; } - [[nodiscard]] u128 ComputeHash() const { - const size_t size{ShaderSize()}; + [[nodiscard]] u128 CalculateHash() const { + const size_t size{ReadSize()}; auto data = std::make_unique(size); - gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); return Common::CityHash128(reinterpret_cast(data.get()), size); } - u64 ReadInstruction(u32 address) override { + u64 ReadInstruction(u32 address) final { read_lowest = std::min(read_lowest, address); read_highest = std::max(read_highest, address); if (address >= cached_lowest && address < cached_highest) { return code[address / INST_SIZE]; } - return gpu_memory.Read(program_base + address); - } - - u32 TextureBoundBuffer() override { - return kepler_compute.regs.tex_cb_index; - } - - std::array 
WorkgroupSize() override { - const auto& qmd{kepler_compute.launch_description}; - return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + return gpu_memory->Read(program_base + address); } -private: +protected: static constexpr size_t INST_SIZE = sizeof(u64); - static constexpr size_t BLOCK_SIZE = 0x1000; - static constexpr size_t MAXIMUM_SIZE = 0x100000; - static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; - static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + std::optional TryFindSize(GPUVAddr guest_addr) { + constexpr size_t BLOCK_SIZE = 0x1000; + constexpr size_t MAXIMUM_SIZE = 0x100000; + + constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - std::optional TryFindSize(u32 start_address) { - GPUVAddr guest_addr = program_base + start_address; size_t offset = 0; size_t size = BLOCK_SIZE; while (size <= MAXIMUM_SIZE) { code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; - gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { const u64 inst = data[i / INST_SIZE]; if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { @@ -114,17 +112,87 @@ private: return std::nullopt; } - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - GPUVAddr program_base; + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; + + std::vector code; - u32 read_lowest = 0; + u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; - std::vector code; u32 cached_lowest = std::numeric_limits::max(); u32 cached_highest = 0; }; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, + GPUVAddr program_base_, u32 start_offset) + : 
GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph)); + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + } + } + + ~GraphicsEnvironment() override = default; + + u32 TextureBoundBuffer() override { + return maxwell3d->regs.tex_cb_index; + } + + std::array WorkgroupSize() override { + throw Shader::LogicError("Requesting workgroup size in a graphics stage"); + } + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} { + stage = Shader::Stage::Compute; + } + + ~ComputeEnvironment() override = default; + + u32 TextureBoundBuffer() override { + return kepler_compute->regs.tex_cb_index; + } + + std::array WorkgroupSize() override { + const auto& qmd{kepler_compute->launch_description}; + return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + } + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -136,19 +204,67 @@ bool ComputePipelineCacheKey::operator==(const 
ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } +size_t GraphicsPipelineCacheKey::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast(this), Size()); + return static_cast(hash); +} + +bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { + return std::memcmp(&rhs, this, Size()) == 0; +} + PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, + TextureCache& texture_cache_) : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ - update_descriptor_queue_} {} + scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, + buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { + const auto& float_control{device.FloatControlProperties()}; + profile = Shader::Profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = 
float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .support_fp16_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, + .support_fp32_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, + .has_broken_spirv_clamp = true, // TODO: is_intel + }; +} PipelineCache::~PipelineCache() = default; +GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { + MICROPROFILE_SCOPE(Vulkan_PipelineCache); + + if (!RefreshStages()) { + return nullptr; + } + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return &pipeline; + } + pipeline = CreateGraphicsPipeline(); + return &pipeline; +} + ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); @@ -170,45 +286,130 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { return &pipeline; } pipeline = CreateComputePipeline(shader); - shader->compute_users.push_back(key); return &pipeline; } +bool PipelineCache::RefreshStages() { + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { + graphics_key.unique_hashes[index] = u128{}; + continue; + } + const auto& shader_config{maxwell3d.regs.shader_config[index]}; + const auto program{static_cast(index)}; + const GPUVAddr shader_addr{base_addr + shader_config.offset}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); + return false; + } + const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; + if (!shader_info) { + const u32 
offset{shader_config.offset}; + shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr); + } + graphics_key.unique_hashes[index] = shader_info->unique_hash; + } + return true; +} + +const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, + u32 start_address, VAddr cpu_addr) { + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + auto info = std::make_unique(); + if (const std::optional cached_hash{env.Analyze(start_address)}) { + info->unique_hash = *cached_hash; + info->size_bytes = env.CachedSize(); + } else { + // Slow path, not really hit on commercial games + // Build a control flow graph to get the real shader size + flow_block_pool.ReleaseContents(); + Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + info->unique_hash = env.CalculateHash(); + info->size_bytes = env.ReadSize(); + } + const size_t size_bytes{info->size_bytes}; + const ShaderInfo* const result{info.get()}; + Register(std::move(info), cpu_addr, size_bytes); + return result; +} + +GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { + flow_block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + + std::array envs; + std::array programs; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + const auto program{static_cast(index)}; + GraphicsEnvironment& env{envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + + const u32 cfg_offset = start_address + sizeof(Shader::ProgramHeader); + Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset); + programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg); + } + std::array infos{}; + 
std::array modules; + + u32 binding{0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + GraphicsEnvironment& env{envs[index]}; + Shader::IR::Program& program{programs[index]}; + + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + std::vector code{EmitSPIRV(profile, env, program, binding)}; + + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + std::system("spirv-cross --vulkan-semantics D:\\shader.spv"); + + modules[stage_index] = BuildShader(device, code); + } + return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, + descriptor_pool, update_descriptor_queue, render_pass_cache, + graphics_key.state, std::move(modules), infos); +} + ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; - Environment env{kepler_compute, gpu_memory, program_base}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base}; if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { // TODO: Load from cache } - const auto& float_control{device.FloatControlProperties()}; - const Shader::Profile profile{ - .unified_descriptor_binding = true, - .support_float_controls = true, - .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == - VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, - .support_separate_rounding_mode = - float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, - .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, - .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, - .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 
!= VK_FALSE, - .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, - .support_fp16_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, - .support_fp32_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, - .has_broken_spirv_clamp = true, // TODO: is_intel - }; - const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; + flow_block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + + Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start}; + Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + u32 binding{0}; + std::vector code{EmitSPIRV(profile, env, program, binding)}; /* FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); fclose(file); std::system("spirv-dis D:\\shader.spv"); */ - shader_info->unique_hash = env.ComputeHash(); - shader_info->size_bytes = env.ShaderSize(); - return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, + shader_info->unique_hash = env.CalculateHash(); + shader_info->size_bytes = env.ReadSize(); + return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } @@ -216,9 +417,6 @@ ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_ ShaderInfo shader; ComputePipeline pipeline{CreateComputePipeline(&shader)}; const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; - shader.compute_users.push_back(key); - pipeline.AddRef(); - const size_t size_bytes{shader.size_bytes}; Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); return &compute_cache.emplace(key, std::move(pipeline)).first->second; @@ -233,18 +431,4 @@ ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) }; } -void 
PipelineCache::OnShaderRemoval(ShaderInfo* shader) { - for (const ComputePipelineCacheKey& key : shader->compute_users) { - const auto it = compute_cache.find(key); - ASSERT(it != compute_cache.end()); - - Pipeline& pipeline = it->second; - if (pipeline.RemoveRef()) { - // Wait for the pipeline to be free of GPU usage before destroying it - scheduler.Wait(pipeline.UsageTick()); - compute_cache.erase(it); - } - } -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eb35abc27..60fb976df 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -12,11 +12,18 @@ #include #include -#include - #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_compute_pipeline.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -26,13 +33,6 @@ class System; namespace Vulkan { -class Device; -class RasterizerVulkan; -class ComputePipeline; -class VKDescriptorPool; -class VKScheduler; -class VKUpdateDescriptorQueue; - using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { @@ -52,6 +52,26 @@ static_assert(std::has_unique_object_representations_v) static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_constructible_v); +struct GraphicsPipelineCacheKey { + std::array unique_hashes; + 
FixedPipelineState state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + } // namespace Vulkan namespace std { @@ -63,14 +83,28 @@ struct hash { } }; +template <> +struct hash { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; + } // namespace std namespace Vulkan { +class ComputePipeline; +class Device; +class RasterizerVulkan; +class RenderPassCache; +class VKDescriptorPool; +class VKScheduler; +class VKUpdateDescriptorQueue; + struct ShaderInfo { u128 unique_hash{}; size_t size_bytes{}; - std::vector compute_users; }; class PipelineCache final : public VideoCommon::ShaderCache { @@ -80,15 +114,23 @@ public: Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, BufferCache& buffer_cache, + TextureCache& texture_cache); ~PipelineCache() override; - [[nodiscard]] ComputePipeline* CurrentComputePipeline(); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); -protected: - void OnShaderRemoval(ShaderInfo* shader) override; + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: + bool RefreshStages(); + + const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, + u32 start_address, VAddr cpu_addr); + + GraphicsPipeline CreateGraphicsPipeline(); + ComputePipeline CreateComputePipeline(ShaderInfo* shader); 
ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); @@ -104,8 +146,20 @@ private: VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; + RenderPassCache& render_pass_cache; + BufferCache& buffer_cache; + TextureCache& texture_cache; + + GraphicsPipelineCacheKey graphics_key{}; std::unordered_map compute_cache; + std::unordered_map graphics_cache; + + Shader::ObjectPool inst_pool; + Shader::ObjectPool block_pool; + Shader::ObjectPool flow_block_pool; + + Shader::Profile profile; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c94419d29..036b531b9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -141,15 +141,18 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra blit_image(device, scheduler, state_tracker, descriptor_pool), astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, memory_allocator), - texture_cache_runtime{device, scheduler, memory_allocator, - staging_pool, blit_image, astc_decoder_pass}, + render_pass_cache(device), texture_cache_runtime{device, scheduler, + memory_allocator, staging_pool, + blit_image, astc_decoder_pass, + render_pass_cache}, texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, - descriptor_pool, update_descriptor_queue), - query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, + descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, + 
texture_cache), + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); @@ -158,7 +161,39 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { - UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); + MICROPROFILE_SCOPE(Vulkan_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + FlushWork(); + + query_cache.UpdateCounters(); + + GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; + if (!pipeline) { + return; + } + update_descriptor_queue.Acquire(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + pipeline->Configure(is_indexed); + + BeginTransformFeedback(); + + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); + UpdateDynamicStates(); + + const auto& regs{maxwell3d.regs}; + const u32 num_instances{maxwell3d.mme_draw.instance_count}; + const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; + scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { + if (draw_params.is_indexed) { + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, + draw_params.base_vertex, draw_params.base_instance); + } else { + cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, + draw_params.base_vertex, draw_params.base_instance); + } + }); + EndTransformFeedback(); } void RasterizerVulkan::Clear() { @@ -487,13 +522,11 @@ void RasterizerVulkan::FlushWork() { if ((++draw_counter & 7) != 7) { return; } - if (draw_counter < DRAWS_TO_DISPATCH) { // Send recorded tasks to the worker thread scheduler.DispatchWork(); return; } - // Otherwise (every certain number of draws) flush execution. 
// This submits commands to the Vulkan driver. scheduler.Flush(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3fd03b915..88dbd753b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -23,6 +23,7 @@ #include "video_core/renderer_vulkan/vk_fence_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" @@ -148,6 +149,7 @@ private: VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; + RenderPassCache render_pass_cache; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp new file mode 100644 index 000000000..7e5ae43ea --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -0,0 +1,100 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include + +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { +namespace { +using VideoCore::Surface::PixelFormat; + +constexpr std::array ATTACHMENT_REFERENCES{ + VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, +}; + +VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, + VkSampleCountFlagBits samples) { + using MaxwellToVK::SurfaceFormat; + return { + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, + .samples = samples, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }; +} +} // Anonymous namespace + +RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} + +VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { + const auto [pair, is_new] = cache.try_emplace(key); + if (!is_new) { + return *pair->second; + } + boost::container::static_vector descriptions; + u32 num_images{0}; + + for (size_t index = 0; index < key.color_formats.size(); ++index) { + const PixelFormat format{key.color_formats[index]}; + if (format == 
PixelFormat::Invalid) { + continue; + } + descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + ++num_images; + } + const size_t num_colors{descriptions.size()}; + const VkAttachmentReference* depth_attachment{}; + if (key.depth_format != PixelFormat::Invalid) { + depth_attachment = &ATTACHMENT_REFERENCES[num_colors]; + descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); + } + const VkSubpassDescription subpass{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast(num_colors), + .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = depth_attachment, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + pair->second = device->GetLogical().CreateRenderPass({ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast(descriptions.size()), + .pAttachments = descriptions.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }); + return *pair->second; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h new file mode 100644 index 000000000..db8e83f1a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +struct RenderPassKey { + auto operator<=>(const RenderPassKey&) const noexcept = default; + + std::array color_formats; + VideoCore::Surface::PixelFormat depth_format; + VkSampleCountFlagBits samples; +}; + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { + size_t value = static_cast(key.depth_format) << 48; + value ^= static_cast(key.samples) << 52; + for (size_t i = 0; i < key.color_formats.size(); ++i) { + value ^= static_cast(key.color_formats[i]) << (i * 6); + } + return value; + } +}; +} // namespace std + +namespace Vulkan { + + class Device; + +class RenderPassCache { +public: + explicit RenderPassCache(const Device& device_); + + VkRenderPass Get(const RenderPassKey& key); + +private: + const Device* device{}; + std::unordered_map cache; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 88ccf96f5..1bbc542a1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -18,6 +18,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange; using VideoCore::Surface::IsPixelFormatASTC; namespace { - -constexpr std::array ATTACHMENT_REFERENCES{ - VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, - 
VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, -}; - constexpr VkBorderColor ConvertBorderColor(const std::array& color) { if (color == std::array{0, 0, 0, 0}) { return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; @@ -226,23 +214,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { } } -[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device, - const ImageView* image_view) { - using MaxwellToVK::SurfaceFormat; - const PixelFormat pixel_format = image_view->format; - return VkAttachmentDescription{ - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, - .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format, - .samples = image_view->Samples(), - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }; -} - [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { switch (swizzle) { case SwizzleSource::Zero: @@ -1164,7 +1135,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { - std::vector descriptions; std::vector attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; @@ -1175,7 +1145,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::spanRenderTarget()); renderpass_key.color_formats[index] = color_buffer->format; num_layers = std::max(num_layers, 
color_buffer->range.extent.layers); @@ -1185,10 +1154,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::spanRenderTarget()); renderpass_key.depth_format = depth_buffer->format; num_layers = std::max(num_layers, depth_buffer->range.extent.layers); @@ -1201,40 +1167,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span(num_colors), - .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = depth_attachment, - .preserveAttachmentCount = 0, - .pPreserveAttachments = nullptr, - }; - cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = static_cast(descriptions.size()), - .pAttachments = descriptions.data(), - .subpassCount = 1, - .pSubpasses = &subpass, - .dependencyCount = 0, - .pDependencies = nullptr, - }); - } - renderpass = *cache_pair->second; + renderpass = runtime.render_pass_cache.Get(renderpass_key); + render_area = VkExtent2D{ .width = key.size.width, .height = key.size.height, }; num_color_buffers = static_cast(num_colors); - framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ + framebuffer = runtime.device.GetLogical().CreateFramebuffer({ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 172bcdf98..189ee5a68 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -26,35 +26,10 @@ class Device; class Image; class ImageView; class Framebuffer; +class RenderPassCache; class StagingBufferPool; class VKScheduler; -struct RenderPassKey { - constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; - - std::array color_formats; - PixelFormat depth_format; - VkSampleCountFlagBits 
samples; -}; - -} // namespace Vulkan - -namespace std { -template <> -struct hash { - [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { - size_t value = static_cast(key.depth_format) << 48; - value ^= static_cast(key.samples) << 52; - for (size_t i = 0; i < key.color_formats.size(); ++i) { - value ^= static_cast(key.color_formats[i]) << (i * 6); - } - return value; - } -}; -} // namespace std - -namespace Vulkan { - struct TextureCacheRuntime { const Device& device; VKScheduler& scheduler; @@ -62,7 +37,7 @@ struct TextureCacheRuntime { StagingBufferPool& staging_buffer_pool; BlitImageHelper& blit_image_helper; ASTCDecoderPass& astc_decoder_pass; - std::unordered_map renderpass_cache{}; + RenderPassCache& render_pass_cache; void Finish(); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 4887d6fd9..f0e5b098c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -49,6 +49,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, + VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, #ifdef _WIN32 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, #endif @@ -312,6 +313,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, host_query_reset); + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, + .pNext = nullptr, + .shaderDemoteToHelperInvocation = true, + }; + SetNext(next, demote); + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { float16_int8 = { @@ -597,8 +605,14 @@ void Device::CheckSuitability(bool requires_swapchain) const { throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } } + 
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{}; + demote.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; + demote.pNext = nullptr; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + robustness2.pNext = &demote; VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -625,6 +639,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), + std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), -- cgit v1.2.3 From f91859efd259995806c2944f7941b105b58300d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Mar 2021 05:04:12 -0300 Subject: shader: Implement I2F --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c9da2080d..d1399a46d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -227,6 +227,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { const auto& float_control{device.FloatControlProperties()}; + const VkDriverIdKHR driver_id{device.GetDriverID()}; profile = 
Shader::Profile{ .unified_descriptor_binding = true, .support_float_controls = true, @@ -242,7 +243,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, .support_fp32_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, - .has_broken_spirv_clamp = true, // TODO: is_intel + .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } -- cgit v1.2.3 From 76c8a962ac4eae77e71d66a72c448930240339f9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Mar 2021 19:11:56 -0300 Subject: spirv: Implement VertexId and InstanceId, refactor code --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d1399a46d..90e1a30f6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -230,6 +230,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, const VkDriverIdKHR driver_id{device.GetDriverID()}; profile = Shader::Profile{ .unified_descriptor_binding = true, + .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, -- cgit v1.2.3 From e4e1cc11b8f7649171fe922b2899e57120bfba53 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 21 Mar 2021 19:28:37 -0400 Subject: shader: Implement DMNMX, DSET, DSETP --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 
90e1a30f6..75f7c1e61 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -244,6 +244,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, .support_fp32_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, + .support_fp64_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } -- cgit v1.2.3 From c63cf4fa2e22538a01c191e1f97ac0f93b67e804 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Mar 2021 21:03:20 -0300 Subject: vk_pipeline_cache: Add pipeline cache --- .../renderer_vulkan/vk_pipeline_cache.cpp | 391 ++++++++++++++++----- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 34 +- .../renderer_vulkan/vk_render_pass_cache.cpp | 1 + .../renderer_vulkan/vk_render_pass_cache.h | 4 +- 4 files changed, 332 insertions(+), 98 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 75f7c1e61..41fc9588f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -4,12 +4,15 @@ #include #include +#include #include #include #include "common/bit_cast.h" #include "common/cityhash.h" +#include "common/file_util.h" #include "common/microprofile.h" +#include "common/thread_worker.h" #include "core/core.h" #include "core/memory.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -37,18 +40,23 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -namespace { -using Shader::Backend::SPIRV::EmitSPIRV; +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} class GenericEnvironment : public 
Shader::Environment { public: explicit GenericEnvironment() = default; - explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : gpu_memory{&gpu_memory_}, program_base{program_base_} {} + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} { + start_address = start_address_; + } ~GenericEnvironment() override = default; - std::optional Analyze(u32 start_address) { + std::optional Analyze() { const std::optional size{TryFindSize(start_address)}; if (!size) { return std::nullopt; @@ -66,11 +74,15 @@ public: return read_highest - read_lowest + INST_SIZE; } + [[nodiscard]] bool CanBeSerialized() const noexcept { + return has_unbound_instructions; + } + [[nodiscard]] u128 CalculateHash() const { const size_t size{ReadSize()}; - auto data = std::make_unique(size); + const auto data{std::make_unique(size)}; gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); - return Common::CityHash128(reinterpret_cast(data.get()), size); + return Common::CityHash128(data.get(), size); } u64 ReadInstruction(u32 address) final { @@ -80,9 +92,32 @@ public: if (address >= cached_lowest && address < cached_highest) { return code[address / INST_SIZE]; } + has_unbound_instructions = true; return gpu_memory->Read(program_base + address); } + void Serialize(std::ofstream& file) const { + const u64 code_size{static_cast(ReadSize())}; + const auto data{std::make_unique(code_size)}; + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); + + const u32 texture_bound{TextureBoundBuffer()}; + + file.write(reinterpret_cast(&code_size), sizeof(code_size)) + .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .write(reinterpret_cast(&start_address), sizeof(start_address)) + .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) + .write(reinterpret_cast(&read_highest), sizeof(read_highest)) + 
.write(reinterpret_cast(&stage), sizeof(stage)) + .write(data.get(), code_size); + if (stage == Shader::Stage::Compute) { + const std::array workgroup_size{WorkgroupSize()}; + file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + } else { + file.write(reinterpret_cast(&sph), sizeof(sph)); + } + } + protected: static constexpr size_t INST_SIZE = sizeof(u64); @@ -122,16 +157,22 @@ protected: u32 cached_lowest = std::numeric_limits::max(); u32 cached_highest = 0; + + bool has_unbound_instructions = false; }; +namespace { +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::TranslateProgram; + class GraphicsEnvironment final : public GenericEnvironment { public: explicit GraphicsEnvironment() = default; explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, - GPUVAddr program_base_, u32 start_offset) - : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} { - gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph)); + GPUVAddr program_base_, u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; @@ -158,11 +199,11 @@ public: ~GraphicsEnvironment() override = default; - u32 TextureBoundBuffer() override { + u32 TextureBoundBuffer() const override { return maxwell3d->regs.tex_cb_index; } - std::array WorkgroupSize() override { + std::array WorkgroupSize() const override { throw Shader::LogicError("Requesting workgroup size in a graphics stage"); } @@ -174,18 +215,20 @@ class ComputeEnvironment final : public GenericEnvironment { public: explicit ComputeEnvironment() = default; explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - 
: GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} { + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ + &kepler_compute_} { stage = Shader::Stage::Compute; } ~ComputeEnvironment() override = default; - u32 TextureBoundBuffer() override { + u32 TextureBoundBuffer() const override { return kepler_compute->regs.tex_cb_index; } - std::array WorkgroupSize() override { + std::array WorkgroupSize() const override { const auto& qmd{kepler_compute->launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; } @@ -193,8 +236,174 @@ public: private: Tegra::Engines::KeplerCompute* kepler_compute{}; }; + +void SerializePipeline(std::span key, std::span envs, + std::ofstream& file) { + if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { + return; + } + const u32 num_envs{static_cast(envs.size())}; + file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); + for (const GenericEnvironment* const env : envs) { + env->Serialize(file); + } + file.write(key.data(), key.size_bytes()); +} + +template +void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) { + try { + std::ofstream file; + file.exceptions(std::ifstream::failbit); + Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::app); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); + return; + } + if (file.tellp() == 0) { + // Write header... 
+ } + const std::span key_span(reinterpret_cast(&key), sizeof(key)); + SerializePipeline(key_span, MakeSpan(envs), file); + + } catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::Delete(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename); + } + } +} + +class FileEnvironment final : public Shader::Environment { +public: + void Deserialize(std::ifstream& file) { + u64 code_size{}; + file.read(reinterpret_cast(&code_size), sizeof(code_size)) + .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .read(reinterpret_cast(&start_address), sizeof(start_address)) + .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) + .read(reinterpret_cast(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast(&stage), sizeof(stage)); + code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast(code.get()), code_size); + if (stage == Shader::Stage::Compute) { + file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + } else { + file.read(reinterpret_cast(&sph), sizeof(sph)); + } + } + + u64 ReadInstruction(u32 address) override { + if (address < read_lowest || address > read_highest) { + throw Shader::LogicError("Out of bounds address {}", address); + } + return code[(address - read_lowest) / sizeof(u64)]; + } + + u32 TextureBoundBuffer() const override { + return texture_bound; + } + + std::array WorkgroupSize() const override { + return workgroup_size; + } + +private: + std::unique_ptr code; + std::array workgroup_size{}; + u32 texture_bound{}; + u32 read_lowest{}; + u32 read_highest{}; +}; } // Anonymous namespace +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; + } + std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)}; + std::string base_dir{shader_dir + 
"/vulkan"}; + std::string transferable_dir{base_dir + "/transferable"}; + std::string precompiled_dir{base_dir + "/precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; + } + pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); + + Common::ThreadWorker worker(11, "PipelineBuilder"); + std::mutex cache_mutex; + struct { + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + std::ifstream file; + Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return; + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + // Read header... + + while (file.tellg() != end) { + if (stop_loading) { + return; + } + u32 num_envs{}; + file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); + auto envs{std::make_shared>(num_envs)}; + for (FileEnvironment& env : *envs) { + env.Deserialize(file); + } + if (envs->front().ShaderStage() == Shader::Stage::Compute) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + ShaderPools pools; + ComputePipeline pipeline{CreateComputePipeline(pools, key, envs->front())}; + + std::lock_guard lock{cache_mutex}; + compute_cache.emplace(key, std::move(pipeline)); + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + } + }); + } else { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + ShaderPools pools; + boost::container::static_vector env_ptrs; + for (auto& env : *envs) { + env_ptrs.push_back(&env); + } 
+ GraphicsPipeline pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs))}; + + std::lock_guard lock{cache_mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + } + }); + } + ++state.total; + } + { + std::lock_guard lock{cache_mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + } + worker.WaitForRequests(); +} + size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); return static_cast(hash); @@ -279,17 +488,22 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { if (!cpu_shader_addr) { return nullptr; } - ShaderInfo* const shader{TryGet(*cpu_shader_addr)}; + const ShaderInfo* shader{TryGet(*cpu_shader_addr)}; if (!shader) { - return CreateComputePipelineWithoutShader(*cpu_shader_addr); + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + shader = MakeShaderInfo(env, *cpu_shader_addr); } - const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)}; + const ComputePipelineCacheKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; auto& pipeline{pair->second}; if (!is_new) { return &pipeline; } - pipeline = CreateComputePipeline(shader); + pipeline = CreateComputePipeline(key, shader); return &pipeline; } @@ -310,26 +524,25 @@ bool PipelineCache::RefreshStages() { } const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; if (!shader_info) { - const u32 offset{shader_config.offset}; - shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr); + const u32 start_address{shader_config.offset}; + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + 
shader_info = MakeShaderInfo(env, *cpu_shader_addr); } graphics_key.unique_hashes[index] = shader_info->unique_hash; } return true; } -const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, - u32 start_address, VAddr cpu_addr) { - GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; +const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { auto info = std::make_unique(); - if (const std::optional cached_hash{env.Analyze(start_address)}) { + if (const std::optional cached_hash{env.Analyze()}) { info->unique_hash = *cached_hash; info->size_bytes = env.CachedSize(); } else { // Slow path, not really hit on commercial games // Build a control flow graph to get the real shader size - flow_block_pool.ReleaseContents(); - Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + main_pools.flow_block.ReleaseContents(); + Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()}; info->unique_hash = env.CalculateHash(); info->size_bytes = env.ReadSize(); } @@ -339,100 +552,100 @@ const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, return result; } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { - flow_block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - block_pool.ReleaseContents(); - - std::array envs; +GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, + const GraphicsPipelineCacheKey& key, + std::span envs) { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + size_t env_index{0}; std::array programs; - - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == u128{}) { continue; } - const auto program{static_cast(index)}; - GraphicsEnvironment& env{envs[index]}; - const u32 
start_address{maxwell3d.regs.shader_config[index].offset}; - env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + Shader::Environment& env{*envs[env_index]}; + ++env_index; - const u32 cfg_offset = start_address + sizeof(Shader::ProgramHeader); - Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset); - programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg); + const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); } std::array infos{}; std::array modules; u32 binding{0}; + env_index = 0; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == u128{}) { continue; } UNIMPLEMENTED_IF(index == 0); - GraphicsEnvironment& env{envs[index]}; Shader::IR::Program& program{programs[index]}; - const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - std::vector code{EmitSPIRV(profile, env, program, binding)}; - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); - std::system("spirv-cross --vulkan-semantics D:\\shader.spv"); + Shader::Environment& env{*envs[env_index]}; + ++env_index; + const std::vector code{EmitSPIRV(profile, env, program, binding)}; modules[stage_index] = BuildShader(device, code); } return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, - descriptor_pool, update_descriptor_queue, render_pass_cache, - graphics_key.state, std::move(modules), infos); + descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, + std::move(modules), infos); } -ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { +GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { + 
main_pools.ReleaseContents(); + + std::array graphics_envs; + boost::container::static_vector generic_envs; + boost::container::static_vector envs; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + const auto program{static_cast(index)}; + GraphicsEnvironment& env{graphics_envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + generic_envs.push_back(&env); + envs.push_back(&env); + } + GraphicsPipeline pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs))}; + if (!pipeline_cache_filename.empty()) { + SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); + } + return pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; - ComputeEnvironment env{kepler_compute, gpu_memory, program_base}; - if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { - // TODO: Load from cache + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + main_pools.ReleaseContents(); + ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; + if (!pipeline_cache_filename.empty()) { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); } - flow_block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - block_pool.ReleaseContents(); + return pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env) const { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); - Shader::Maxwell::Flow::CFG cfg{env, 
flow_block_pool, qmd.program_start}; - Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; std::vector code{EmitSPIRV(profile, env, program, binding)}; - /* - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); - std::system("spirv-dis D:\\shader.spv"); - */ - shader_info->unique_hash = env.CalculateHash(); - shader_info->size_bytes = env.ReadSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } -ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) { - ShaderInfo shader; - ComputePipeline pipeline{CreateComputePipeline(&shader)}; - const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; - const size_t size_bytes{shader.size_bytes}; - Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); - return &compute_cache.emplace(key, std::move(pipeline)).first->second; -} - -ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const { - const auto& qmd{kepler_compute.launch_description}; - return { - .unique_hash = unique_hash, - .shared_memory_size = qmd.shared_alloc, - .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, - }; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 60fb976df..2ecb68bdc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -96,6 +97,7 @@ namespace Vulkan { class ComputePipeline; class Device; +class GenericEnvironment; class RasterizerVulkan; class 
RenderPassCache; class VKDescriptorPool; @@ -107,6 +109,18 @@ struct ShaderInfo { size_t size_bytes{}; }; +struct ShaderPools { + void ReleaseContents() { + inst.ReleaseContents(); + block.ReleaseContents(); + flow_block.ReleaseContents(); + } + + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; +}; + class PipelineCache final : public VideoCommon::ShaderCache { public: explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, @@ -123,19 +137,24 @@ public: [[nodiscard]] ComputePipeline* CurrentComputePipeline(); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); + private: bool RefreshStages(); - const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, - u32 start_address, VAddr cpu_addr); + const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); GraphicsPipeline CreateGraphicsPipeline(); - ComputePipeline CreateComputePipeline(ShaderInfo* shader); + GraphicsPipeline CreateGraphicsPipeline(ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs); - ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); + ComputePipeline CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); - ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const; + ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, + Shader::Environment& env) const; Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; @@ -155,11 +174,10 @@ private: std::unordered_map compute_cache; std::unordered_map graphics_cache; - Shader::ObjectPool inst_pool; - Shader::ObjectPool block_pool; - Shader::ObjectPool flow_block_pool; + ShaderPools main_pools; Shader::Profile profile; + std::string pipeline_cache_filename; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp 
b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 7e5ae43ea..1c6ba7289 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -50,6 +50,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { + std::lock_guard lock{mutex}; const auto [pair, is_new] = cache.try_emplace(key); if (!is_new) { return *pair->second; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h index db8e83f1a..eaa0ed775 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.h +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include "video_core/surface.h" @@ -37,7 +38,7 @@ struct hash { namespace Vulkan { - class Device; +class Device; class RenderPassCache { public: @@ -48,6 +49,7 @@ public: private: const Device* device{}; std::unordered_map cache; + std::mutex mutex; }; } // namespace Vulkan -- cgit v1.2.3 From f8115a6a9e544c3cc33f32ea821d0df15e01591c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Mar 2021 21:03:20 -0300 Subject: vk_pipeline_cache: Add pipeline cache --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 5 +++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 036b531b9..8f63a7591 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -514,6 +514,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } +void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token 
stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + pipeline_cache.LoadDiskResources(title_id, stop_loading, callback); +} + void RasterizerVulkan::FlushWork() { static constexpr u32 DRAWS_TO_DISPATCH = 4096; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 88dbd753b..2f1551e65 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -101,6 +101,8 @@ public: Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) override; private: static constexpr size_t MAX_TEXTURES = 192; -- cgit v1.2.3 From d40faa1db0f1703edc4e8f279f1556cee4ebddad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 23 Mar 2021 16:12:04 -0300 Subject: vk_pipeline_cache: Fix ReleaseContents order --- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 2ecb68bdc..d481f56f9 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -111,9 +111,9 @@ struct ShaderInfo { struct ShaderPools { void ReleaseContents() { - inst.ReleaseContents(); - block.ReleaseContents(); flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); } Shader::ObjectPool inst; -- cgit v1.2.3 From 3d07cef009cf9e287744c7771c67166ef5761ce8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 23 Mar 2021 20:27:17 -0400 Subject: shader: Implement VOTE --- 
src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 7 ++++++- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ src/video_core/vulkan_common/vulkan_device.cpp | 1 + src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 4 files changed, 15 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6684d37a6..8e544d745 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -36,13 +36,18 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, .stage{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, + .pNext = device.IsExtSubgroupSizeControlSupported() ? 
&subgroup_size_ci : nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = *spv_module, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 41fc9588f..bdbc8dd1e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -455,6 +455,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + .support_vote = true, + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f0e5b098c..009b74f12 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -737,6 +737,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { subgroup_properties.maxSubgroupSize >= GuestWarpSize) { extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; + ext_subgroup_size_control = true; } } else { is_warp_potentially_bigger = true; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 82bccc8f0..c268a4f8d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -193,6 +193,11 @@ public: return ext_shader_viewport_index_layer; } + /// Returns true if the device supports VK_EXT_subgroup_size_control. + bool IsExtSubgroupSizeControlSupported() const { + return ext_subgroup_size_control; + } + /// Returns true if the device supports VK_EXT_transform_feedback. 
bool IsExtTransformFeedbackSupported() const { return ext_transform_feedback; @@ -297,6 +302,7 @@ private: bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. + bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. -- cgit v1.2.3 From 68a9505d8a1d00c6ba2739bc0af3069cf87b9b84 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 01:33:45 -0300 Subject: shader: Implement NDC [-1, 1], attribute types and default varying initialization --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++ .../renderer_vulkan/vk_pipeline_cache.cpp | 33 ++++++++++++++++++++-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 ++- 3 files changed, 37 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a2ec418b1..a87ed1976 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -181,6 +181,9 @@ void GraphicsPipeline::Configure(bool is_indexed) { PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), *texture_cache, *update_descriptor_queue, index); } + if (!descriptor_set_layout) { + return; + } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index bdbc8dd1e..504b8c9d6 
100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -437,7 +437,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; - profile = Shader::Profile{ + base_profile = Shader::Profile{ .unified_descriptor_binding = true, .support_vertex_instance_id = false, .support_float_controls = true, @@ -458,6 +458,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, + .generic_input_types{}, }; } @@ -589,6 +590,7 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, Shader::Environment& env{*envs[env_index]}; ++env_index; + const Shader::Profile profile{MakeProfile(key, env.ShaderStage())}; const std::vector code{EmitSPIRV(profile, env, program, binding)}; modules[stage_index] = BuildShader(device, code); } @@ -645,9 +647,36 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(profile, env, program, binding)}; + std::vector code{EmitSPIRV(base_profile, env, program, binding)}; return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } +static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + switch (attr.Type()) { + case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedNorm: + case 
Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: + return Shader::AttributeType::Float; + case Maxwell::VertexAttribute::Type::SignedInt: + return Shader::AttributeType::SignedInt; + case Maxwell::VertexAttribute::Type::UnsignedInt: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Float; +} + +Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, + Shader::Stage stage) { + Shader::Profile profile{base_profile}; + if (stage == Shader::Stage::VertexB) { + profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; + std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), + &CastAttributeType); + } + return profile; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index d481f56f9..e09d78063 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -156,6 +156,8 @@ private: ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env) const; + Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); + Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -176,7 +178,7 @@ private: ShaderPools main_pools; - Shader::Profile profile; + Shader::Profile base_profile; std::string pipeline_cache_filename; }; -- cgit v1.2.3 From 17063d16a3cfe6542e74265739191e1d018fc456 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 18:45:38 -0300 Subject: shader: Implement TXQ and fix FragDepth --- .../renderer_vulkan/vk_pipeline_cache.cpp | 92 ++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp 
b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 504b8c9d6..30d424346 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -25,6 +25,7 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" @@ -45,6 +46,10 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } +u64 MakeCbufKey(u32 index, u32 offset) { + return (static_cast(index) << 32) | static_cast(offset); +} + class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; @@ -101,15 +106,21 @@ public: const auto data{std::make_unique(code_size)}; gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); + const u64 num_texture_types{static_cast(texture_types.size())}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) + .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) .write(reinterpret_cast(&read_highest), sizeof(read_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) .write(data.get(), code_size); + for (const auto [key, type] : texture_types) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); @@ -147,10 +158,47 @@ 
protected: return std::nullopt; } + Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, + GPUVAddr cbuf_addr, u32 cbuf_size, u32 cbuf_index, + u32 cbuf_offset) { + const u32 raw{cbuf_offset < cbuf_size ? gpu_memory->Read(cbuf_addr + cbuf_offset) : 0}; + const TextureHandle handle{raw, via_header_index}; + const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + Tegra::Texture::TICEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + + const Shader::TextureType result{[&] { + switch (entry.texture_type) { + case Tegra::Texture::TextureType::Texture1D: + return Shader::TextureType::Color1D; + case Tegra::Texture::TextureType::Texture2D: + case Tegra::Texture::TextureType::Texture2DNoMipmap: + return Shader::TextureType::Color2D; + case Tegra::Texture::TextureType::Texture3D: + return Shader::TextureType::Color3D; + case Tegra::Texture::TextureType::TextureCubemap: + return Shader::TextureType::ColorCube; + case Tegra::Texture::TextureType::Texture1DArray: + return Shader::TextureType::ColorArray1D; + case Tegra::Texture::TextureType::Texture2DArray: + return Shader::TextureType::ColorArray2D; + case Tegra::Texture::TextureType::Texture1DBuffer: + throw Shader::NotImplementedException("Texture buffer"); + case Tegra::Texture::TextureType::TextureCubeArray: + return Shader::TextureType::ColorArrayCube; + default: + throw Shader::NotImplementedException("Unknown texture type"); + } + }()}; + texture_types.emplace(MakeCbufKey(cbuf_index, cbuf_offset), result); + return result; + } + Tegra::MemoryManager* gpu_memory{}; GPUVAddr program_base{}; std::vector code; + std::unordered_map texture_types; u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; @@ -176,29 +224,45 @@ public: switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; + stage_index = 0; break; case Maxwell::ShaderProgram::VertexB: stage = 
Shader::Stage::VertexB; + stage_index = 0; break; case Maxwell::ShaderProgram::TesselationControl: stage = Shader::Stage::TessellationControl; + stage_index = 1; break; case Maxwell::ShaderProgram::TesselationEval: stage = Shader::Stage::TessellationEval; + stage_index = 2; break; case Maxwell::ShaderProgram::Geometry: stage = Shader::Stage::Geometry; + stage_index = 3; break; case Maxwell::ShaderProgram::Fragment: stage = Shader::Stage::Fragment; + stage_index = 4; break; default: UNREACHABLE_MSG("Invalid program={}", program); + break; } } ~GraphicsEnvironment() override = default; + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + const auto& regs{maxwell3d->regs}; + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, + cbuf.address, cbuf.size, cbuf_index, cbuf_offset); + } + u32 TextureBoundBuffer() const override { return maxwell3d->regs.tex_cb_index; } @@ -209,6 +273,7 @@ public: private: Tegra::Engines::Maxwell3D* maxwell3d{}; + size_t stage_index{}; }; class ComputeEnvironment final : public GenericEnvironment { @@ -224,6 +289,15 @@ public: ~ComputeEnvironment() override = default; + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, + cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); + } + u32 TextureBoundBuffer() const override { return kepler_compute->regs.tex_cb_index; } @@ -278,7 +352,9 @@ class FileEnvironment final : public Shader::Environment { 
public: void Deserialize(std::ifstream& file) { u64 code_size{}; + u64 num_texture_types{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) + .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) @@ -286,6 +362,13 @@ public: .read(reinterpret_cast(&stage), sizeof(stage)); code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); file.read(reinterpret_cast(code.get()), code_size); + for (size_t i = 0; i < num_texture_types; ++i) { + u64 key; + Shader::TextureType type; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&type), sizeof(type)); + texture_types.emplace(key, type); + } if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); } else { @@ -300,6 +383,14 @@ public: return code[(address - read_lowest) / sizeof(u64)]; } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == texture_types.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; + } + u32 TextureBoundBuffer() const override { return texture_bound; } @@ -310,6 +401,7 @@ public: private: std::unique_ptr code; + std::unordered_map texture_types; std::array workgroup_size{}; u32 texture_bound{}; u32 read_lowest{}; -- cgit v1.2.3 From ec005be99d4f231f6d4d812841c84ab4af4204a6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 18:55:07 -0300 Subject: shader: Fix rasterizer integration order issues --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 7 +++++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 - src/video_core/renderer_vulkan/vk_render_pass_cache.cpp | 5 +---- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 
'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a87ed1976..82536b9d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -139,7 +139,6 @@ void GraphicsPipeline::Configure(bool is_indexed) { static_vector samplers; texture_cache->SynchronizeGraphicsDescriptors(); - texture_cache->UpdateRenderTargets(false); const auto& regs{maxwell3d->regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; @@ -181,13 +180,17 @@ void GraphicsPipeline::Configure(bool is_indexed) { PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), *texture_cache, *update_descriptor_queue, index); } + texture_cache->UpdateRenderTargets(false); + scheduler->RequestRenderpass(texture_cache->GetFramebuffer()); + + scheduler->BindGraphicsPipeline(*pipeline); + if (!descriptor_set_layout) { return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); - scheduler->BindGraphicsPipeline(*pipeline); scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 8f63a7591..d7d9927dd 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -178,7 +178,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { BeginTransformFeedback(); - scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); UpdateDynamicStates(); const auto& regs{maxwell3d.regs}; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp 
b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 1c6ba7289..b2dcd74ab 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -56,15 +56,12 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { return *pair->second; } boost::container::static_vector descriptions; - u32 num_images{0}; - for (size_t index = 0; index < key.color_formats.size(); ++index) { const PixelFormat format{key.color_formats[index]}; if (format == PixelFormat::Invalid) { continue; } descriptions.push_back(AttachmentDescription(*device, format, key.samples)); - ++num_images; } const size_t num_colors{descriptions.size()}; const VkAttachmentReference* depth_attachment{}; @@ -89,7 +86,7 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .pNext = nullptr, .flags = 0, .attachmentCount = static_cast(descriptions.size()), - .pAttachments = descriptions.data(), + .pAttachments = descriptions.empty() ? nullptr : descriptions.data(), .subpassCount = 1, .pSubpasses = &subpass, .dependencyCount = 0, -- cgit v1.2.3 From cb6039ccea77d35fb829c337fd61451f549e3453 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 02:56:09 -0300 Subject: vk_pipeline_cache: Fix pipeline and shader caches --- .../renderer_vulkan/vk_pipeline_cache.cpp | 26 +++++++++++++++++----- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 1 + 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30d424346..51c155077 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -62,7 +62,7 @@ public: ~GenericEnvironment() override = default; std::optional Analyze() { - const std::optional size{TryFindSize(start_address)}; + const std::optional size{TryFindSize()}; if (!size) { return 
std::nullopt; } @@ -71,6 +71,13 @@ public: return Common::CityHash128(reinterpret_cast(code.data()), code.size()); } + void SetCachedSize(size_t size_bytes) { + cached_lowest = start_address; + cached_highest = start_address + static_cast(size_bytes); + code.resize(CachedSize()); + gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); + } + [[nodiscard]] size_t CachedSize() const noexcept { return cached_highest - cached_lowest + INST_SIZE; } @@ -80,7 +87,7 @@ public: } [[nodiscard]] bool CanBeSerialized() const noexcept { - return has_unbound_instructions; + return !has_unbound_instructions; } [[nodiscard]] u128 CalculateHash() const { @@ -95,7 +102,7 @@ public: read_highest = std::max(read_highest, address); if (address >= cached_lowest && address < cached_highest) { - return code[address / INST_SIZE]; + return code[(address - cached_lowest) / INST_SIZE]; } has_unbound_instructions = true; return gpu_memory->Read(program_base + address); @@ -117,30 +124,34 @@ public: .write(reinterpret_cast(&read_highest), sizeof(read_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) .write(data.get(), code_size); + file.flush(); for (const auto [key, type] : texture_types) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } + file.flush(); if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); } + file.flush(); } protected: static constexpr size_t INST_SIZE = sizeof(u64); - std::optional TryFindSize(GPUVAddr guest_addr) { + std::optional TryFindSize() { constexpr size_t BLOCK_SIZE = 0x1000; constexpr size_t MAXIMUM_SIZE = 0x100000; constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - size_t offset = 0; - size_t size = BLOCK_SIZE; + GPUVAddr guest_addr{program_base + 
start_address}; + size_t offset{0}; + size_t size{BLOCK_SIZE}; while (size <= MAXIMUM_SIZE) { code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; @@ -623,6 +634,7 @@ bool PipelineCache::RefreshStages() { GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; shader_info = MakeShaderInfo(env, *cpu_shader_addr); } + shader_infos[index] = shader_info; graphics_key.unique_hashes[index] = shader_info->unique_hash; } return true; @@ -707,6 +719,8 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { GraphicsEnvironment& env{graphics_envs[index]}; const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + env.SetCachedSize(shader_infos[index]->size_bytes); + generic_envs.push_back(&env); envs.push_back(&env); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e09d78063..b55e14189 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -172,6 +172,7 @@ private: TextureCache& texture_cache; GraphicsPipelineCacheKey graphics_key{}; + std::array shader_infos{}; std::unordered_map compute_cache; std::unordered_map graphics_cache; -- cgit v1.2.3 From 675a82416d7775dc7a252a5d8f5b704e6b8f2326 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 03:08:31 -0300 Subject: spirv: Remove dependencies on Environment when generating SPIR-V --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 51c155077..251559b16 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -680,7 +680,6 
@@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, std::array modules; u32 binding{0}; - env_index = 0; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == u128{}) { continue; @@ -691,11 +690,8 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - Shader::Environment& env{*envs[env_index]}; - ++env_index; - - const Shader::Profile profile{MakeProfile(key, env.ShaderStage())}; - const std::vector code{EmitSPIRV(profile, env, program, binding)}; + const Shader::Profile profile{MakeProfile(key, program.stage)}; + const std::vector code{EmitSPIRV(profile, program, binding)}; modules[stage_index] = BuildShader(device, code); } return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, @@ -753,7 +749,7 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(base_profile, env, program, binding)}; + std::vector code{EmitSPIRV(base_profile, program, binding)}; return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } -- cgit v1.2.3 From dbd882ddeb1a1a9233c0085d0b8ccb022db385b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 04:59:58 -0300 Subject: shader: Better interpolation and disabled attributes support --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 4 ++-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 82536b9d6..278509bf0 100644 
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -221,10 +221,10 @@ void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineSta } } static_vector vertex_attributes; - const auto& input_attributes = stage_infos[0].loads_generics; + const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < state.attributes.size(); ++index) { const auto& attribute = state.attributes[index]; - if (!attribute.enabled || !input_attributes[index]) { + if (!attribute.enabled || !input_attributes[index].used) { continue; } vertex_attributes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 251559b16..69dd945b2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -755,6 +755,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + if (attr.enabled == 0) { + return Shader::AttributeType::Disabled; + } switch (attr.Type()) { case Maxwell::VertexAttribute::Type::SignedNorm: case Maxwell::VertexAttribute::Type::UnsignedNorm: -- cgit v1.2.3 From e860870dd2244cd87645190c89244f1d2c4c775b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Mar 2021 19:53:34 -0300 Subject: shader: Implement LDS, STS, LDL, and STS and use SPIR-V 1.4 when available --- .../renderer_vulkan/vk_pipeline_cache.cpp | 47 ++++++++++++++++++++-- src/video_core/vulkan_common/vulkan_device.cpp | 34 ++++++++++++++++ src/video_core/vulkan_common/vulkan_device.h | 42 ++++++++++++------- 3 files changed, 104 insertions(+), 19 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 69dd945b2..0d6a32bfd 
100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -114,10 +114,12 @@ public: gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); const u64 num_texture_types{static_cast(texture_types.size())}; + const u32 local_memory_size{LocalMemorySize()}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) @@ -132,7 +134,10 @@ public: file.flush(); if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; - file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + const u32 shared_memory_size{SharedMemorySize()}; + file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .write(reinterpret_cast(&shared_memory_size), + sizeof(shared_memory_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); } @@ -278,6 +283,16 @@ public: return maxwell3d->regs.tex_cb_index; } + u32 LocalMemorySize() const override { + const u64 size{sph.LocalMemorySize()}; + ASSERT(size <= std::numeric_limits::max()); + return static_cast(size); + } + + u32 SharedMemorySize() const override { + throw Shader::LogicError("Requesting shared memory size in graphics stage"); + } + std::array WorkgroupSize() const override { throw Shader::LogicError("Requesting workgroup size in a graphics stage"); } @@ -313,6 +328,16 @@ public: return kepler_compute->regs.tex_cb_index; } + u32 LocalMemorySize() const override { + const auto& qmd{kepler_compute->launch_description}; + return qmd.local_pos_alloc; + } + + u32 SharedMemorySize() const override { + const auto& 
qmd{kepler_compute->launch_description}; + return qmd.shared_alloc; + } + std::array WorkgroupSize() const override { const auto& qmd{kepler_compute->launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; @@ -366,6 +391,7 @@ public: u64 num_texture_types{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) @@ -381,7 +407,8 @@ public: texture_types.emplace(key, type); } if (stage == Shader::Stage::Compute) { - file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.read(reinterpret_cast(&sph), sizeof(sph)); } @@ -402,6 +429,14 @@ public: return it->second; } + u32 LocalMemorySize() const override { + return local_memory_size; + } + + u32 SharedMemorySize() const override { + return shared_memory_size; + } + u32 TextureBoundBuffer() const override { return texture_bound; } @@ -414,6 +449,8 @@ private: std::unique_ptr code; std::unordered_map texture_types; std::array workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; u32 texture_bound{}; u32 read_lowest{}; u32 read_highest{}; @@ -541,6 +578,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ + .supported_spirv = device.IsKhrSpirv1_4Supported() ? 
0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_vertex_instance_id = false, .support_float_controls = true, @@ -558,6 +596,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, @@ -600,8 +639,8 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { shader = MakeShaderInfo(env, *cpu_shader_addr); } const ComputePipelineCacheKey key{ - .unique_hash = shader->unique_hash, - .shared_memory_size = qmd.shared_alloc, + .unique_hash{shader->unique_hash}, + .shared_memory_size{qmd.shared_alloc}, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 009b74f12..c027598ba 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -399,6 +399,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; + if (khr_workgroup_memory_explicit_layout) { + workgroup_layout = { + .sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR, + .pNext = nullptr, + .workgroupMemoryExplicitLayout = VK_TRUE, + .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE, + .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE, + 
.workgroupMemoryExplicitLayout16BitAccess = VK_TRUE, + }; + SetNext(next, workgroup_layout); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } @@ -662,6 +676,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { } bool has_khr_shader_float16_int8{}; + bool has_khr_workgroup_memory_explicit_layout{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; @@ -682,6 +697,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); + test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); @@ -694,6 +710,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + test(has_khr_workgroup_memory_explicit_layout, + VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); @@ -787,6 +805,22 @@ std::vector Device::LoadExtensions(bool requires_surface) { ext_extended_dynamic_state = true; } } + if (has_khr_workgroup_memory_explicit_layout) { + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; + layout.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR; + layout.pNext = 
nullptr; + features.pNext = &layout; + physical.GetFeatures2KHR(features); + + if (layout.workgroupMemoryExplicitLayout && + layout.workgroupMemoryExplicitLayout8BitAccess && + layout.workgroupMemoryExplicitLayout16BitAccess && + layout.workgroupMemoryExplicitLayoutScalarBlockLayout) { + extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); + khr_workgroup_memory_explicit_layout = true; + } + } return extensions; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index c268a4f8d..ac2311e7e 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -168,11 +168,21 @@ public: return nv_viewport_swizzle; } - /// Returns true if the device supports VK_EXT_scalar_block_layout. + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; } + /// Returns true if the device supports VK_KHR_spirv_1_4. + bool IsKhrSpirv1_4Supported() const { + return khr_spirv_1_4; + } + + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. + bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { + return khr_workgroup_memory_explicit_layout; + } + /// Returns true if the device supports VK_EXT_index_type_uint8. bool IsExtIndexTypeUint8Supported() const { return ext_index_type_uint8; @@ -296,20 +306,22 @@ private: bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. - bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. - bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. - bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. 
- bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. - bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. - bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. - bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. - bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. - bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. - bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. - bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached + bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. + bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. + bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. + bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. + bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. + bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. + bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. + bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. + bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. + bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. + bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. 
+ bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached // Telemetry parameters std::string vendor_name; ///< Device's driver name. -- cgit v1.2.3 From 3c758d9b538e957a20ea6db136741ad2bd16406d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Mar 2021 21:55:47 -0300 Subject: vk_pipeline_cache: Fix size hashing of shaders --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0d6a32bfd..8b2816c13 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -68,7 +68,7 @@ public: } cached_lowest = start_address; cached_highest = start_address + static_cast(*size); - return Common::CityHash128(reinterpret_cast(code.data()), code.size()); + return Common::CityHash128(reinterpret_cast(code.data()), *size); } void SetCachedSize(size_t size_bytes) { @@ -126,12 +126,10 @@ public: .write(reinterpret_cast(&read_highest), sizeof(read_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) .write(data.get(), code_size); - file.flush(); for (const auto [key, type] : texture_types) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } - file.flush(); if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; const u32 shared_memory_size{SharedMemorySize()}; @@ -141,7 +139,6 @@ public: } else { file.write(reinterpret_cast(&sph), sizeof(sph)); } - file.flush(); } protected: @@ -161,10 +158,10 @@ protected: code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); - for (size_t i = 0; i 
< BLOCK_SIZE; i += INST_SIZE) { - const u64 inst = data[i / INST_SIZE]; + for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { + const u64 inst = data[index / INST_SIZE]; if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { - return offset + i; + return offset + index; } } guest_addr += BLOCK_SIZE; @@ -751,7 +748,7 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { continue; } const auto program{static_cast(index)}; - GraphicsEnvironment& env{graphics_envs[index]}; + auto& env{graphics_envs[index]}; const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); @@ -771,6 +768,8 @@ ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheK const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + main_pools.ReleaseContents(); ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; if (!pipeline_cache_filename.empty()) { -- cgit v1.2.3 From 34aba9627a8fad20b3b173180e2f3d679dd32293 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 22:30:24 +0100 Subject: shader: Implement BRX --- .../renderer_vulkan/vk_pipeline_cache.cpp | 50 +++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8b2816c13..6cde01491 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,7 +47,7 @@ auto MakeSpan(Container& container) { } u64 MakeCbufKey(u32 index, u32 offset) { - return (static_cast(index) << 32) | static_cast(offset); + return 
(static_cast(index) << 32) | offset; } class GenericEnvironment : public Shader::Environment { @@ -114,11 +114,13 @@ public: gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); const u64 num_texture_types{static_cast(texture_types.size())}; + const u64 num_cbuf_values{static_cast(cbuf_values.size())}; const u32 local_memory_size{LocalMemorySize()}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) @@ -130,6 +132,10 @@ public: file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } + for (const auto [key, type] : cbuf_values) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; const u32 shared_memory_size{SharedMemorySize()}; @@ -212,6 +218,7 @@ protected: std::vector code; std::unordered_map texture_types; + std::unordered_map cbuf_values; u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; @@ -267,6 +274,17 @@ public: ~GraphicsEnvironment() override = default; + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.address + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto& regs{maxwell3d->regs}; const auto& 
cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; @@ -312,6 +330,18 @@ public: ~ComputeEnvironment() override = default; + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.Address() + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto& regs{kepler_compute->regs}; const auto& qmd{kepler_compute->launch_description}; @@ -386,8 +416,10 @@ public: void Deserialize(std::ifstream& file) { u64 code_size{}; u64 num_texture_types{}; + u64 num_cbuf_values{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) @@ -403,6 +435,13 @@ public: .read(reinterpret_cast(&type), sizeof(type)); texture_types.emplace(key, type); } + for (size_t i = 0; i < num_cbuf_values; ++i) { + u64 key; + u32 value; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&value), sizeof(value)); + cbuf_values.emplace(key, value); + } if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); @@ -418,6 +457,14 @@ public: return code[(address - read_lowest) / sizeof(u64)]; } + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto 
it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == cbuf_values.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; if (it == texture_types.end()) { @@ -445,6 +492,7 @@ public: private: std::unique_ptr code; std::unordered_map texture_types; + std::unordered_map cbuf_values; std::array workgroup_size{}; u32 local_memory_size{}; u32 shared_memory_size{}; -- cgit v1.2.3 From 7a1c14269e20cffeed780f388c90a86f8bba1a92 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 30 Mar 2021 03:58:46 -0300 Subject: spirv: Add fixed pipeline point size --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6cde01491..eb4df9000 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -864,6 +864,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Profile profile{base_profile}; if (stage == Shader::Stage::VertexB) { profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + profile.fixed_state_point_size = Common::BitCast(key.state.point_size); + } std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); } -- cgit v1.2.3 From dc1a9a3bed2aa9b0851f07976b0c687172aa3edc Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 20:51:05 +0100 Subject: shader: Implement TLD --- src/video_core/memory_manager.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/memory_manager.cpp 
b/src/video_core/memory_manager.cpp index d2b9d5f2b..05e27c687 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -64,12 +64,11 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); if (it != map_ranges.end()) { - ASSERT(it->first == gpu_addr); + // ASSERT(it->first == gpu_addr); map_ranges.erase(it); } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); } - const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); for (const auto& map : submapped_ranges) { -- cgit v1.2.3 From 4d0d29fc2092bf02e102b8bac9cfa1b509274901 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 30 Mar 2021 08:41:21 +0200 Subject: shader: Address feedback --- src/video_core/memory_manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 05e27c687..882eff880 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -64,7 +64,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); if (it != map_ranges.end()) { - // ASSERT(it->first == gpu_addr); + ASSERT(it->first == gpu_addr); map_ranges.erase(it); } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); -- cgit v1.2.3 From 0c933e20dec02e12a4644281b9b7bf9716a5cbb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 30 Mar 2021 21:28:00 -0300 Subject: vk_pipeline_cache: Name SPIR-V modules --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index eb4df9000..30a707599 
100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -777,6 +777,11 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, const Shader::Profile profile{MakeProfile(key, program.stage)}; const std::vector code{EmitSPIRV(profile, program, binding)}; modules[stage_index] = BuildShader(device, code); + if (device.HasDebuggingToolAttached()) { + const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], + key.unique_hashes[index][1])}; + modules[stage_index].SetObjectNameEXT(name.c_str()); + } } return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, @@ -836,8 +841,13 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; std::vector code{EmitSPIRV(base_profile, program, binding)}; + vk::ShaderModule spv_module{BuildShader(device, code)}; + if (device.HasDebuggingToolAttached()) { + const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; + spv_module.SetObjectNameEXT(name.c_str()); + } return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, - BuildShader(device, code)}; + std::move(spv_module)}; } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { -- cgit v1.2.3 From 2fc698b040e7e25223ba6ebe31abb04b1fc65f06 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 01:36:22 -0300 Subject: vulkan: Build pipelines in parallel at runtime Wait from the worker thread for a pipeline to build before binding it to the command buffer. This allows queueing pipelines to multiple threads. 
--- .../renderer_vulkan/vk_compute_pipeline.cpp | 95 +++++++++++++--------- .../renderer_vulkan/vk_compute_pipeline.h | 30 +++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 72 +++++++++------- .../renderer_vulkan/vk_graphics_pipeline.h | 31 +++---- .../renderer_vulkan/vk_pipeline_cache.cpp | 78 +++++++++--------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 24 ++++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 15 +--- src/video_core/renderer_vulkan/vk_scheduler.cpp | 10 +-- src/video_core/renderer_vulkan/vk_scheduler.h | 7 +- 9 files changed, 197 insertions(+), 165 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 8e544d745..1c3249e3c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -27,8 +27,9 @@ DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& inf ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, - const Shader::Info& info_, vk::ShaderModule spv_module_) - : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, + Common::ThreadWorker* thread_worker, const Shader::Info& info_, + vk::ShaderModule spv_module_) + : update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { DescriptorLayoutTuple tuple{CreateLayout(device, info)}; descriptor_set_layout = std::move(tuple.descriptor_set_layout); @@ -36,46 +37,55 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - pipeline = device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, + auto func{[this, &device] { + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }); + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = device.IsExtSubgroupSizeControlSupported() ? 
&subgroup_size_ci : nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); + building_flag.test_and_set(); + building_flag.notify_all(); + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); + } else { + func(); + } } -void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { +void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, TextureCache& texture_cache) { + update_descriptor_queue.Acquire(); + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); - size_t index{}; + size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true); - ++index; + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, true); + ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); buffer_cache.BindHostComputeBuffers(); -} -void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, - TextureCache& texture_cache) { texture_cache.SynchronizeComputeDescriptors(); static constexpr size_t max_elements = 64; @@ -103,15 +113,26 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); - size_t index{}; + size_t image_index{}; PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, - *update_descriptor_queue, index); -} + update_descriptor_queue, image_index); -VkDescriptorSet 
ComputePipeline::UpdateDescriptorSet() { + if (!building_flag.test()) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + } + scheduler.Record([this](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + }); + if (!descriptor_set_layout) { + return; + } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); - return descriptor_set; + update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_set, nullptr); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index e82e5816b..02da504f7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,7 +4,10 @@ #pragma once +#include + #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" @@ -16,36 +19,26 @@ namespace Vulkan { class Device; +class VKScheduler; class ComputePipeline { public: - explicit ComputePipeline() = default; explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, - const Shader::Info& info, vk::ShaderModule spv_module); + Common::ThreadWorker* thread_worker, const Shader::Info& info, + vk::ShaderModule spv_module); - ComputePipeline& operator=(ComputePipeline&&) noexcept = default; - ComputePipeline(ComputePipeline&&) noexcept = default; + ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; + 
ComputePipeline(ComputePipeline&&) noexcept = delete; ComputePipeline& operator=(const ComputePipeline&) = delete; ComputePipeline(const ComputePipeline&) = delete; - void ConfigureBufferCache(BufferCache& buffer_cache); - void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache); - - [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); - - [[nodiscard]] VkPipeline Handle() const noexcept { - return *pipeline; - } - - [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept { - return *pipeline_layout; - } + void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); private: - VKUpdateDescriptorQueue* update_descriptor_queue; + VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; vk::ShaderModule spv_module; @@ -54,6 +47,7 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + std::atomic_flag building_flag{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 278509bf0..ddc08b8c4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -112,13 +112,15 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, BufferCache& buffer_cache_, TextureCache& texture_cache_, const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, - const FixedPipelineState& state, + const FixedPipelineState& state_, std::array stages, const std::array& infos) - : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_}, - 
buffer_cache{&buffer_cache_}, scheduler{&scheduler_}, - update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} { + : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, scheduler{scheduler_}, + update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{ + std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); @@ -128,8 +130,17 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; - MakePipeline(device, state, render_pass); + auto func{[this, &device, &render_pass_cache] { + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + MakePipeline(device, render_pass); + building_flag.test_and_set(); + building_flag.notify_all(); + }}; + if (worker_thread) { + worker_thread->QueueWork(std::move(func)); + } else { + func(); + } } void GraphicsPipeline::Configure(bool is_indexed) { @@ -138,67 +149,72 @@ void GraphicsPipeline::Configure(bool is_indexed) { static_vector image_view_indices; static_vector samplers; - texture_cache->SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeGraphicsDescriptors(); - const auto& regs{maxwell3d->regs}; + const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { const Shader::Info& info{stage_infos[stage]}; - buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask); - buffer_cache->UnbindGraphicsStorageBuffers(stage); + buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + 
buffer_cache.UnbindGraphicsStorageBuffers(stage); size_t index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - true); + buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, + true); ++index; } - const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; for (const auto& desc : info.texture_descriptors) { const u32 cbuf_index{desc.cbuf_index}; const u32 cbuf_offset{desc.cbuf_offset}; ASSERT(cbufs[cbuf_index].enabled); const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; - const u32 raw_handle{gpu_memory->Read(addr)}; + const u32 raw_handle{gpu_memory.Read(addr)}; const TextureHandle handle(raw_handle, via_header_index); image_view_indices.push_back(handle.image); - Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)}; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache->UpdateGraphicsBuffers(is_indexed); - texture_cache->FillGraphicsImageViews(indices_span, image_view_ids); + buffer_cache.UpdateGraphicsBuffers(is_indexed); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - buffer_cache->BindHostGeometryBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); size_t index{}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - buffer_cache->BindHostStageBuffers(stage); + buffer_cache.BindHostStageBuffers(stage); PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), - *texture_cache, *update_descriptor_queue, index); + texture_cache, update_descriptor_queue, index); } - texture_cache->UpdateRenderTargets(false); - 
scheduler->RequestRenderpass(texture_cache->GetFramebuffer()); - - scheduler->BindGraphicsPipeline(*pipeline); + texture_cache.UpdateRenderTargets(false); + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); + if (!building_flag.test()) { + scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + } + if (scheduler.UpdateGraphicsPipeline(this)) { + scheduler.Record([this](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + }); + } if (!descriptor_set_layout) { return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); - scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + scheduler.Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); }); } -void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state, - VkRenderPass render_pass) { +void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { dynamic = state.dynamic_state; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ba1d34a83..4e0583157 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -5,13 +5,15 @@ #pragma once #include +#include +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include 
"video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -25,34 +27,34 @@ class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; public: - explicit GraphicsPipeline() = default; explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, - BufferCache& buffer_cache, - TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool, + BufferCache& buffer_cache, TextureCache& texture_cache, + const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, const FixedPipelineState& state, std::array stages, const std::array& infos); void Configure(bool is_indexed); - GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default; - GraphicsPipeline(GraphicsPipeline&&) noexcept = default; + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; + GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; GraphicsPipeline(const GraphicsPipeline&) = delete; private: - void MakePipeline(const Device& device, const FixedPipelineState& state, - VkRenderPass render_pass); + void MakePipeline(const Device& device, VkRenderPass render_pass); - Tegra::Engines::Maxwell3D* maxwell3d{}; - Tegra::MemoryManager* gpu_memory{}; - TextureCache* texture_cache{}; - BufferCache* buffer_cache{}; - VKScheduler* scheduler{}; - VKUpdateDescriptorQueue* update_descriptor_queue{}; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::MemoryManager& gpu_memory; + TextureCache& texture_cache; + BufferCache& buffer_cache; + VKScheduler& scheduler; + VKUpdateDescriptorQueue& 
update_descriptor_queue; + const FixedPipelineState state; std::array spv_modules; std::array stage_infos; @@ -61,6 +63,7 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + std::atomic_flag building_flag{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30a707599..e3d9debf4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -518,9 +518,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); - Common::ThreadWorker worker(11, "PipelineBuilder"); - std::mutex cache_mutex; struct { + std::mutex mutex; size_t total{0}; size_t built{0}; bool has_loaded{false}; @@ -542,51 +541,53 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } u32 num_envs{}; file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); - auto envs{std::make_shared>(num_envs)}; - for (FileEnvironment& env : *envs) { + std::vector envs(num_envs); + for (FileEnvironment& env : envs) { env.Deserialize(file); } - if (envs->front().ShaderStage() == Shader::Stage::Compute) { + if (envs.front().ShaderStage() == Shader::Stage::Compute) { ComputePipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); - worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; - ComputePipeline pipeline{CreateComputePipeline(pools, key, envs->front())}; + auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; compute_cache.emplace(key, std::move(pipeline)); + ++state.built; if (state.has_loaded) { - 
callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } else { GraphicsPipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); - worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; boost::container::static_vector env_ptrs; - for (auto& env : *envs) { + for (auto& env : envs) { env_ptrs.push_back(&env); } - GraphicsPipeline pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs))}; + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } ++state.total; } { - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); state.has_loaded = true; } - worker.WaitForRequests(); + workers.WaitForRequests(); } size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -619,7 +620,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(11, "PipelineBuilder") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ @@ -662,10 +663,10 @@ GraphicsPipeline* 
PipelineCache::CurrentGraphicsPipeline() { const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& pipeline{pair->second}; if (!is_new) { - return &pipeline; + return pipeline.get(); } pipeline = CreateGraphicsPipeline(); - return &pipeline; + return pipeline.get(); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -691,10 +692,10 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { const auto [pair, is_new]{compute_cache.try_emplace(key)}; auto& pipeline{pair->second}; if (!is_new) { - return &pipeline; + return pipeline.get(); } pipeline = CreateComputePipeline(key, shader); - return &pipeline; + return pipeline.get(); } bool PipelineCache::RefreshStages() { @@ -743,9 +744,9 @@ const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr c return result; } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, - const GraphicsPipelineCacheKey& key, - std::span envs) { +std::unique_ptr PipelineCache::CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs, bool build_in_parallel) { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; @@ -783,12 +784,14 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, modules[stage_index].SetObjectNameEXT(name.c_str()); } } - return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, - descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, - std::move(modules), infos); + Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; + return std::make_unique( + maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, + update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules), + infos); } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { +std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); std::array graphics_envs; @@ -809,22 +812,22 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { generic_envs.push_back(&env); envs.push_back(&env); } - GraphicsPipeline pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs))}; + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; if (!pipeline_cache_filename.empty()) { SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); } return pipeline; } -ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheKey& key, - const ShaderInfo* shader) { +std::unique_ptr PipelineCache::CreateComputePipeline( + const ComputePipelineCacheKey& key, const ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; + auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { SerializePipeline(key, std::array{&env}, pipeline_cache_filename); @@ -832,9 +835,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheK return pipeline; } -ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, - const ComputePipelineCacheKey& key, - Shader::Environment& env) const { +std::unique_ptr PipelineCache::CreateComputePipeline( + ShaderPools& pools, const 
ComputePipelineCacheKey& key, Shader::Environment& env, + bool build_in_parallel) { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -846,8 +849,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; spv_module.SetObjectNameEXT(name.c_str()); } - return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, - std::move(spv_module)}; + Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; + return std::make_unique(device, descriptor_pool, update_descriptor_queue, + thread_worker, program.info, std::move(spv_module)); } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index b55e14189..609f00898 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -14,6 +14,7 @@ #include #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -145,16 +146,19 @@ private: const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); - GraphicsPipeline CreateGraphicsPipeline(); + std::unique_ptr CreateGraphicsPipeline(); - GraphicsPipeline CreateGraphicsPipeline(ShaderPools& pools, const GraphicsPipelineCacheKey& key, - std::span envs); + std::unique_ptr CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs, bool build_in_parallel); - ComputePipeline CreateComputePipeline(const ComputePipelineCacheKey& key, - const ShaderInfo* shader); + std::unique_ptr 
CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); - ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, - Shader::Environment& env) const; + std::unique_ptr CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env, + bool build_in_parallel); Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); @@ -174,13 +178,15 @@ private: GraphicsPipelineCacheKey graphics_key{}; std::array shader_infos{}; - std::unordered_map compute_cache; - std::unordered_map graphics_cache; + std::unordered_map> compute_cache; + std::unordered_map> graphics_cache; ShaderPools main_pools; Shader::Profile base_profile; std::string pipeline_cache_filename; + + Common::ThreadWorker workers; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d7d9927dd..f0bd4b8af 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -276,22 +276,11 @@ void RasterizerVulkan::DispatchCompute() { return; } std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; - update_descriptor_queue.Acquire(); - pipeline->ConfigureBufferCache(buffer_cache); - pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache); - const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; + pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache); const auto& qmd{kepler_compute.launch_description}; const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; - const VkPipeline pipeline_handle{pipeline->Handle()}; - const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()}; - scheduler.Record( - [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, 
pipeline_handle); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, - descriptor_set, nullptr); - cmdbuf.Dispatch(dim[0], dim[1], dim[2]); - }); + scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f35c120b0..25a4933e5 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -124,18 +124,16 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() { EndRenderPass(); } -void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { +bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { if (state.graphics_pipeline == pipeline) { - return; + return false; } state.graphics_pipeline = pipeline; - Record([pipeline](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - }); + return true; } void VKScheduler::WorkerThread() { - Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + Common::SetCurrentThreadName("yuzu:VulkanWorker"); std::unique_lock lock{mutex}; do { cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3ce48e9d2..a40bb8bcd 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -22,6 +22,7 @@ namespace Vulkan { class CommandPool; class Device; class Framebuffer; +class GraphicsPipeline; class StateTracker; class VKQueryCache; @@ -52,8 +53,8 @@ public: /// of a renderpass. void RequestOutsideRenderPassOperationContext(); - /// Binds a pipeline to the current execution context. - void BindGraphicsPipeline(VkPipeline pipeline); + /// Update the pipeline to the current execution context. 
+ bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); /// Invalidates current command buffer state except for render passes void InvalidateState(); @@ -170,7 +171,7 @@ private: VkRenderPass renderpass = nullptr; VkFramebuffer framebuffer = nullptr; VkExtent2D render_area = {0, 0}; - VkPipeline graphics_pipeline = nullptr; + GraphicsPipeline* graphics_pipeline = nullptr; }; void WorkerThread(); -- cgit v1.2.3 From 8771639d1e97cf2224657c0d2ee87d800a784ac8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 03:15:28 -0300 Subject: vulkan: Create pipeline layouts in separate threads --- src/video_core/renderer_vulkan/pipeline_helper.h | 72 ++++++++++++---------- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 19 +++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 26 ++++---- .../renderer_vulkan/vk_pipeline_cache.cpp | 3 +- .../renderer_vulkan/vk_update_descriptor.cpp | 4 +- .../renderer_vulkan/vk_update_descriptor.h | 2 +- 7 files changed, 65 insertions(+), 63 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 0a59aa659..eebe5d569 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -35,49 +35,52 @@ struct TextureHandle { u32 sampler; }; -struct DescriptorLayoutTuple { - vk::DescriptorSetLayout descriptor_set_layout; - vk::PipelineLayout pipeline_layout; - vk::DescriptorUpdateTemplateKHR descriptor_update_template; -}; - class DescriptorLayoutBuilder { public: - DescriptorLayoutTuple Create(const vk::Device& device) { - DescriptorLayoutTuple result; - if (!bindings.empty()) { - result.descriptor_set_layout = device.CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - 
}); + DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} + + vk::DescriptorSetLayout CreateDescriptorSetLayout() const { + if (bindings.empty()) { + return nullptr; } - result.pipeline_layout = device.CreatePipelineLayout({ + return device->CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); + } + + vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, + VkPipelineLayout pipeline_layout) const { + if (entries.empty()) { + return nullptr; + } + return device->CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = pipeline_layout, + .set = 0, + }); + } + + vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { + return device->CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, - .setLayoutCount = result.descriptor_set_layout ? 1U : 0U, - .pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(), + .setLayoutCount = descriptor_set_layout ? 1U : 0U, + .pSetLayouts = bindings.empty() ? 
nullptr : &descriptor_set_layout, .pushConstantRangeCount = 0, .pPushConstantRanges = nullptr, }); - if (!entries.empty()) { - result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(entries.size()), - .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *result.descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *result.pipeline_layout, - .set = 0, - }); - } - return result; } void Add(const Shader::Info& info, VkShaderStageFlags stage) { @@ -113,6 +116,7 @@ private: offset += sizeof(DescriptorUpdateEntry); } + const vk::Device* device{}; boost::container::small_vector bindings; boost::container::small_vector entries; u32 binding{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index a444d55d3..760857839 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -237,7 +237,7 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( return nullptr; } const VkDescriptorSet set = descriptor_allocator->Commit(); - update_descriptor_queue.Send(*descriptor_template, set); + update_descriptor_queue.Send(descriptor_template.address(), set); return set; } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 1c3249e3c..fb19bb4b9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -17,13 +17,6 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -namespace { -DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) { - 
DescriptorLayoutBuilder builder; - builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - return builder.Create(device.GetLogical()); -} -} // Anonymous namespace ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, @@ -31,10 +24,12 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip vk::ShaderModule spv_module_) : update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { - DescriptorLayoutTuple tuple{CreateLayout(device, info)}; - descriptor_set_layout = std::move(tuple.descriptor_set_layout); - pipeline_layout = std::move(tuple.pipeline_layout); - descriptor_update_template = std::move(tuple.descriptor_update_template); + DescriptorLayoutBuilder builder{device.GetLogical()}; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + + descriptor_set_layout = builder.CreateDescriptorSetLayout(); + pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); + descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); auto func{[this, &device] { @@ -128,7 +123,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, descriptor_set, nullptr); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index ddc08b8c4..d17b79e02 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ 
b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -27,8 +27,8 @@ using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -DescriptorLayoutTuple CreateLayout(const Device& device, std::span infos) { - DescriptorLayoutBuilder builder; +DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { + DescriptorLayoutBuilder builder{device.GetLogical()}; for (size_t index = 0; index < infos.size(); ++index) { static constexpr std::array stages{ VK_SHADER_STAGE_VERTEX_BIT, @@ -39,7 +39,7 @@ DescriptorLayoutTuple CreateLayout(const Device& device, std::span @@ -124,13 +124,15 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)}; - descriptor_set_layout = std::move(tuple.descriptor_set_layout); - pipeline_layout = std::move(tuple.pipeline_layout); - descriptor_update_template = std::move(tuple.descriptor_update_template); + DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; + descriptor_set_layout = builder.CreateDescriptorSetLayout(); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - auto func{[this, &device, &render_pass_cache] { + auto func{[this, &device, &render_pass_cache, builder] { + const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; + pipeline_layout = builder.CreatePipelineLayout(set_layout); + descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; MakePipeline(device, render_pass); building_flag.test_and_set(); @@ -206,11 +208,11 @@ void GraphicsPipeline::Configure(bool is_indexed) { return; } const VkDescriptorSet 
descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); - scheduler.Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, - nullptr); + scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, nullptr); }); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e3d9debf4..597261964 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -620,7 +620,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(11, "PipelineBuilder") { + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, + workers(11, "yuzu:PipelineBuilder") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index dc45fdcb1..bea9b8012 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -36,12 +36,12 @@ void VKUpdateDescriptorQueue::Acquire() { upload_start = payload_cursor; } -void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, +void 
VKUpdateDescriptorQueue::Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set) { const void* const data = upload_start; const vk::Device* const logical = &device.GetLogical(); scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { - logical->UpdateDescriptorSet(set, update_template, data); + logical->UpdateDescriptorSet(set, *update_template, data); }); } diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index d35e77c44..82bc9920c 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -39,7 +39,7 @@ public: void Acquire(); - void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); + void Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set); void AddSampledImage(VkImageView image_view, VkSampler sampler) { *(payload_cursor++) = VkDescriptorImageInfo{ -- cgit v1.2.3 From d0a529683a2e5a693b53c6f24f6816c06f8f7e65 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 04:09:09 -0300 Subject: vulkan: Serialize pipelines on a separate thread --- .../renderer_vulkan/vk_pipeline_cache.cpp | 130 ++++++++++----------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 1 + 2 files changed, 64 insertions(+), 67 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 597261964..79cd204c7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -61,6 +61,33 @@ public: ~GenericEnvironment() override = default; + u32 TextureBoundBuffer() const final { + return texture_bound; + } + + u32 LocalMemorySize() const final { + return local_memory_size; + } + + u32 SharedMemorySize() const final { + return shared_memory_size; + } + + std::array 
WorkgroupSize() const final { + return workgroup_size; + } + + u64 ReadInstruction(u32 address) final { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[(address - cached_lowest) / INST_SIZE]; + } + has_unbound_instructions = true; + return gpu_memory->Read(program_base + address); + } + std::optional Analyze() { const std::optional size{TryFindSize()}; if (!size) { @@ -97,26 +124,10 @@ public: return Common::CityHash128(data.get(), size); } - u64 ReadInstruction(u32 address) final { - read_lowest = std::min(read_lowest, address); - read_highest = std::max(read_highest, address); - - if (address >= cached_lowest && address < cached_highest) { - return code[(address - cached_lowest) / INST_SIZE]; - } - has_unbound_instructions = true; - return gpu_memory->Read(program_base + address); - } - void Serialize(std::ofstream& file) const { - const u64 code_size{static_cast(ReadSize())}; - const auto data{std::make_unique(code_size)}; - gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); - + const u64 code_size{static_cast(CachedSize())}; const u64 num_texture_types{static_cast(texture_types.size())}; const u64 num_cbuf_values{static_cast(cbuf_values.size())}; - const u32 local_memory_size{LocalMemorySize()}; - const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) @@ -124,10 +135,10 @@ public: .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) - .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) - .write(reinterpret_cast(&read_highest), sizeof(read_highest)) + .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) + 
.write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) - .write(data.get(), code_size); + .write(reinterpret_cast(code.data()), code_size); for (const auto [key, type] : texture_types) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); @@ -137,8 +148,6 @@ public: .write(reinterpret_cast(&type), sizeof(type)); } if (stage == Shader::Stage::Compute) { - const std::array workgroup_size{WorkgroupSize()}; - const u32 shared_memory_size{SharedMemorySize()}; file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); @@ -220,6 +229,11 @@ protected: std::unordered_map texture_types; std::unordered_map cbuf_values; + u32 local_memory_size{}; + u32 texture_bound{}; + u32 shared_memory_size{}; + std::array workgroup_size{}; + u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; @@ -270,6 +284,10 @@ public: UNREACHABLE_MSG("Invalid program={}", program); break; } + const u64 local_size{sph.LocalMemorySize()}; + ASSERT(local_size <= std::numeric_limits::max()); + local_memory_size = static_cast(local_size); + texture_bound = maxwell3d->regs.tex_cb_index; } ~GraphicsEnvironment() override = default; @@ -294,24 +312,6 @@ public: cbuf.address, cbuf.size, cbuf_index, cbuf_offset); } - u32 TextureBoundBuffer() const override { - return maxwell3d->regs.tex_cb_index; - } - - u32 LocalMemorySize() const override { - const u64 size{sph.LocalMemorySize()}; - ASSERT(size <= std::numeric_limits::max()); - return static_cast(size); - } - - u32 SharedMemorySize() const override { - throw Shader::LogicError("Requesting shared memory size in graphics stage"); - } - - std::array WorkgroupSize() const override { - throw Shader::LogicError("Requesting workgroup size in a graphics stage"); - } - private: Tegra::Engines::Maxwell3D* maxwell3d{}; size_t stage_index{}; @@ -325,7 +325,12 @@ public: 
u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ &kepler_compute_} { + const auto& qmd{kepler_compute->launch_description}; stage = Shader::Stage::Compute; + local_memory_size = qmd.local_pos_alloc; + texture_bound = kepler_compute->regs.tex_cb_index; + shared_memory_size = qmd.shared_alloc; + workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; } ~ComputeEnvironment() override = default; @@ -351,25 +356,6 @@ public: cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); } - u32 TextureBoundBuffer() const override { - return kepler_compute->regs.tex_cb_index; - } - - u32 LocalMemorySize() const override { - const auto& qmd{kepler_compute->launch_description}; - return qmd.local_pos_alloc; - } - - u32 SharedMemorySize() const override { - const auto& qmd{kepler_compute->launch_description}; - return qmd.shared_alloc; - } - - std::array WorkgroupSize() const override { - const auto& qmd{kepler_compute->launch_description}; - return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; - } - private: Tegra::Engines::KeplerCompute* kepler_compute{}; }; @@ -621,7 +607,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, - workers(11, "yuzu:PipelineBuilder") { + workers(11, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ @@ -796,7 +782,6 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); std::array graphics_envs; - boost::container::static_vector generic_envs; boost::container::static_vector envs; const GPUVAddr 
base_addr{maxwell3d.regs.code_address.CodeAddress()}; @@ -810,13 +795,22 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); - generic_envs.push_back(&env); envs.push_back(&env); } auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; - if (!pipeline_cache_filename.empty()) { - SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); - } + if (pipeline_cache_filename.empty()) { + return pipeline; + } + serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { + boost::container::static_vector + env_ptrs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] != u128{}) { + env_ptrs.push_back(&envs[index]); + } + } + SerializePipeline(key, env_ptrs, pipeline_cache_filename); + }); return pipeline; } @@ -830,8 +824,10 @@ std::unique_ptr PipelineCache::CreateComputePipeline( main_pools.ReleaseContents(); auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { - SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + serialization_thread.QueueWork([this, key, env = std::move(env)] { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); + }); } return pipeline; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 609f00898..343ea1554 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -187,6 +187,7 @@ private: std::string pipeline_cache_filename; Common::ThreadWorker workers; + Common::ThreadWorker serialization_thread; }; } // namespace Vulkan -- cgit v1.2.3 From d819ba4489b90955286341c739083e638173b938 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 08:34:45 
+0200 Subject: shader: Implement ViewportIndex --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 79cd204c7..1f308eec2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -631,6 +631,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, + .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, -- cgit v1.2.3 From 12f5f320985824d1ebad587ebecb0f8406143ebc Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 09:21:53 +0200 Subject: shader: Mark SSBOs as written when they are --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 2 +- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index fb19bb4b9..6707842ab 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -75,7 +75,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, true); + 
buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, desc.is_written); ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d17b79e02..e8c3a5624 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -163,7 +163,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - true); + desc.is_written); ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; -- cgit v1.2.3 From 480dc0d5e68fd1c79345e93216013a1d2e172c70 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 02:27:25 +0200 Subject: vk_pipeline_cache: Small fixes to the pipeline cache --- .../renderer_vulkan/vk_pipeline_cache.cpp | 24 +++++++++++++--------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1f308eec2..3111165fb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -539,11 +539,13 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading ShaderPools pools; auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + { + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + 
callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } } }); } else { @@ -558,11 +560,13 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + { + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } } }); } -- cgit v1.2.3 From 6ff2e9ba097f3619c21d2e547570e1fbaae8d6ee Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 05:19:13 -0300 Subject: vk_pipeline_cache: Remove unnecesary scope in pipeline cache locking --- .../renderer_vulkan/vk_pipeline_cache.cpp | 27 ++++++++++------------ 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3111165fb..f88ab67ae 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -539,13 +539,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading ShaderPools pools; auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - { - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } else { @@ -560,13 +558,11 
@@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - { - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } @@ -635,7 +631,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, - .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_viewport_index_layer_non_geometry = + device.IsExtShaderViewportIndexLayerSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, -- cgit v1.2.3 From 5ed68e83db39e1f6790a625529f10f4e1d5a8f89 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 21:41:49 -0300 Subject: shader: Remove atomic flags and use mutex + cond variable for pipelines --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 15 ++++++++++----- src/video_core/renderer_vulkan/vk_compute_pipeline.h | 7 ++++++- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 14 ++++++++++---- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 7 ++++++- 4 files changed, 32 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 
6707842ab..0bb5b852d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -55,8 +55,9 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip .basePipelineHandle = 0, .basePipelineIndex = 0, }); - building_flag.test_and_set(); - building_flag.notify_all(); + std::lock_guard lock{build_mutex}; + is_built = true; + build_condvar.notify_one(); }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); @@ -75,7 +76,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, desc.is_written); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); @@ -112,9 +114,12 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, update_descriptor_queue, image_index); - if (!building_flag.test()) { + if (!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built - scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + scheduler.Record([this](vk::CommandBuffer) { + std::unique_lock lock{build_mutex}; + build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + }); } scheduler.Record([this](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 02da504f7..104e6cc85 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,6 +4,8 @@ 
#pragma once +#include +#include #include #include "common/common_types.h" @@ -47,7 +49,10 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; - std::atomic_flag building_flag{}; + + std::condition_variable build_condvar; + std::mutex build_mutex; + std::atomic_bool is_built{false}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e8c3a5624..67de3cb79 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -135,8 +135,10 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; MakePipeline(device, render_pass); - building_flag.test_and_set(); - building_flag.notify_all(); + + std::lock_guard lock{build_mutex}; + is_built = true; + build_condvar.notify_one(); }}; if (worker_thread) { worker_thread->QueueWork(std::move(func)); @@ -196,8 +198,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); - if (!building_flag.test()) { - scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + if (!is_built.load(std::memory_order::relaxed)) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { + std::unique_lock lock{build_mutex}; + build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + }); } if (scheduler.UpdateGraphicsPipeline(this)) { scheduler.Record([this](vk::CommandBuffer cmdbuf) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4e0583157..7d14d2378 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ 
b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" @@ -63,7 +65,10 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; - std::atomic_flag building_flag{}; + + std::condition_variable build_condvar; + std::mutex build_mutex; + std::atomic_bool is_built{false}; }; } // namespace Vulkan -- cgit v1.2.3 From 5b3c6d59c2c92ac388530740f8008f1b9764c14d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 22:28:07 -0300 Subject: vk_compute_pass: Fix compute passes --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 39 ++++++++++------------ .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 ++ src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 - 3 files changed, 19 insertions(+), 23 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 760857839..2cfe9d4bd 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,27 +206,23 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); - /* - FIXME pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stage = - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *module, + .pName = "main", + .pSpecializationInfo = 
nullptr, + }, .layout = *layout, .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); - */ } VKComputePass::~VKComputePass() = default; @@ -262,8 +258,7 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, - num_vertices](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -271,8 +266,8 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); @@ -319,8 +314,8 @@ std::pair QuadIndexedPass::Assemble( const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, - num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer = staging.buffer, set, num_tri_vertices, base_vertex, + index_shift](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -329,9 
+324,9 @@ std::pair QuadIndexedPass::Assemble( .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; const std::array push_constants = {base_vertex, index_shift}; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); - cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), &push_constants); cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 67de3cb79..a0ef0e98b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -189,6 +189,8 @@ void GraphicsPipeline::Configure(bool is_indexed) { buffer_cache.BindHostGeometryBuffers(is_indexed); + update_descriptor_queue.Acquire(); + size_t index{}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { buffer_cache.BindHostStageBuffers(stage); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f0bd4b8af..0292a1b94 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -172,7 +172,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { if (!pipeline) { return; } - update_descriptor_queue.Acquire(); std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; pipeline->Configure(is_indexed); -- cgit v1.2.3 From 72daa2a039d58d23b0dca035bb5f6af8b10ce97b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 5 Apr 2021 08:56:58 
+0200 Subject: shader: Fix ShadowCube declaration type, set number of pipeline threads based on hardware --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f88ab67ae..088de7001 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "common/bit_cast.h" @@ -607,7 +608,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, - workers(11, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { + workers(std::thread::hardware_concurrency() - 1, "yuzu:PipelineBuilder"), + serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ -- cgit v1.2.3 From bfeeb23ddce9f3531a834c257bd8af05c42ed194 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 5 Apr 2021 19:15:45 -0300 Subject: vk_pipeline_cache: Fix num of pipeline workers on weird platforms --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 088de7001..25f592b8a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -608,7 +608,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& 
rasterizer_, Tegra::GPU& gpu_, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, - workers(std::thread::hardware_concurrency() - 1, "yuzu:PipelineBuilder"), + workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; -- cgit v1.2.3 From 1f3eb601acdcdfa4c119cffbf36b5792147b893f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 02:56:15 -0300 Subject: shader: Implement texture buffers --- src/video_core/renderer_vulkan/pipeline_helper.h | 10 ++++++++++ src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 15 +++++++++------ src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 14 +++++++++----- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 4 files changed, 29 insertions(+), 12 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index eebe5d569..decf0d32c 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -93,6 +93,9 @@ public: for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } + for (const auto& desc : info.texture_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); + } } private: @@ -146,6 +149,8 @@ inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { case Shader::TextureType::ColorArrayCube: case Shader::TextureType::ShadowArrayCube: return VideoCommon::ImageViewType::CubeArray; + case Shader::TextureType::Buffer: + break; } UNREACHABLE_MSG("Invalid texture type {}", type); return {}; @@ 
-161,6 +166,11 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samp update_descriptor_queue.AddSampledImage(vk_image_view, sampler); ++index; } + for (const auto& desc : info.texture_buffer_descriptors) { + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); + ++index; + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 0bb5b852d..9922cbd0f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -93,20 +93,23 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto& launch_desc{kepler_compute.launch_description}; const auto& cbufs{launch_desc.const_buffer_config}; const bool via_header_index{launch_desc.linked_tsc}; - for (const auto& desc : info.texture_descriptors) { - const u32 cbuf_index{desc.cbuf_index}; - const u32 cbuf_offset{desc.cbuf_offset}; + const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); - const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; const u32 raw_handle{gpu_memory.Read(addr)}; - - const TextureHandle handle(raw_handle, via_header_index); + return TextureHandle(raw_handle, via_header_index); + }}; + for (const auto& desc : info.texture_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } const std::span 
indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a0ef0e98b..afdd8b371 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -169,19 +169,23 @@ void GraphicsPipeline::Configure(bool is_indexed) { ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - for (const auto& desc : info.texture_descriptors) { - const u32 cbuf_index{desc.cbuf_index}; - const u32 cbuf_offset{desc.cbuf_offset}; + const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { ASSERT(cbufs[cbuf_index].enabled); const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; const u32 raw_handle{gpu_memory.Read(addr)}; - - const TextureHandle handle(raw_handle, via_header_index); + return TextureHandle(raw_handle, via_header_index); + }}; + for (const auto& desc : info.texture_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); buffer_cache.UpdateGraphicsBuffers(is_indexed); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 25f592b8a..23bf84a92 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -212,7 +212,7 @@ protected: case 
Tegra::Texture::TextureType::Texture2DArray: return Shader::TextureType::ColorArray2D; case Tegra::Texture::TextureType::Texture1DBuffer: - throw Shader::NotImplementedException("Texture buffer"); + return Shader::TextureType::Buffer; case Tegra::Texture::TextureType::TextureCubeArray: return Shader::TextureType::ColorArrayCube; default: -- cgit v1.2.3 From e9a91bc5cc2c39b476ba8946f66930f5ab5608b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 20:14:55 -0300 Subject: shader: Interact texture buffers with buffer cache --- src/video_core/buffer_cache/buffer_cache.h | 138 +++++++++++++++++++++ src/video_core/renderer_opengl/gl_buffer_cache.h | 1 + .../renderer_opengl/gl_texture_cache.cpp | 4 + src/video_core/renderer_opengl/gl_texture_cache.h | 2 + src/video_core/renderer_vulkan/pipeline_helper.h | 26 ++-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 57 ++++++--- src/video_core/renderer_vulkan/vk_buffer_cache.h | 18 +++ .../renderer_vulkan/vk_compute_pipeline.cpp | 30 +++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 31 +++-- .../renderer_vulkan/vk_texture_cache.cpp | 63 ++-------- src/video_core/renderer_vulkan/vk_texture_cache.h | 30 ++--- src/video_core/texture_cache/image_view_base.cpp | 9 ++ src/video_core/texture_cache/image_view_base.h | 1 + src/video_core/texture_cache/texture_cache.h | 13 +- 14 files changed, 304 insertions(+), 119 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7373cb62d..6701aab82 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -31,6 +31,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" #include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" @@ -42,11 +43,14 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); 
using BufferId = SlotId; +using VideoCore::Surface::PixelFormat; + constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; constexpr u32 NUM_STORAGE_BUFFERS = 16; +constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; using namespace Common::Literals; @@ -66,6 +70,7 @@ class BufferCache { P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -96,6 +101,10 @@ class BufferCache { BufferId buffer_id; }; + struct TextureBufferBinding : Binding { + PixelFormat format; + }; + static constexpr Binding NULL_BINDING{ .cpu_addr = 0, .size = 0, @@ -142,11 +151,21 @@ public: void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindGraphicsTextureBuffers(size_t stage); + + void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format); + void UnbindComputeStorageBuffers(); void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindComputeTextureBuffers(); + + void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format); + void FlushCachedWrites(); /// Return true when there are uncommitted buffers to be downloaded @@ -254,12 +273,16 @@ private: void BindHostGraphicsStorageBuffers(size_t stage); + void BindHostGraphicsTextureBuffers(size_t stage); + void BindHostTransformFeedbackBuffers(); void BindHostComputeUniformBuffers(); void BindHostComputeStorageBuffers(); + void 
BindHostComputeTextureBuffers(); + void DoUpdateGraphicsBuffers(bool is_indexed); void DoUpdateComputeBuffers(); @@ -274,6 +297,8 @@ private: void UpdateStorageBuffers(size_t stage); + void UpdateTextureBuffers(size_t stage); + void UpdateTransformFeedbackBuffers(); void UpdateTransformFeedbackBuffer(u32 index); @@ -282,6 +307,8 @@ private: void UpdateComputeStorageBuffers(); + void UpdateComputeTextureBuffers(); + void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); @@ -323,6 +350,9 @@ private: [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; + [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, + PixelFormat format); + [[nodiscard]] std::span ImmediateBufferWithData(VAddr cpu_addr, size_t size); [[nodiscard]] std::span ImmediateBuffer(size_t wanted_capacity); @@ -347,10 +377,12 @@ private: std::array vertex_buffers; std::array, NUM_STAGES> uniform_buffers; std::array, NUM_STAGES> storage_buffers; + std::array, NUM_STAGES> texture_buffers; std::array transform_feedback_buffers; std::array compute_uniform_buffers; std::array compute_storage_buffers; + std::array compute_texture_buffers; std::array enabled_uniform_buffers{}; u32 enabled_compute_uniform_buffers = 0; @@ -360,6 +392,9 @@ private: u32 enabled_compute_storage_buffers = 0; u32 written_compute_storage_buffers = 0; + std::array enabled_texture_buffers{}; + u32 enabled_compute_texture_buffers = 0; + std::array fast_bound_uniform_buffers{}; std::array uniform_cache_hits{}; @@ -619,6 +654,7 @@ void BufferCache

::BindHostStageBuffers(size_t stage) { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostGraphicsUniformBuffers(stage); BindHostGraphicsStorageBuffers(stage); + BindHostGraphicsTextureBuffers(stage); } template @@ -626,6 +662,7 @@ void BufferCache

::BindHostComputeBuffers() { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostComputeUniformBuffers(); BindHostComputeStorageBuffers(); + BindHostComputeTextureBuffers(); } template @@ -660,6 +697,18 @@ void BufferCache

::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr); } +template +void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { + enabled_texture_buffers[stage] = 0; +} + +template +void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, + u32 size, PixelFormat format) { + enabled_texture_buffers[stage] |= 1U << tbo_index; + texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + template void BufferCache

::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; @@ -680,6 +729,18 @@ void BufferCache

::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr); } +template +void BufferCache

::UnbindComputeTextureBuffers() { + enabled_compute_texture_buffers = 0; +} + +template +void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format) { + enabled_compute_texture_buffers |= 1U << tbo_index; + compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + template void BufferCache

::FlushCachedWrites() { for (const BufferId buffer_id : cached_write_buffer_ids) { @@ -988,6 +1049,26 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { }); } +template +void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { + u32 binding_index = 0; + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + const TextureBufferBinding& binding = texture_buffers[stage][index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { + runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); + ++binding_index; + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + template void BufferCache

::BindHostTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { @@ -1050,6 +1131,26 @@ void BufferCache

::BindHostComputeStorageBuffers() { }); } +template +void BufferCache

::BindHostComputeTextureBuffers() { + u32 binding_index = 0; + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + const TextureBufferBinding& binding = compute_texture_buffers[index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { + runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); + ++binding_index; + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + template void BufferCache

::DoUpdateGraphicsBuffers(bool is_indexed) { if (is_indexed) { @@ -1060,6 +1161,7 @@ void BufferCache

::DoUpdateGraphicsBuffers(bool is_indexed) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) { UpdateUniformBuffers(stage); UpdateStorageBuffers(stage); + UpdateTextureBuffers(stage); } } @@ -1067,6 +1169,7 @@ template void BufferCache

::DoUpdateComputeBuffers() { UpdateComputeUniformBuffers(); UpdateComputeStorageBuffers(); + UpdateComputeTextureBuffers(); } template @@ -1166,6 +1269,14 @@ void BufferCache

::UpdateStorageBuffers(size_t stage) { }); } +template +void BufferCache

::UpdateTextureBuffers(size_t stage) { + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + Binding& binding = texture_buffers[stage][index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + }); +} + template void BufferCache

::UpdateTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { @@ -1227,6 +1338,14 @@ void BufferCache

::UpdateComputeStorageBuffers() { }); } +template +void BufferCache

::UpdateComputeTextureBuffers() { + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + Binding& binding = compute_texture_buffers[index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + }); +} + template void BufferCache

::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { Buffer& buffer = slot_buffers[buffer_id]; @@ -1581,6 +1700,25 @@ typename BufferCache

::Binding BufferCache

::StorageBufferBinding(GPUVAddr s return binding; } +template +typename BufferCache

::TextureBufferBinding BufferCache

::GetTextureBufferBinding( + GPUVAddr gpu_addr, u32 size, PixelFormat format) { + const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + TextureBufferBinding binding; + if (!cpu_addr || size == 0) { + binding.cpu_addr = 0; + binding.size = 0; + binding.buffer_id = NULL_BUFFER_ID; + binding.format = PixelFormat::Invalid; + } else { + binding.cpu_addr = *cpu_addr; + binding.size = size; + binding.buffer_id = BufferId{}; + binding.format = format; + } + return binding; +} + template std::span BufferCache

::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index fe91aa452..ddcce5e97 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -155,6 +155,7 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true; static constexpr bool USE_MEMORY_MAPS = false; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ff0f03e99..a8bf84218 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1016,6 +1016,10 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI default_handle = Handle(info.type); } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info) + : VideoCommon::ImageViewBase{info, view_info} {} + ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index cf3b789e3..817b0e650 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -182,6 +182,8 @@ class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const 
VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index decf0d32c..cff93cc60 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -24,7 +24,8 @@ struct TextureHandle { [[likely]] if (via_header_index) { image = data; sampler = data; - } else { + } + else { const Tegra::Texture::TextureHandle handle{data}; image = handle.tic_id; sampler = via_header_index ? image : handle.tsc_id.Value(); @@ -90,12 +91,12 @@ public: for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); } + for ([[maybe_unused]] const auto& desc : info.texture_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); + } for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } - for (const auto& desc : info.texture_buffer_descriptors) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); - } } private: @@ -156,20 +157,15 @@ inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { return {}; } -inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers, - const ImageId* image_view_ids, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) { +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, + const ImageId*& image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue) { + image_view_ids += info.texture_buffer_descriptors.size(); for (const auto& desc : info.texture_descriptors) { - const VkSampler sampler{samplers[index]}; - ImageView& 
image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkSampler sampler{*(samplers++)}; + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); - ++index; - } - for (const auto& desc : info.texture_buffer_descriptors) { - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - ++index; } } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 0def1e769..cdda56ab1 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -67,25 +67,50 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) - : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_) { - buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = SizeBytes(), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); + : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_), + device{&runtime.device}, + buffer{device->GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = SizeBytes(), + .usage = 
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + })}, + commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); } - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); +} + +VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return *it->handle; + } + views.push_back({ + .offset = offset, + .size = size, + .format = format, + .handle = device->GetLogical().CreateBufferView({ + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = *buffer, + .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format, + .offset = offset, + .range = size, + }), + }); + return *views.back().handle; } BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3bb81d5b3..ea17406dc 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -9,6 +9,7 @@ #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" 
+#include "video_core/surface.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -26,6 +27,8 @@ public: explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_); + [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] VkBuffer Handle() const noexcept { return *buffer; } @@ -35,8 +38,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + vk::BufferView handle; + }; + + const Device* device{}; vk::Buffer buffer; MemoryCommit commit; + std::vector views; }; class BufferCacheRuntime { @@ -87,6 +99,11 @@ public: BindBuffer(buffer, offset, size); } + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format) { + update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format)); + } + private: void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { update_descriptor_queue.AddBuffer(buffer, offset, size); @@ -123,6 +140,7 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = false; static constexpr bool USE_MEMORY_MAPS = true; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 9922cbd0f..ac47b1f3c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -80,8 +80,6 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, desc.is_written); ++ssbo_index; } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); texture_cache.SynchronizeComputeDescriptors(); 
@@ -99,6 +97,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -106,16 +108,26 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.texture_buffer_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); - size_t image_index{}; - PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, - update_descriptor_queue, image_index); + buffer_cache.UnbindComputeTextureBuffers(); + ImageId* texture_buffer_ids{image_view_ids.data()}; + size_t index{}; + for (const auto& desc : info.texture_buffer_descriptors) { + ASSERT(desc.count == 1); + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), + image_view.format); + ++texture_buffer_ids; + ++index; + } + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); + + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; + PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue); if 
(!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index afdd8b371..893258b4a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -175,6 +175,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -182,24 +186,37 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } + } + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + ImageId* texture_buffer_index{image_view_ids.data()}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + size_t index{}; for (const auto& desc : info.texture_buffer_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); + ASSERT(desc.count == 1); + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_index); + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format); + ++index; + ++texture_buffer_index; } + texture_buffer_index += 
info.texture_descriptors.size(); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); buffer_cache.UpdateGraphicsBuffers(is_indexed); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); buffer_cache.BindHostGeometryBuffers(is_indexed); update_descriptor_queue.Acquire(); - size_t index{}; + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), - texture_cache, update_descriptor_queue, index); + PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, + update_descriptor_queue); } texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1bbc542a1..e42b091c5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -15,10 +15,10 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -162,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); } -[[nodiscard]] 
vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) { - if (info.type != ImageType::Buffer) { - return vk::Buffer{}; - } - const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); - return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = info.size.width * bytes_per_block, - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); -} - [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { switch (VideoCore::Surface::GetFormatType(format)) { case VideoCore::Surface::SurfaceType::ColorTexture: @@ -813,13 +794,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, - image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), + image(MakeImage(runtime.device, info)), + commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), aspect_mask(ImageAspectMask(info.format)) { - if (image) { - commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal); - } else { - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); - } if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { if (Settings::values.accelerate_astc.GetValue()) { flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; @@ -828,11 +805,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ } } if (runtime.device.HasDebuggingToolAttached()) { - if (image) { - image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } else { 
- buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } + image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, @@ -884,19 +857,6 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { - // TODO: Move this to another API - scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferCopies(copies, map.offset); - const VkBuffer src_buffer = map.buffer; - const VkBuffer dst_buffer = *buffer; - scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { - // TODO: Barriers - cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); - }); -} - void Image::DownloadMemory(const StagingBufferRef& map, std::span copies) { std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); @@ -1032,19 +992,16 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI UNIMPLEMENTED(); break; case VideoCommon::ImageViewType::Buffer: - buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .buffer = image.Buffer(), - .format = format_info.format, - .offset = 0, // TODO: Redesign buffer cache to support this - .range = image.guest_size_bytes, - }); + UNREACHABLE(); break; } } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params} {} diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h 
b/src/video_core/renderer_vulkan/vk_texture_cache.h index 189ee5a68..498e76a1c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -41,9 +41,9 @@ struct TextureCacheRuntime { void Finish(); - [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); + StagingBufferRef UploadStagingBuffer(size_t size); - [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); + StagingBufferRef DownloadStagingBuffer(size_t size); void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const Region2D& dst_region, const Region2D& src_region, @@ -54,7 +54,7 @@ struct TextureCacheRuntime { void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); - [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { + bool CanAccelerateImageUpload(Image&) const noexcept { return false; } @@ -92,8 +92,6 @@ public: void UploadMemory(const StagingBufferRef& map, std::span copies); - void UploadMemory(const StagingBufferRef& map, std::span copies); - void DownloadMemory(const StagingBufferRef& map, std::span copies); @@ -101,10 +99,6 @@ public: return *image; } - [[nodiscard]] VkBuffer Buffer() const noexcept { - return *buffer; - } - [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { return aspect_mask; } @@ -121,7 +115,6 @@ public: private: VKScheduler* scheduler; vk::Image image; - vk::Buffer buffer; MemoryCommit commit; vk::ImageView image_view; std::vector storage_image_views; @@ -132,6 +125,8 @@ private: class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); [[nodiscard]] VkImageView DepthView(); @@ -142,10 +137,6 @@ public: return 
*image_views[static_cast(query_type)]; } - [[nodiscard]] VkBufferView BufferView() const noexcept { - return *buffer_view; - } - [[nodiscard]] VkImage ImageHandle() const noexcept { return image_handle; } @@ -162,6 +153,14 @@ public: return samples; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); @@ -169,11 +168,12 @@ private: std::array image_views; vk::ImageView depth_view; vk::ImageView stencil_view; - vk::BufferView buffer_view; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; PixelFormat image_format = PixelFormat::Invalid; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index e8d632f9e..450becbeb 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -36,6 +36,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i } } +ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) + : format{info.format}, type{ImageViewType::Buffer}, size{ + .width = info.size.width, + .height = 1, + .depth = 1, + } { + ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); +} + ImageViewBase::ImageViewBase(const NullImageParams&) {} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 73954167e..903f715c5 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h @@ -27,6 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) struct 
ImageViewBase { explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id); + explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); explicit ImageViewBase(const NullImageParams&); [[nodiscard]] bool IsBuffer() const noexcept { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 85ce06d56..5e8d99482 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -968,9 +968,6 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); ConvertImage(unswizzled_data, image.info, mapped_span, copies); image.UploadMemory(staging, copies); - } else if (image.info.type == ImageType::Buffer) { - const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; - image.UploadMemory(staging, copies); } else { const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); image.UploadMemory(staging, copies); @@ -993,7 +990,12 @@ ImageViewId TextureCache

::FindImageView(const TICEntry& config) { template ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { const ImageInfo info(config); - const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; + if (info.type == ImageType::Buffer) { + const ImageViewInfo view_info(config, 0); + return slot_image_views.insert(runtime, info, view_info, config.Address()); + } + const u32 layer_offset = config.BaseLayer() * info.layer_stride; + const GPUVAddr image_gpu_addr = config.Address() - layer_offset; const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); if (!image_id) { return NULL_IMAGE_VIEW_ID; @@ -1801,6 +1803,9 @@ void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modifi return; } const ImageViewBase& image_view = slot_image_views[image_view_id]; + if (image_view.IsBuffer()) { + return; + } PrepareImage(image_view.image_id, is_modification, invalidate); } -- cgit v1.2.3 From 0bb85f6a753c769266c95c4ba146b25b9eaaaffd Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 5 Apr 2021 22:25:22 -0400 Subject: shader_recompiler,video_core: Cleanup some GCC and Clang errors Mostly fixing unused *, implicit conversion, braced scalar init, fpermissive, and some others. Some Clang errors likely remain in video_core, and std::ranges is still a pertinent issue in shader_recompiler shader_recompiler: cmake: Force bracket depth to 1024 on Clang Increases the maximum fold expression depth thread_worker: Include condition_variable Don't use list initializers in control flow Co-authored-by: ReinUsesLisp --- src/video_core/CMakeLists.txt | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 +++++++++++---------- .../renderer_vulkan/vk_pipeline_cache.cpp | 5 +++-- .../renderer_vulkan/vk_render_pass_cache.cpp | 2 -- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 71b07c194..3166a69dc 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -203,7 +203,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak) +target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 893258b4a..57e2d569c 
100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -447,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .dynamicStateCount = static_cast(dynamic_states.size()), .pDynamicStates = dynamic_states.data(), }; - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, .requiredSubgroupSize = GuestWarpSize, @@ -457,15 +457,16 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!spv_modules[stage]) { continue; } - [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast(stage)), - .module = *spv_modules[stage], - .pName = "main", - .pSpecializationInfo = nullptr, - }); + [[maybe_unused]] auto& stage_ci = + shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); /* if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { stage_ci.pNext = &subgroup_size_ci; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 23bf84a92..fcebb8f6e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,7 +47,7 @@ auto MakeSpan(Container& container) { return std::span(container.data(), 
container.size()); } -u64 MakeCbufKey(u32 index, u32 offset) { +static u64 MakeCbufKey(u32 index, u32 offset) { return (static_cast(index) << 32) | offset; } @@ -638,6 +638,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, + .fixed_state_point_size{}, }; } @@ -748,7 +749,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Shader::Environment& env{*envs[env_index]}; ++env_index; - const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)}; + const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); } diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index b2dcd74ab..991afe521 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#pragma once - #include #include diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index e42b091c5..70328680d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -279,7 +279,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } -[[nodiscard]] std::vector TransformBufferCopies( +[[maybe_unused]] [[nodiscard]] std::vector TransformBufferCopies( std::span copies, size_t buffer_offset) { std::vector result(copies.size()); std::ranges::transform( -- cgit v1.2.3 From 5bfcafa0a21619e8cd82c38ec51e260838f42042 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 10 Apr 2021 02:32:55 -0400 Subject: shader: Address feedback + clang format --- src/video_core/renderer_vulkan/vk_compute_pipeline.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 104e6cc85..8efdc2926 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,9 +4,9 @@ #pragma once -#include -#include #include +#include +#include #include "common/common_types.h" #include "common/thread_worker.h" -- cgit v1.2.3 From 7cb2ab358517d95ebcd35c94c72b9e91762906c3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Apr 2021 01:45:39 -0300 Subject: shader: Implement SULD and SUST --- src/video_core/renderer_vulkan/blit_image.cpp | 4 +- src/video_core/renderer_vulkan/pipeline_helper.h | 43 +++----- .../renderer_vulkan/vk_compute_pipeline.cpp | 4 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 + src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- .../renderer_vulkan/vk_texture_cache.cpp | 112 +++++++++++++++++---- src/video_core/renderer_vulkan/vk_texture_cache.h | 23 +++-- 
src/video_core/texture_cache/texture_cache.h | 8 ++ 8 files changed, 135 insertions(+), 65 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 6c0d5c7f4..39fe9289b 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -361,7 +361,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV .operation = operation, }; const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplacePipeline(key); const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); @@ -435,7 +435,7 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); const VkExtent2D extent{ diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index cff93cc60..d2c3f11c1 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -97,6 +97,9 @@ public: for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } + for (const auto& desc : info.image_descriptors) { + 
Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage); + } } private: @@ -127,36 +130,6 @@ private: size_t offset{}; }; -inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { - switch (type) { - case Shader::TextureType::Color1D: - case Shader::TextureType::Shadow1D: - return VideoCommon::ImageViewType::e1D; - case Shader::TextureType::ColorArray1D: - case Shader::TextureType::ShadowArray1D: - return VideoCommon::ImageViewType::e1DArray; - case Shader::TextureType::Color2D: - case Shader::TextureType::Shadow2D: - return VideoCommon::ImageViewType::e2D; - case Shader::TextureType::ColorArray2D: - case Shader::TextureType::ShadowArray2D: - return VideoCommon::ImageViewType::e2DArray; - case Shader::TextureType::Color3D: - case Shader::TextureType::Shadow3D: - return VideoCommon::ImageViewType::e3D; - case Shader::TextureType::ColorCube: - case Shader::TextureType::ShadowCube: - return VideoCommon::ImageViewType::Cube; - case Shader::TextureType::ColorArrayCube: - case Shader::TextureType::ShadowArrayCube: - return VideoCommon::ImageViewType::CubeArray; - case Shader::TextureType::Buffer: - break; - } - UNREACHABLE_MSG("Invalid texture type {}", type); - return {}; -} - inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { @@ -164,9 +137,17 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& sam for (const auto& desc : info.texture_descriptors) { const VkSampler sampler{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + const VkImageView vk_image_view{image_view.Handle(desc.type)}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); } + for (const auto& desc : info.image_descriptors) { + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + if 
(desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; + update_descriptor_queue.AddImage(vk_image_view); + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ac47b1f3c..3d690f335 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -108,6 +108,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.image_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 57e2d569c..23c01f24e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -186,6 +186,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.image_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 
0292a1b94..2ba44330f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -494,7 +494,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, if (!image_view) { return false; } - screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); + screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); screen_info.width = image_view->size.width; screen_info.height = image_view->size.height; screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 70328680d..8e029bcb3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -215,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return VK_COMPONENT_SWIZZLE_ZERO; } +[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + return VK_IMAGE_VIEW_TYPE_1D; + case Shader::TextureType::Color2D: + return VK_IMAGE_VIEW_TYPE_2D; + case Shader::TextureType::ColorCube: + return VK_IMAGE_VIEW_TYPE_CUBE; + case Shader::TextureType::Color3D: + return VK_IMAGE_VIEW_TYPE_3D; + case Shader::TextureType::ColorArray1D: + return VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case Shader::TextureType::ColorArray2D: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case Shader::TextureType::ColorArrayCube: + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + case Shader::TextureType::Buffer: + UNREACHABLE_MSG("Texture buffers can't be image views"); + return VK_IMAGE_VIEW_TYPE_1D; + } + UNREACHABLE_MSG("Invalid image view type={}", type); + return VK_IMAGE_VIEW_TYPE_2D; +} + [[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { switch (type) { case VideoCommon::ImageViewType::e1D: @@ -232,7 +256,7 @@ constexpr 
VkBorderColor ConvertBorderColor(const std::array& color) { case VideoCommon::ImageViewType::CubeArray: return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; case VideoCommon::ImageViewType::Rect: - LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); + UNIMPLEMENTED_MSG("Rect image view"); return VK_IMAGE_VIEW_TYPE_2D; case VideoCommon::ImageViewType::Buffer: UNREACHABLE_MSG("Texture buffers can't be image views"); @@ -539,6 +563,28 @@ struct RangedBarrierRange { } }; +[[nodiscard]] VkFormat Format(Shader::ImageFormat format) { + switch (format) { + case Shader::ImageFormat::Typeless: + break; + case Shader::ImageFormat::R8_SINT: + return VK_FORMAT_R8_SINT; + case Shader::ImageFormat::R8_UINT: + return VK_FORMAT_R8_UINT; + case Shader::ImageFormat::R16_UINT: + return VK_FORMAT_R16_UINT; + case Shader::ImageFormat::R16_SINT: + return VK_FORMAT_R16_SINT; + case Shader::ImageFormat::R32_UINT: + return VK_FORMAT_R32_UINT; + case Shader::ImageFormat::R32G32_UINT: + return VK_FORMAT_R32G32_UINT; + case Shader::ImageFormat::R32G32B32A32_UINT: + return VK_FORMAT_R32G32B32A32_UINT; + } + UNREACHABLE_MSG("Invalid image format={}", format); + return VK_FORMAT_R32_UINT; +} } // Anonymous namespace void TextureCacheRuntime::Finish() { @@ -577,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst return; } } - ASSERT(src.ImageFormat() == dst.ImageFormat()); + ASSERT(src.format == dst.format); ASSERT(!(is_dst_msaa && !is_src_msaa)); ASSERT(operation == Fermi2D::Operation::SrcCopy); @@ -915,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span swizzle{ SwizzleSource::R, @@ -954,39 +1001,39 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI }, .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), }; - const auto create = [&](VideoCommon::ImageViewType view_type, std::optional num_layers) { + const auto create = [&](TextureType tex_type, std::optional num_layers) { 
VkImageViewCreateInfo ci{create_info}; - ci.viewType = ImageViewType(view_type); + ci.viewType = ImageViewType(tex_type); if (num_layers) { ci.subresourceRange.layerCount = *num_layers; } vk::ImageView handle = device->GetLogical().CreateImageView(ci); if (device->HasDebuggingToolAttached()) { - handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); + handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } - image_views[static_cast(view_type)] = std::move(handle); + image_views[static_cast(tex_type)] = std::move(handle); }; switch (info.type) { case VideoCommon::ImageViewType::e1D: case VideoCommon::ImageViewType::e1DArray: - create(VideoCommon::ImageViewType::e1D, 1); - create(VideoCommon::ImageViewType::e1DArray, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e1DArray); + create(TextureType::Color1D, 1); + create(TextureType::ColorArray1D, std::nullopt); + render_target = Handle(TextureType::ColorArray1D); break; case VideoCommon::ImageViewType::e2D: case VideoCommon::ImageViewType::e2DArray: - create(VideoCommon::ImageViewType::e2D, 1); - create(VideoCommon::ImageViewType::e2DArray, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e2DArray); + create(TextureType::Color2D, 1); + create(TextureType::ColorArray2D, std::nullopt); + render_target = Handle(Shader::TextureType::ColorArray2D); break; case VideoCommon::ImageViewType::e3D: - create(VideoCommon::ImageViewType::e3D, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e3D); + create(TextureType::Color3D, std::nullopt); + render_target = Handle(Shader::TextureType::Color3D); break; case VideoCommon::ImageViewType::Cube: case VideoCommon::ImageViewType::CubeArray: - create(VideoCommon::ImageViewType::Cube, 6); - create(VideoCommon::ImageViewType::CubeArray, std::nullopt); + create(TextureType::ColorCube, 6); + create(TextureType::ColorArrayCube, std::nullopt); break; case VideoCommon::ImageViewType::Rect: UNIMPLEMENTED(); @@ -1009,7 
+1056,8 @@ VkImageView ImageView::DepthView() { if (depth_view) { return *depth_view; } - depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); + const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); + depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT); return *depth_view; } @@ -1017,18 +1065,38 @@ VkImageView ImageView::StencilView() { if (stencil_view) { return *stencil_view; } - stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); + const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); + stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT); return *stencil_view; } -vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { +VkImageView ImageView::StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format) { + if (image_format == Shader::ImageFormat::Typeless) { + return Handle(texture_type); + } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || + image_format == Shader::ImageFormat::R16_SINT}; + if (!storage_views) { + storage_views = std::make_unique(); + } + auto& views{is_signed ? 
storage_views->signeds : storage_views->unsigneds}; + auto& view{views[static_cast(texture_type)]}; + if (view) { + return *view; + } + view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT); + return *view; +} + +vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) { return device->GetLogical().CreateImageView({ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, .flags = 0, .image = image_handle, .viewType = ImageViewType(type), - .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format, + .format = vk_format, .components{ .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 498e76a1c..0b73d55f8 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -7,6 +7,7 @@ #include #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" @@ -133,8 +134,11 @@ public: [[nodiscard]] VkImageView StencilView(); - [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { - return *image_views[static_cast(query_type)]; + [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format); + + [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept { + return *image_views[static_cast(texture_type)]; } [[nodiscard]] VkImage ImageHandle() const noexcept { @@ -145,10 +149,6 @@ public: return render_target; } - [[nodiscard]] PixelFormat ImageFormat() const noexcept { - return image_format; - } - [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { return samples; } @@ -162,15 +162,20 @@ public: } private: - 
[[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); + struct StorageViews { + std::array signeds; + std::array unsigneds; + }; + + [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask); const Device* device = nullptr; - std::array image_views; + std::array image_views; + std::unique_ptr storage_views; vk::ImageView depth_view; vk::ImageView stencil_view; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; - PixelFormat image_format = PixelFormat::Invalid; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; GPUVAddr gpu_addr = 0; u32 buffer_size = 0; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 5e8d99482..255b07cf8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -117,6 +117,9 @@ public: /// Return a reference to the given image view id [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + /// Mark an image as modified from the GPU + void MarkModification(ImageId id) noexcept; + /// Fill image_view_ids with the graphics images in indices void FillGraphicsImageViews(std::span indices, std::span image_view_ids); @@ -526,6 +529,11 @@ typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { return slot_image_views[id]; } +template +void TextureCache

::MarkModification(ImageId id) noexcept { + MarkModification(slot_images[id]); +} + template void TextureCache

::FillGraphicsImageViews(std::span indices, std::span image_view_ids) { -- cgit v1.2.3 From e5e79648cfa3ac9d30c00bccf4252cd0dc93bccc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Jul 2021 18:16:56 -0300 Subject: pipeline_helper: Add missing [[maybe_unused]] --- src/video_core/renderer_vulkan/pipeline_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index d2c3f11c1..a39459b2e 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -97,7 +97,7 @@ public: for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } - for (const auto& desc : info.image_descriptors) { + for ([[maybe_unused]] const auto& desc : info.image_descriptors) { Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage); } } -- cgit v1.2.3 From 1030b612a36f6b44e3b25215039748b01cfb9b8c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:12:56 -0300 Subject: vk_rasterizer: Request outside render pass execution context for compute --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2ba44330f..7df169c85 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -279,6 +279,7 @@ void RasterizerVulkan::DispatchCompute() { const auto& qmd{kepler_compute.launch_description}; const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; + scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); } -- cgit v1.2.3 From ab543f18213133b3076b81f30df386d5cb470e49 Mon Sep 17 
00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:37:03 -0300 Subject: spirv: Guard against typeless image reads on unsupported devices --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index fcebb8f6e..25dbefd5c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -635,6 +635,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, -- cgit v1.2.3 From 479ca00071ccaab6ca9ac28daf375e1ed15dc447 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:50:30 -0300 Subject: nsight_aftermath_tracker: Report used shaders to Nsight Aftermath --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 4 +++- .../vulkan_common/nsight_aftermath_tracker.cpp | 5 ++--- .../vulkan_common/nsight_aftermath_tracker.h | 21 +++++++++++---------- src/video_core/vulkan_common/vulkan_device.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 3 ++- 6 files changed, 20 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 2cfe9d4bd..ec9866605 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,6 +206,7 @@ VKComputePass::VKComputePass(const 
Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); + device.SaveShader(code); pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 25dbefd5c..f699a9bdf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -770,6 +770,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const Shader::Profile profile{MakeProfile(key, program.stage)}; const std::vector code{EmitSPIRV(profile, program, binding)}; + device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], @@ -846,7 +847,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(base_profile, program, binding)}; + const std::vector code{EmitSPIRV(base_profile, program, binding)}; + device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 758c038ba..209cb1e0a 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -73,12 +73,11 @@ NsightAftermathTracker::~NsightAftermathTracker() { } } -void NsightAftermathTracker::SaveShader(const std::vector& spirv) const { +void 
NsightAftermathTracker::SaveShader(std::span spirv) const { if (!initialized) { return; } - - std::vector spirv_copy = spirv; + std::vector spirv_copy(spirv.begin(), spirv.end()); GFSDK_Aftermath_SpirvCode shader; shader.pData = spirv_copy.data(); shader.size = static_cast(spirv_copy.size() * 4); diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h index 4fe2b14d9..eae1891dd 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -33,7 +34,7 @@ public: NsightAftermathTracker(NsightAftermathTracker&&) = delete; NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; - void SaveShader(const std::vector& spirv) const; + void SaveShader(std::span spirv) const; private: #ifdef HAS_NSIGHT_AFTERMATH @@ -61,21 +62,21 @@ private: bool initialized = false; Common::DynamicLibrary dl; - PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; - PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; - PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier; - PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; - PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; - PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; + PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps{}; + PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps{}; + PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier{}; + 
PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv{}; + PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder{}; + PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder{}; + PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON{}; + PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON{}; #endif }; #ifndef HAS_NSIGHT_AFTERMATH inline NsightAftermathTracker::NsightAftermathTracker() = default; inline NsightAftermathTracker::~NsightAftermathTracker() = default; -inline void NsightAftermathTracker::SaveShader(const std::vector&) const {} +inline void NsightAftermathTracker::SaveShader(std::span) const {} #endif } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index c027598ba..78bb741bc 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -493,7 +493,7 @@ void Device::ReportLoss() const { std::this_thread::sleep_for(std::chrono::seconds{15}); } -void Device::SaveShader(const std::vector& spirv) const { +void Device::SaveShader(std::span spirv) const { if (nsight_aftermath_tracker) { nsight_aftermath_tracker->SaveShader(spirv); } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index ac2311e7e..adf62a707 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "common/common_types.h" @@ -43,7 +44,7 @@ public: void ReportLoss() const; /// Reports a shader to Nsight Aftermath. - void SaveShader(const std::vector& spirv) const; + void SaveShader(std::span spirv) const; /// Returns the name of the VkDriverId reported from Vulkan. 
std::string GetDriverName() const; -- cgit v1.2.3 From 3db2b3effa953ae66457b7a19b419fc4db2c4801 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 11 Apr 2021 02:07:02 -0400 Subject: shader: Implement ATOM/S and RED --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + src/video_core/vulkan_common/vulkan_device.cpp | 14 ++++++++++++++ src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 3 files changed, 21 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f699a9bdf..b953d694b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -637,6 +637,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, device.IsExtShaderViewportIndexLayerSupported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), + .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, .fixed_state_point_size{}, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 78bb741bc..911dfed44 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -681,6 +681,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; + bool has_ext_shader_atomic_int64{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -710,6 +711,7 @@ std::vector 
Device::LoadExtensions(bool requires_surface) { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { @@ -760,6 +762,18 @@ std::vector Device::LoadExtensions(bool requires_surface) { } else { is_warp_potentially_bigger = true; } + if (has_ext_shader_atomic_int64) { + VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; + atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES; + atomic_int64.pNext = nullptr; + features.pNext = &atomic_int64; + physical.GetFeatures2KHR(features); + + if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) { + extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); + ext_shader_atomic_int64 = true; + } + } if (has_ext_transform_feedback) { VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index adf62a707..4e6d13308 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -229,6 +229,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_KHR_shader_atomic_int64. + bool IsExtShaderAtomicInt64Supported() const { + return ext_shader_atomic_int64; + } + /// Returns true when a known debugging tool is attached.
bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; @@ -320,6 +325,7 @@ private: bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. + bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached -- cgit v1.2.3 From a33014022ed86c27ba4faa243fa6d0a69df75564 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 20:57:37 -0300 Subject: pipeline_helper: Simplify descriptor objects initialization --- src/video_core/renderer_vulkan/pipeline_helper.h | 58 ++++++++++-------------- 1 file changed, 25 insertions(+), 33 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index a39459b2e..e2167dc4b 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -85,42 +85,34 @@ public: } void Add(const Shader::Info& info, VkShaderStageFlags stage) { - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage); - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); - } - for ([[maybe_unused]] const auto& desc : info.texture_buffer_descriptors) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); - } - for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { - Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); - } - for ([[maybe_unused]] const auto& desc : info.image_descriptors) { - Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 
stage); - } + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size()); } private: - void Add(VkDescriptorType type, VkShaderStageFlags stage) { - bindings.push_back({ - .binding = binding, - .descriptorType = type, - .descriptorCount = 1, - .stageFlags = stage, - .pImmutableSamplers = nullptr, - }); - entries.push_back(VkDescriptorUpdateTemplateEntryKHR{ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = type, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); + void Add(VkDescriptorType type, VkShaderStageFlags stage, size_t num) { + for (size_t i = 0; i < num; ++i) { + bindings.push_back({ + .binding = binding, + .descriptorType = type, + .descriptorCount = 1, + .stageFlags = stage, + .pImmutableSamplers = nullptr, + }); + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } } const vk::Device* device{}; -- cgit v1.2.3 From f263760c5a3aff771123b32b15677e1f7a089640 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Apr 2021 19:41:22 -0300 Subject: shader: Implement geometry shaders --- .../renderer_vulkan/vk_pipeline_cache.cpp | 56 +++++++++++++++++++--- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 7 ++- 2 files changed, 56 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp 
b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b953d694b..f49add208 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -769,7 +769,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::Profile profile{MakeProfile(key, program.stage)}; + const Shader::Profile profile{MakeProfile(key, program)}; const std::vector code{EmitSPIRV(profile, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); @@ -880,15 +880,59 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA } Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, - Shader::Stage stage) { + const Shader::IR::Program& program) { Shader::Profile profile{base_profile}; - if (stage == Shader::Stage::VertexB) { - profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; - if (key.state.topology == Maxwell::PrimitiveTopology::Points) { - profile.fixed_state_point_size = Common::BitCast(key.state.point_size); + + const Shader::Stage stage{program.stage}; + const bool has_geometry{key.unique_hashes[4] != u128{}}; + const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; + const float point_size{Common::BitCast(key.state.point_size)}; + switch (stage) { + case Shader::Stage::VertexB: + if (!has_geometry) { + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + profile.fixed_state_point_size = point_size; + } + profile.convert_depth_mode = gl_ndc; } std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); + break; + case Shader::Stage::Geometry: + if (program.output_topology == Shader::OutputTopology::PointList) { + profile.fixed_state_point_size = point_size; + } + profile.convert_depth_mode = gl_ndc; + break; + default: + break; + } + switch (key.state.topology) { + case 
Maxwell::PrimitiveTopology::Points: + profile.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + profile.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + profile.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + profile.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + profile.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; } return profile; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 343ea1554..8b6839966 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -33,6 +33,10 @@ namespace Core { class System; } +namespace Shader::IR { +struct Program; +} + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -160,7 +164,8 @@ private: Shader::Environment& env, bool build_in_parallel); - Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); + Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, + const Shader::IR::Program& program); Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; -- cgit v1.2.3 From fa75b9b0626c8e118e27207dd1e82e2f415fc0bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 13 Apr 2021 05:32:21 -0300 Subject: 
spirv: Rework storage buffers and shader memory --- src/video_core/vulkan_common/vulkan_device.cpp | 29 +++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 911dfed44..87cfe6312 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -44,6 +44,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, + VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -313,6 +314,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, host_query_reset); + VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR, + .pNext = nullptr, + .variablePointersStorageBuffer = VK_TRUE, + .variablePointers = VK_TRUE, + }; + SetNext(next, variable_pointers); + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, .pNext = nullptr, @@ -399,6 +408,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; + if (ext_shader_atomic_int64) { + atomic_int64 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR, + .pNext = nullptr, + .shaderBufferInt64Atomics = VK_TRUE, + .shaderSharedInt64Atomics = VK_TRUE, + }; + SetNext(next, atomic_int64); + } + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR 
workgroup_layout; if (khr_workgroup_memory_explicit_layout) { workgroup_layout = { @@ -624,9 +644,13 @@ void Device::CheckSuitability(bool requires_swapchain) const { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; demote.pNext = nullptr; + VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{}; + variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR; + variable_pointers.pNext = &demote; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - robustness2.pNext = &demote; + robustness2.pNext = &variable_pointers; VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -654,6 +678,9 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), + std::make_pair(variable_pointers.variablePointers, "variablePointers"), + std::make_pair(variable_pointers.variablePointersStorageBuffer, + "variablePointersStorageBuffer"), std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), -- cgit v1.2.3 From a83579b50a167ab9483e5058fd1c748018ef6d7c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 13 Apr 2021 16:56:22 -0300 Subject: shader: Implement early Z tests --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f49add208..8a59a2611 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ 
b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -934,6 +934,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, profile.input_topology = Shader::InputTopology::TrianglesAdjacency; break; } + profile.force_early_z = key.state.early_z != 0; return profile; } -- cgit v1.2.3 From b126987c59964d81ae3705ad7ad6c0ace8714e19 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 01:04:59 -0300 Subject: shader: Implement transform feedbacks and define file format --- .../renderer_vulkan/fixed_pipeline_state.cpp | 19 +++- .../renderer_vulkan/fixed_pipeline_state.h | 26 ++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 118 ++++++++++++++++++++- 3 files changed, 156 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d8f683907..6a3baf837 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -52,6 +52,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const u32 topology_index = static_cast(regs.draw.topology.Value()); raw1 = 0; + no_extended_dynamic_state.Assign(has_extended_dynamic_state ? 0 : 1); + xfb_enabled.Assign(regs.tfb_enabled != 0); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 
1 : 0); depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); @@ -113,10 +115,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, return static_cast(viewport.swizzle.raw); }); } - if (!has_extended_dynamic_state) { - no_extended_dynamic_state.Assign(1); + if (no_extended_dynamic_state != 0) { dynamic_state.Refresh(regs); } + if (xfb_enabled != 0) { + xfb_state.Refresh(regs); + } } void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { @@ -158,6 +162,17 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t enable.Assign(1); } +void FixedPipelineState::TransformFeedbackState::Refresh(const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, layouts.begin(), [](const auto& layout) { + return Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + varyings = regs.tfb_varying_locs; +} + void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 348f1d6ce..5568c4f72 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -130,6 +130,18 @@ struct FixedPipelineState { } }; + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + + void Refresh(const Maxwell& regs); + }; + struct DynamicState { union { u32 raw1; @@ -168,6 +180,7 @@ struct FixedPipelineState { union { u32 raw1; BitField<0, 1, u32> no_extended_dynamic_state; + BitField<1, 1, u32> xfb_enabled; BitField<2, 1, u32> primitive_restart_enable; BitField<3, 1, u32> 
depth_bias_enable; BitField<4, 1, u32> depth_clamp_disabled; @@ -199,6 +212,7 @@ struct FixedPipelineState { std::array attachments; std::array viewport_swizzles; DynamicState dynamic_state; + TransformFeedbackState xfb_state; void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); @@ -211,8 +225,16 @@ struct FixedPipelineState { } size_t Size() const noexcept { - const size_t total_size = sizeof *this; - return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); + if (xfb_enabled != 0) { + // When transform feedback is enabled, use the whole struct + return sizeof(*this); + } else if (no_extended_dynamic_state != 0) { + // Dynamic state is enabled, we can enable more + return offsetof(FixedPipelineState, xfb_state); + } else { + // No XFB, extended dynamic state enabled + return offsetof(FixedPipelineState, dynamic_state); + } } }; static_assert(std::has_unique_object_representations_v); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8a59a2611..de52d0f30 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -248,6 +248,10 @@ namespace { using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; +// TODO: Move this to a separate file +constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; +constexpr u32 CACHE_VERSION{1}; + class GraphicsEnvironment final : public GenericEnvironment { public: explicit GraphicsEnvironment() = default; @@ -379,13 +383,14 @@ void SerializePipeline(const Key& key, const Envs& envs, const std::string& file try { std::ofstream file; file.exceptions(std::ifstream::failbit); - Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::app); + Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app); if (!file.is_open()) { LOG_ERROR(Common_Filesystem, 
"Failed to open pipeline cache file {}", filename); return; } if (file.tellp() == 0) { - // Write header... + file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) + .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); } const std::span key_span(reinterpret_cast(&key), sizeof(key)); SerializePipeline(key_span, MakeSpan(envs), file); @@ -520,8 +525,27 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading file.exceptions(std::ifstream::failbit); const auto end{file.tellg()}; file.seekg(0, std::ios::beg); - // Read header... + std::array magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast(&cache_version), sizeof(cache_version)); + if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + file.close(); + if (Common::FS::Delete(pipeline_cache_filename)) { + if (magic_number != MAGIC_NUMBER) { + LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file"); + } + if (cache_version != CACHE_VERSION) { + LOG_INFO(Render_Vulkan, "Deleting old pipeline cache"); + } + } else { + LOG_ERROR(Render_Vulkan, + "Invalid pipeline cache file and failed to delete it in \"{}\"", + pipeline_cache_filename); + } + return; + } while (file.tellg() != end) { if (stop_loading) { return; @@ -879,6 +903,88 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } +static std::vector MakeTransformFeedbackVaryings( + const GraphicsPipelineCacheKey& key) { + static constexpr std::array VECTORS{ + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 
116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] + }; + std::vector xfb(256); + for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { + const auto& locations = key.state.xfb_state.varyings[buffer]; + const auto& layout = key.state.xfb_state.layouts[buffer]; + const u32 varying_count = layout.varying_count; + u32 highest = 0; + for (u32 offset = 0; offset < varying_count; ++offset) { + const u32 base_offset = offset; + const u8 location = locations[offset]; + + Shader::TransformFeedbackVarying varying; + varying.buffer = layout.stream; + varying.stride = layout.stride; + varying.offset = offset * 4; + varying.components = 1; + + if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + xfb[location] = varying; + highest = std::max(highest, (base_offset + varying.components) * 4); + } + UNIMPLEMENTED_IF(highest != layout.stride); + } + return xfb; +} + Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::Profile profile{base_profile}; @@ -893,6 +999,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, if (key.state.topology == Maxwell::PrimitiveTopology::Points) { profile.fixed_state_point_size = 
point_size; } + if (key.state.xfb_enabled != 0) { + profile.xfb_varyings = MakeTransformFeedbackVaryings(key); + } profile.convert_depth_mode = gl_ndc; } std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), @@ -902,6 +1011,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, if (program.output_topology == Shader::OutputTopology::PointList) { profile.fixed_state_point_size = point_size; } + if (key.state.xfb_enabled != 0) { + profile.xfb_varyings = MakeTransformFeedbackVaryings(key); + } profile.convert_depth_mode = gl_ndc; break; default: -- cgit v1.2.3 From 6c512f4bffde6bd8e4dbc74ed27cc84cd7fffadb Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 14 Apr 2021 00:32:18 -0400 Subject: spirv: Implement alpha test --- .../renderer_vulkan/vk_pipeline_cache.cpp | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index de52d0f30..80f196d0e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -492,6 +492,37 @@ private: u32 read_lowest{}; u32 read_highest{}; }; + +Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { + switch (comparison) { + case Maxwell::ComparisonOp::Never: + case Maxwell::ComparisonOp::NeverOld: + return Shader::CompareFunction::Never; + case Maxwell::ComparisonOp::Less: + case Maxwell::ComparisonOp::LessOld: + return Shader::CompareFunction::Less; + case Maxwell::ComparisonOp::Equal: + case Maxwell::ComparisonOp::EqualOld: + return Shader::CompareFunction::Equal; + case Maxwell::ComparisonOp::LessEqual: + case Maxwell::ComparisonOp::LessEqualOld: + return Shader::CompareFunction::LessThanEqual; + case Maxwell::ComparisonOp::Greater: + case Maxwell::ComparisonOp::GreaterOld: + return 
Shader::CompareFunction::Greater; + case Maxwell::ComparisonOp::NotEqual: + case Maxwell::ComparisonOp::NotEqualOld: + return Shader::CompareFunction::NotEqual; + case Maxwell::ComparisonOp::GreaterEqual: + case Maxwell::ComparisonOp::GreaterEqualOld: + return Shader::CompareFunction::GreaterThanEqual; + case Maxwell::ComparisonOp::Always: + case Maxwell::ComparisonOp::AlwaysOld: + return Shader::CompareFunction::Always; + } + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); + return {}; +} } // Anonymous namespace void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, @@ -1016,6 +1047,11 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, } profile.convert_depth_mode = gl_ndc; break; + case Shader::Stage::Fragment: + profile.alpha_test_func = MaxwellToCompareFunction( + key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); + profile.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); + break; default: break; } -- cgit v1.2.3 From 416e1b7441d34512fcb0ffed014daf7ca4bb62bd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 21:36:36 -0300 Subject: spirv: Implement image buffers --- src/video_core/buffer_cache/buffer_cache.h | 24 +++++++++++++---- src/video_core/renderer_vulkan/pipeline_helper.h | 2 ++ .../renderer_vulkan/vk_compute_pipeline.cpp | 25 ++++++++++------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 31 +++++++++++++--------- 4 files changed, 56 insertions(+), 26 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6701aab82..29746f61d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -154,7 +154,7 @@ public: void UnbindGraphicsTextureBuffers(size_t stage); void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format); + PixelFormat 
format, bool is_written); void UnbindComputeStorageBuffers(); @@ -163,8 +163,8 @@ public: void UnbindComputeTextureBuffers(); - void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format); + void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, + bool is_written); void FlushCachedWrites(); @@ -393,7 +393,9 @@ private: u32 written_compute_storage_buffers = 0; std::array enabled_texture_buffers{}; + std::array written_texture_buffers{}; u32 enabled_compute_texture_buffers = 0; + u32 written_compute_texture_buffers = 0; std::array fast_bound_uniform_buffers{}; @@ -700,12 +702,14 @@ void BufferCache

::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, template void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { enabled_texture_buffers[stage] = 0; + written_texture_buffers[stage] = 0; } template void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, - u32 size, PixelFormat format) { + u32 size, PixelFormat format, bool is_written) { enabled_texture_buffers[stage] |= 1U << tbo_index; + written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -732,12 +736,14 @@ void BufferCache

::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, template void BufferCache

::UnbindComputeTextureBuffers() { enabled_compute_texture_buffers = 0; + written_compute_texture_buffers = 0; } template void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format) { + PixelFormat format, bool is_written) { enabled_compute_texture_buffers |= 1U << tbo_index; + written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -1274,6 +1280,10 @@ void BufferCache

::UpdateTextureBuffers(size_t stage) { ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { Binding& binding = texture_buffers[stage][index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark buffer as written if needed + if (((written_texture_buffers[stage] >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } }); } @@ -1343,6 +1353,10 @@ void BufferCache

::UpdateComputeTextureBuffers() { ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { Binding& binding = compute_texture_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark as written if needed + if (((written_compute_texture_buffers >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } }); } diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index e2167dc4b..aaf9a735e 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -88,6 +88,7 @@ public: Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size()); } @@ -126,6 +127,7 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& sam const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { image_view_ids += info.texture_buffer_descriptors.size(); + image_view_ids += info.image_buffer_descriptors.size(); for (const auto& desc : info.texture_descriptors) { const VkSampler sampler{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3d690f335..3c907ec5a 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -97,10 +97,12 @@ 
void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_image{[&](const auto& desc) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); - } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_image); + std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -108,24 +110,29 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.image_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } + std::ranges::for_each(info.image_descriptors, add_image); + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); buffer_cache.UnbindComputeTextureBuffers(); ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_buffer{[&](const auto& desc) { ASSERT(desc.count == 1); + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), - image_view.format); + image_view.format, is_written); ++texture_buffer_ids; ++index; - } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, 
add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + buffer_cache.UpdateComputeBuffers(); buffer_cache.BindHostComputeBuffers(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 23c01f24e..84720a6f9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -175,10 +175,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_image{[&](const auto& desc) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); - } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_image); + std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -186,28 +188,33 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.image_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } + std::ranges::for_each(info.image_descriptors, add_image); } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); ImageId* texture_buffer_index{image_view_ids.data()}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - const Shader::Info& info{stage_infos[stage]}; - buffer_cache.UnbindGraphicsTextureBuffers(stage); size_t index{}; - for (const 
auto& desc : info.texture_buffer_descriptors) { + const auto add_buffer{[&](const auto& desc) { ASSERT(desc.count == 1); - ImageView& image_view = texture_cache.GetImageView(*texture_buffer_index); + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format); + image_view.BufferSize(), image_view.format, + is_written); ++index; ++texture_buffer_index; - } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); texture_buffer_index += info.texture_descriptors.size(); + texture_buffer_index += info.image_descriptors.size(); } buffer_cache.UpdateGraphicsBuffers(is_indexed); -- cgit v1.2.3 From 7ae3ea6beefae76e6671436114863fce1baacd9e Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 15 Apr 2021 19:01:45 -0400 Subject: vk_pipeline_cache: Silence GCC warnings Silences `-Werror=missing-field-initializers` due to missing initializers. 
--- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 80f196d0e..ee22255bf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -696,6 +696,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, .fixed_state_point_size{}, + .alpha_test_func{}, + .xfb_varyings{}, }; } -- cgit v1.2.3 From 183855e396cc6918d36fbf3e38ea426e934b4e3e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 15 Apr 2021 22:46:11 -0300 Subject: shader: Implement tessellation shaders, polygon mode and invocation id --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 13 ++++++++++ src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 ++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++- .../renderer_vulkan/vk_pipeline_cache.cpp | 30 ++++++++++++++++++++++ .../renderer_vulkan/vk_staging_buffer_pool.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- 6 files changed, 50 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index dc4ff0da2..8f0b0b8ec 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -685,6 +685,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) { return {}; } +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) { + switch (polygon_mode) { + case Maxwell::PolygonMode::Point: + return VK_POLYGON_MODE_POINT; + case Maxwell::PolygonMode::Line: + return VK_POLYGON_MODE_LINE; + case Maxwell::PolygonMode::Fill: + return VK_POLYGON_MODE_FILL; + } + 
UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode); + return {}; +} + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { switch (swizzle) { case Tegra::Texture::SwizzleSource::Zero: diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 9f78e15b6..50a599c11 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -65,6 +65,8 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face); VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode); + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 84720a6f9..d5e9dae0f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -355,7 +355,8 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, + .polygonMode = + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)), .cullMode = static_cast( dynamic.cull_enable ? 
MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ee22255bf..0bccc640a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1040,6 +1040,36 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); break; + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... + profile.tess_clockwise = key.state.tessellation_clockwise == 0; + profile.tess_primitive = [&key] { + const u32 raw{key.state.tessellation_primitive.Value()}; + switch (static_cast(raw)) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + profile.tess_spacing = [&] { + const u32 raw{key.state.tessellation_spacing}; + switch (static_cast(raw)) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; case Shader::Stage::Geometry: if (program.output_topology == Shader::OutputTopology::PointList) { profile.fixed_state_point_size = point_size; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 
0412b5234..555b12ed7 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .flags = 0, .size = STREAM_BUFFER_SIZE, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 87cfe6312..f0de19ba1 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -225,7 +225,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .drawIndirectFirstInstance = false, .depthClamp = true, .depthBiasClamp = true, - .fillModeNonSolid = false, + .fillModeNonSolid = true, .depthBounds = false, .wideLines = false, .largePoints = true, @@ -670,6 +670,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.largePoints, "largePoints"), std::make_pair(features.multiViewport, "multiViewport"), std::make_pair(features.depthBiasClamp, "depthBiasClamp"), + std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), -- cgit v1.2.3 From e3514bcd6b09f623da14c4f3c4ffd988e75577ed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 16:31:15 -0300 Subject: spirv: Implement ViewportMask with NV_viewport_array2 --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + src/video_core/vulkan_common/vulkan_device.cpp | 5 +++++ 
src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 3 files changed, 12 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0bccc640a..4d0d3ebb7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -690,6 +690,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f0de19ba1..72b83f99a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -346,6 +346,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); } + if (!nv_viewport_array2) { + LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -724,6 +728,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { } }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); + test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); diff --git 
a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4e6d13308..4415558bb 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -169,6 +169,11 @@ public: return nv_viewport_swizzle; } + /// Returns true if the device supports VK_NV_viewport_array2. + bool IsNvViewportArray2Supported() const { + return nv_viewport_array2; + } + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -312,6 +317,7 @@ private: bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. + bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. 
-- cgit v1.2.3 From 95815a3883d708f71db5119f42243e183f32f9a2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 17:22:59 -0300 Subject: shader: Implement PIXLD.MY_INDEX --- src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 72b83f99a..038231298 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -218,7 +218,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .independentBlend = true, .geometryShader = true, .tessellationShader = true, - .sampleRateShading = false, + .sampleRateShading = true, .dualSrcBlend = false, .logicOp = false, .multiDrawIndirect = false, @@ -677,6 +677,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), + std::make_pair(features.sampleRateShading, "sampleRateShading"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), -- cgit v1.2.3 From f18a6dd1bdaffda4c3e771af3cf7cf41919ebd67 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 16 Apr 2021 23:52:58 +0200 Subject: shader: Implement SR_Y_DIRECTION --- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 2 ++ src/video_core/renderer_vulkan/fixed_pipeline_state.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 3 files changed, 4 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp 
b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 6a3baf837..24834e0f7 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -82,6 +82,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); + y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); + if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 5568c4f72..31de6b2c8 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -202,6 +202,7 @@ struct FixedPipelineState { BitField<3, 1, u32> early_z; BitField<4, 1, u32> depth_enabled; BitField<5, 5, u32> depth_format; + BitField<10, 1, u32> y_negate; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4d0d3ebb7..e9b93336b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1116,6 +1116,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, break; } profile.force_early_z = key.state.early_z != 0; + profile.y_negate = key.state.y_negate != 0; return profile; } -- cgit v1.2.3 From dd860b684c7695097107c1186e96a70e754e5990 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Apr 2021 19:48:45 -0300 Subject: shader: Implement D3D samplers --- .../renderer_vulkan/vk_compute_pipeline.cpp | 31 ++++++++++++++------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 25 ++++++++++++----- .../renderer_vulkan/vk_pipeline_cache.cpp 
| 32 ++++++++-------------- 3 files changed, 51 insertions(+), 37 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3c907ec5a..45d837ca4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -88,23 +88,34 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, boost::container::static_vector image_view_indices; boost::container::static_vector samplers; - const auto& launch_desc{kepler_compute.launch_description}; - const auto& cbufs{launch_desc.const_buffer_config}; - const bool via_header_index{launch_desc.linked_tsc}; - const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { - ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); - const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; - const u32 raw_handle{gpu_memory.Read(addr)}; - return TextureHandle(raw_handle, via_header_index); + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + desc.secondary_cbuf_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TextureHandle{raw, via_header_index}; + } + } + return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const 
TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d5e9dae0f..08f00b9ce 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -169,20 +169,31 @@ void GraphicsPipeline::Configure(bool is_indexed) { ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { - ASSERT(cbufs[cbuf_index].enabled); - const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; - const u32 raw_handle{gpu_memory.Read(addr)}; - return TextureHandle(raw_handle, via_header_index); + const auto read_handle{[&](const auto& desc) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const GPUVAddr addr{cbufs[desc.cbuf_index].address + desc.cbuf_offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + desc.secondary_cbuf_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TextureHandle{raw, via_header_index}; + } + } + return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const 
auto& desc) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e9b93336b..4317b2ac7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -188,9 +188,7 @@ protected: } Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - GPUVAddr cbuf_addr, u32 cbuf_size, u32 cbuf_index, - u32 cbuf_offset) { - const u32 raw{cbuf_offset < cbuf_size ? 
gpu_memory->Read(cbuf_addr + cbuf_offset) : 0}; + u32 raw) { const TextureHandle handle{raw, via_header_index}; const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; @@ -219,7 +217,7 @@ protected: throw Shader::NotImplementedException("Unknown texture type"); } }()}; - texture_types.emplace(MakeCbufKey(cbuf_index, cbuf_offset), result); + texture_types.emplace(raw, result); return result; } @@ -227,7 +225,7 @@ protected: GPUVAddr program_base{}; std::vector code; - std::unordered_map texture_types; + std::unordered_map texture_types; std::unordered_map cbuf_values; u32 local_memory_size{}; @@ -250,7 +248,7 @@ using Shader::Maxwell::TranslateProgram; // TODO: Move this to a separate file constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION{1}; +constexpr u32 CACHE_VERSION{2}; class GraphicsEnvironment final : public GenericEnvironment { public: @@ -308,13 +306,10 @@ public: return value; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + Shader::TextureType ReadTextureType(u32 handle) override { const auto& regs{maxwell3d->regs}; - const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; - ASSERT(cbuf.enabled); const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, - cbuf.address, cbuf.size, cbuf_index, cbuf_offset); + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); } private: @@ -352,13 +347,10 @@ public: return value; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + Shader::TextureType ReadTextureType(u32 handle) override { const auto& regs{kepler_compute->regs}; const auto& qmd{kepler_compute->launch_description}; - ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 
0); - const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, - cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); } private: @@ -421,7 +413,7 @@ public: code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); file.read(reinterpret_cast(code.get()), code_size); for (size_t i = 0; i < num_texture_types; ++i) { - u64 key; + u32 key; Shader::TextureType type; file.read(reinterpret_cast(&key), sizeof(key)) .read(reinterpret_cast(&type), sizeof(type)); @@ -457,8 +449,8 @@ public: return it->second; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { - const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + Shader::TextureType ReadTextureType(u32 handle) override { + const auto it{texture_types.find(handle)}; if (it == texture_types.end()) { throw Shader::LogicError("Uncached read texture type"); } @@ -483,7 +475,7 @@ public: private: std::unique_ptr code; - std::unordered_map texture_types; + std::unordered_map texture_types; std::unordered_map cbuf_values; std::array workgroup_size{}; u32 local_memory_size{}; -- cgit v1.2.3 From 050e81500c002f304d581f28700de549b828a2bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:35:47 -0300 Subject: shader: Move microinstruction header to the value header --- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 8b6839966..e12e4422f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -16,7 +16,7 @@ #include "common/common_types.h" #include "common/thread_worker.h" #include 
"shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" -- cgit v1.2.3 From d10cf55353175b13bed4cf18791e080ecb7fd95b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:17:59 -0300 Subject: shader: Implement indexed textures --- src/video_core/renderer_vulkan/pipeline_helper.h | 50 ++++++++++------- .../renderer_vulkan/vk_compute_pipeline.cpp | 46 +++++++++------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 63 +++++++++++++--------- 3 files changed, 95 insertions(+), 64 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index aaf9a735e..dd7d2cc0c 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -85,28 +85,30 @@ public: } void Add(const Shader::Info& info, VkShaderStageFlags stage) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, 
info.image_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors); } private: - void Add(VkDescriptorType type, VkShaderStageFlags stage, size_t num) { + template + void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) { + const size_t num{descriptors.size()}; for (size_t i = 0; i < num; ++i) { bindings.push_back({ .binding = binding, .descriptorType = type, - .descriptorCount = 1, + .descriptorCount = descriptors[i].count, .stageFlags = stage, .pImmutableSamplers = nullptr, }); entries.push_back({ .dstBinding = binding, .dstArrayElement = 0, - .descriptorCount = 1, + .descriptorCount = descriptors[i].count, .descriptorType = type, .offset = offset, .stride = sizeof(DescriptorUpdateEntry), @@ -126,21 +128,29 @@ private: inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { - image_view_ids += info.texture_buffer_descriptors.size(); - image_view_ids += info.image_buffer_descriptors.size(); + for (const auto& desc : info.texture_buffer_descriptors) { + image_view_ids += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + image_view_ids += desc.count; + } for (const auto& desc : info.texture_descriptors) { - const VkSampler sampler{*(samplers++)}; - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - const VkImageView vk_image_view{image_view.Handle(desc.type)}; - update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + for (u32 index = 0; index < desc.count; ++index) { + const VkSampler sampler{*(samplers++)}; + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + const VkImageView vk_image_view{image_view.Handle(desc.type)}; + update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + 
} } for (const auto& desc : info.image_descriptors) { - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - if (desc.is_written) { - texture_cache.MarkModification(image_view.image_id); + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; + update_descriptor_queue.AddImage(vk_image_view); } - const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; - update_descriptor_queue.AddImage(vk_image_view); } } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 45d837ca4..6e9f66262 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -91,35 +91,41 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto& qmd{kepler_compute.launch_description}; const auto& cbufs{qmd.const_buffer_config}; const bool via_header_index{qmd.linked_tsc != 0}; - const auto read_handle{[&](const auto& desc) { + const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + - desc.secondary_cbuf_offset}; + secondary_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 
rhs_raw{gpu_memory.Read(separate_addr)}; - const u32 raw{lhs_raw | rhs_raw}; - return TextureHandle{raw, via_header_index}; + return TextureHandle{lhs_raw | rhs_raw, via_header_index}; } } return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); + } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - samplers.push_back(sampler->Handle()); + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + samplers.push_back(sampler->Handle()); + } } std::ranges::for_each(info.image_descriptors, add_image); @@ -130,16 +136,18 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - ASSERT(desc.count == 1); - bool is_written{false}; - if constexpr (std::is_same_v) { - is_written = desc.is_written; + for (u32 index = 0; index < desc.count; ++index) { + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written); + 
++texture_buffer_ids; + ++index; } - ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); - buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), - image_view.format, is_written); - ++texture_buffer_ids; - ++index; }}; std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); std::ranges::for_each(info.image_buffer_descriptors, add_buffer); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 08f00b9ce..b7688aef9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -161,23 +161,26 @@ void GraphicsPipeline::Configure(bool is_indexed) { const Shader::Info& info{stage_infos[stage]}; buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); - size_t index{}; + size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - desc.is_written); - ++index; + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](const auto& desc) { + const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(cbufs[desc.cbuf_index].enabled); - const GPUVAddr addr{cbufs[desc.cbuf_index].address + desc.cbuf_offset}; + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr 
separate_addr{cbufs[desc.secondary_cbuf_index].address + - desc.secondary_cbuf_offset}; + second_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; const u32 raw{lhs_raw | rhs_raw}; @@ -187,17 +190,21 @@ void GraphicsPipeline::Configure(bool is_indexed) { return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); + } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers.push_back(sampler->Handle()); + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; + samplers.push_back(sampler->Handle()); + } } std::ranges::for_each(info.image_descriptors, add_image); } @@ -208,24 +215,30 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { size_t index{}; const auto add_buffer{[&](const auto& desc) { - ASSERT(desc.count == 1); - bool is_written{false}; - if constexpr (std::is_same_v) { - is_written = desc.is_written; + for (u32 index = 0; index < desc.count; ++index) { + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + 
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written); + ++index; + ++texture_buffer_index; } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; - buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format, - is_written); - ++index; - ++texture_buffer_index; }}; const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsTextureBuffers(stage); std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); std::ranges::for_each(info.image_buffer_descriptors, add_buffer); - texture_buffer_index += info.texture_descriptors.size(); - texture_buffer_index += info.image_descriptors.size(); + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } } buffer_cache.UpdateGraphicsBuffers(is_indexed); -- cgit v1.2.3 From 0ace34575cd099fb0db955ab32c215106ef19f84 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:19:14 -0300 Subject: shader: Require dual source blending --- src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 038231298..9c609e504 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -219,7 +219,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .geometryShader = true, .tessellationShader = true, .sampleRateShading = true, - .dualSrcBlend = false, + .dualSrcBlend = true, .logicOp = false, .multiDrawIndirect = false, .drawIndirectFirstInstance = false, @@ -678,6 +678,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { 
std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.sampleRateShading, "sampleRateShading"), + std::make_pair(features.dualSrcBlend, "dualSrcBlend"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), -- cgit v1.2.3 From 7a1f296cda32bdb8996f25fd1862b422ac2bfe48 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 21:05:10 -0300 Subject: shader: Fix render targets with null attachments --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 18 +++++++--- .../renderer_vulkan/vk_render_pass_cache.cpp | 42 +++++++++++----------- 2 files changed, 34 insertions(+), 26 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b7688aef9..e43db280f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -105,6 +105,17 @@ RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); return key; } + +size_t NumAttachments(const FixedPipelineState& state) { + size_t num{}; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + const auto format{static_cast(state.color_formats[index])}; + if (format != Tegra::RenderTargetFormat::NONE) { + num = index + 1; + } + } + return num; +} } // Anonymous namespace GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, @@ -418,17 +429,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .maxDepthBounds = 0.0f, }; static_vector cb_attachments; - for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + const size_t 
num_attachments{NumAttachments(state)}; + for (size_t index = 0; index < num_attachments; ++index) { static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; - const auto format{static_cast(state.color_formats[index])}; - if (format == Tegra::RenderTargetFormat::NONE) { - continue; - } const auto& blend{state.attachments[index]}; const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 991afe521..451ffe019 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -16,18 +16,6 @@ namespace Vulkan { namespace { using VideoCore::Surface::PixelFormat; -constexpr std::array ATTACHMENT_REFERENCES{ - VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, -}; - VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, VkSampleCountFlagBits samples) { using MaxwellToVK::SurfaceFormat; @@ -54,17 +42,29 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { return *pair->second; } boost::container::static_vector descriptions; + std::array references{}; + u32 num_attachments{}; + u32 num_colors{}; for (size_t index = 0; index < key.color_formats.size(); ++index) { const PixelFormat format{key.color_formats[index]}; - if (format == PixelFormat::Invalid) { - continue; + const bool is_valid{format != 
PixelFormat::Invalid}; + references[index] = VkAttachmentReference{ + .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; + if (is_valid) { + descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + num_attachments = static_cast(index + 1); + ++num_colors; } - descriptions.push_back(AttachmentDescription(*device, format, key.samples)); } - const size_t num_colors{descriptions.size()}; - const VkAttachmentReference* depth_attachment{}; + const bool has_depth{key.depth_format != PixelFormat::Invalid}; + VkAttachmentReference depth_reference{}; if (key.depth_format != PixelFormat::Invalid) { - depth_attachment = &ATTACHMENT_REFERENCES[num_colors]; + depth_reference = VkAttachmentReference{ + .attachment = num_colors, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); } const VkSubpassDescription subpass{ @@ -72,10 +72,10 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .inputAttachmentCount = 0, .pInputAttachments = nullptr, - .colorAttachmentCount = static_cast(num_colors), - .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, + .colorAttachmentCount = num_attachments, + .pColorAttachments = references.data(), .pResolveAttachments = nullptr, - .pDepthStencilAttachment = depth_attachment, + .pDepthStencilAttachment = has_depth ? 
&depth_reference : nullptr, .preserveAttachmentCount = 0, .pPreserveAttachments = nullptr, }; -- cgit v1.2.3 From 2dc86372c76afb134651499452bb5074b6d1e839 Mon Sep 17 00:00:00 2001 From: Rodrigo Locatti Date: Fri, 23 Apr 2021 02:38:02 -0300 Subject: shader: Fix bugs and build issues on GCC --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 4 ++-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6e9f66262..6611c1de3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -95,7 +95,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); const u32 index_offset{index << desc.size_shift}; const u32 offset{desc.cbuf_offset + index_offset}; - const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { @@ -136,7 +136,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { + for (u32 i = 0; index < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { is_written = desc.is_written; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e43db280f..a8b402253 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ 
b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -226,7 +226,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { + for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4415558bb..ebe073293 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -4,10 +4,10 @@ #pragma once +#include #include #include #include -#include #include #include "common/common_types.h" -- cgit v1.2.3 From 5b1b06f11e4520ec9d0b7864dc822daea3e3be0c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 07:33:21 -0300 Subject: vk_graphics_pipeline: Guard against non-tessellation pipelines using patches --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a8b402253..2bc1f67ae 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -345,12 +345,18 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { + if (!spv_modules[1] && !spv_modules[2]) { + LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, 
using points"); + input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + } + } const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), + .topology = input_assembly_topology, .primitiveRestartEnable = state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; -- cgit v1.2.3 From 0c0ee9d8973abd7d1649df7a6b6f57b3a5570dfe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 07:39:00 -0300 Subject: vulkan_device: Require shaderClipDistance and shaderCullDistance features --- src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9c609e504..2318c1bda 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -249,8 +249,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderSampledImageArrayDynamicIndexing = false, .shaderStorageBufferArrayDynamicIndexing = false, .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = false, - .shaderCullDistance = false, + .shaderClipDistance = true, + .shaderCullDistance = true, .shaderFloat64 = true, .shaderInt64 = true, .shaderInt16 = true, @@ -684,6 +684,8 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), + std::make_pair(features.shaderClipDistance, "shaderClipDistance"), + std::make_pair(features.shaderCullDistance, "shaderCullDistance"), std::make_pair(demote.shaderDemoteToHelperInvocation, 
"shaderDemoteToHelperInvocation"), std::make_pair(variable_pointers.variablePointers, "variablePointers"), std::make_pair(variable_pointers.variablePointersStorageBuffer, -- cgit v1.2.3 From 8fda599a316b7b3a5e017cb01db1e9c021ce7654 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 21:24:30 -0300 Subject: vk_compute_pipeline: Fix index comparison oversight on compute texture buffers --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6611c1de3..990ead575 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -136,7 +136,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 i = 0; index < desc.count; ++i) { + for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { is_written = desc.is_written; -- cgit v1.2.3 From f4ace63957ee47c4e3e913954f07375d0391beae Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 24 Apr 2021 18:27:25 -0300 Subject: shader: Accelerate pipeline transitions and use dirty flags for shaders --- src/video_core/dirty_flags.cpp | 6 +++ src/video_core/dirty_flags.h | 2 + src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_state_tracker.cpp | 6 --- src/video_core/renderer_opengl/gl_state_tracker.h | 1 - .../renderer_vulkan/vk_graphics_pipeline.cpp | 46 +++++++++--------- .../renderer_vulkan/vk_graphics_pipeline.h | 54 +++++++++++++++++++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 31 ++++++++++--- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 ++---------- 9 files changed, 114 insertions(+), 64 deletions(-) 
(limited to 'src/video_core') diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 7149af290..b1be065c3 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) { FillBlock(table, OFF(zeta), NUM(zeta), flag); } } + +void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) { + FillBlock(tables[0], OFF(shader_config[0]), + NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders); +} } // Anonymous namespace void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { @@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { SetupIndexBuffer(tables); SetupDirtyDescriptors(tables); SetupDirtyRenderTargets(tables); + SetupDirtyShaders(tables); } } // namespace VideoCommon::Dirty diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 702688ace..504465d3f 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -36,6 +36,8 @@ enum : u8 { IndexBuffer, + Shaders, + LastCommonEntry, }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3551dbdcc..dd1937863 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -635,7 +635,7 @@ void RasterizerOpenGL::SyncDepthClamp() { void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { + if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) { return; } flags[Dirty::ClipDistances] = false; diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index dbdf5230f..586da84e3 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -83,11 
+83,6 @@ void SetupDirtyScissors(Tables& tables) { FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); } -void SetupDirtyShaders(Tables& tables) { - FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram, - Shaders); -} - void SetupDirtyPolygonModes(Tables& tables) { tables[0][OFF(polygon_mode_front)] = PolygonModeFront; tables[0][OFF(polygon_mode_back)] = PolygonModeBack; @@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} SetupDirtyScissors(tables); SetupDirtyVertexInstances(tables); SetupDirtyVertexFormat(tables); - SetupDirtyShaders(tables); SetupDirtyPolygonModes(tables); SetupDirtyDepthTest(tables); SetupDirtyStencilTest(tables); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 94c905116..5864c7c07 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -52,7 +52,6 @@ enum : u8 { BlendState0, BlendState7 = BlendState0 + 7, - Shaders, ClipDistances, PolygonModes, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2bc1f67ae..100a5e07a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -125,13 +125,12 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, - const FixedPipelineState& state_, + const GraphicsPipelineCacheKey& key_, std::array stages, const std::array& infos) - : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, + : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, - 
update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{ - std::move(stages)} { + update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); @@ -144,7 +143,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, pipeline_layout = builder.CreatePipelineLayout(set_layout); descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); - const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; MakePipeline(device, render_pass); std::lock_guard lock{build_mutex}; @@ -158,6 +157,11 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, } } +void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { + transition_keys.push_back(transition->key); + transitions.push_back(transition); +} + void GraphicsPipeline::Configure(bool is_indexed) { static constexpr size_t max_images_elements = 64; std::array image_view_ids; @@ -294,12 +298,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { - dynamic = state.dynamic_state; + dynamic = key.state.dynamic_state; } static_vector vertex_bindings; static_vector vertex_binding_divisors; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; + const bool instanced = key.state.binding_divisors[index] != 0; const auto rate = instanced ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; vertex_bindings.push_back({ .binding = static_cast(index), @@ -309,14 +313,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (instanced) { vertex_binding_divisors.push_back({ .binding = static_cast(index), - .divisor = state.binding_divisors[index], + .divisor = key.state.binding_divisors[index], }); } } static_vector vertex_attributes; const auto& input_attributes = stage_infos[0].input_generics; - for (size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; if (!attribute.enabled || !input_attributes[index].used) { continue; } @@ -345,7 +349,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology); if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { if (!spv_modules[1] && !spv_modules[2]) { LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); @@ -357,14 +361,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pNext = nullptr, .flags = 0, .topology = input_assembly_topology, - .primitiveRestartEnable = state.primitive_restart_enable != 0 && + .primitiveRestartEnable = key.state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; const VkPipelineTessellationStateCreateInfo tessellation_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + 
.patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; VkPipelineViewportStateCreateInfo viewport_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, @@ -376,7 +380,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pScissors = nullptr, }; std::array swizzles; - std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); + std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, @@ -393,15 +397,15 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pNext = nullptr, .flags = 0, .depthClampEnable = - static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = - static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + static_cast(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), .polygonMode = - MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)), + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)), .cullMode = static_cast( dynamic.cull_enable ? 
MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, + .depthBiasEnable = key.state.depth_bias_enable, .depthBiasConstantFactor = 0.0f, .depthBiasClamp = 0.0f, .depthBiasSlopeFactor = 0.0f, @@ -411,7 +415,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), + .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode), .sampleShadingEnable = VK_FALSE, .minSampleShading = 0.0f, .pSampleMask = nullptr, @@ -435,7 +439,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .maxDepthBounds = 0.0f, }; static_vector cb_attachments; - const size_t num_attachments{NumAttachments(state)}; + const size_t num_attachments{NumAttachments(key.state)}; for (size_t index = 0; index < num_attachments; ++index) { static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, @@ -443,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; - const auto& blend{state.attachments[index]}; + const auto& blend{key.state.attachments[index]}; const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; for (size_t i = 0; i < mask_table.size(); ++i) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 7d14d2378..fd787840b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -4,10 +4,12 @@ #pragma once +#include #include #include #include #include +#include #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" @@ -20,6 +22,39 @@ namespace Vulkan { +struct 
GraphicsPipelineCacheKey { + std::array unique_hashes; + FixedPipelineState state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std + +namespace Vulkan { + class Device; class RenderPassCache; class VKScheduler; @@ -35,7 +70,8 @@ public: const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, const FixedPipelineState& state, + RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array stages, const std::array& infos); @@ -47,16 +83,30 @@ public: GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; GraphicsPipeline(const GraphicsPipeline&) = delete; + void AddTransition(GraphicsPipeline* transition); + + GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { + if (key == current_key) { + return this; + } + const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)}; + return it != transition_keys.end() ? 
transitions[std::distance(transition_keys.begin(), it)] + : nullptr; + } + private: void MakePipeline(const Device& device, VkRenderPass render_pass); + const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; TextureCache& texture_cache; BufferCache& buffer_cache; VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; - const FixedPipelineState state; + + std::vector transition_keys; + std::vector transitions; std::array spv_modules; std::array stage_infos; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4317b2ac7..2bd870060 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -21,6 +21,7 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/program_header.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -700,17 +701,28 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (!RefreshStages()) { + current_pipeline = nullptr; return nullptr; } graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + if (current_pipeline) { + GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; + if (next) { + current_pipeline = next; + return current_pipeline; + } + } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& pipeline{pair->second}; - if (!is_new) { - return pipeline.get(); + if (is_new) { + pipeline = CreateGraphicsPipeline(); } - pipeline = CreateGraphicsPipeline(); - return pipeline.get(); + if (current_pipeline) { + current_pipeline->AddTransition(pipeline.get()); + } + current_pipeline = pipeline.get(); + return 
current_pipeline; } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -743,6 +755,12 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { } bool PipelineCache::RefreshStages() { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[VideoCommon::Dirty::Shaders]) { + return last_valid_shaders; + } + dirty[VideoCommon::Dirty::Shaders] = false; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { @@ -755,6 +773,7 @@ bool PipelineCache::RefreshStages() { const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; if (!cpu_shader_addr) { LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); + last_valid_shaders = false; return false; } const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; @@ -766,6 +785,7 @@ bool PipelineCache::RefreshStages() { shader_infos[index] = shader_info; graphics_key.unique_hashes[index] = shader_info->unique_hash; } + last_valid_shaders = true; return true; } @@ -832,8 +852,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; return std::make_unique( maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, - update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules), - infos); + update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e12e4422f..ad569acc4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -58,26 +58,6 @@ static_assert(std::has_unique_object_representations_v) static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_constructible_v); -struct GraphicsPipelineCacheKey { - std::array unique_hashes; - FixedPipelineState state; - - size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - size_t Size() const noexcept { - return sizeof(unique_hashes) + state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - } // namespace Vulkan namespace std { @@ -89,13 +69,6 @@ struct hash { } }; -template <> -struct hash { - size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - } // namespace std namespace Vulkan { @@ -181,7 +154,10 @@ private: TextureCache& texture_cache; GraphicsPipelineCacheKey graphics_key{}; + GraphicsPipeline* current_pipeline{}; + std::array shader_infos{}; + bool last_valid_shaders{}; std::unordered_map> compute_cache; std::unordered_map> graphics_cache; -- cgit v1.2.3 From 5ed871398b0e89cb3f2e3eb740d431f5faaa12e4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp 
Date: Sat, 24 Apr 2021 18:28:02 -0300 Subject: vk_graphics_pipeline: Generate specialized pipeline config functions and improve code --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 239 ++++++++++++++++++--- .../renderer_vulkan/vk_graphics_pipeline.h | 22 +- 2 files changed, 230 insertions(+), 31 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 100a5e07a..674226cb7 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -19,14 +19,24 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" +#ifdef _MSC_VER +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif + namespace Vulkan { namespace { using boost::container::small_vector; using boost::container::static_vector; +using Shader::ImageBufferDescriptor; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; +constexpr size_t MAX_IMAGE_ELEMENTS = 64; + DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { DescriptorLayoutBuilder builder{device.GetLogical()}; for (size_t index = 0; index < infos.size(); ++index) { @@ -116,6 +126,80 @@ size_t NumAttachments(const FixedPipelineState& state) { } return num; } + +template +bool Passes(const std::array& modules, + const std::array& stage_infos) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (!Spec::enabled_stages[stage] && modules[stage]) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if 
(!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if (!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; +} + +using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); + +template +ConfigureFuncPtr FindSpec(const std::array& modules, + const std::array& stage_infos) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes(modules, stage_infos)) { + return FindSpec(modules, stage_infos); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc(); +} + +struct SimpleVertexFragmentSpec { + static constexpr std::array enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +ConfigureFuncPtr ConfigureFunc(const std::array& modules, + const std::array& infos) { + return FindSpec(modules, infos); +} } // Anonymous namespace GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, @@ -144,6 +228,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); 
const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; + Validate(); MakePipeline(device, render_pass); std::lock_guard lock{build_mutex}; @@ -155,6 +240,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, } else { func(); } + configure_func = ConfigureFunc(spv_modules, stage_infos); } void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { @@ -162,26 +248,29 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { transitions.push_back(transition); } -void GraphicsPipeline::Configure(bool is_indexed) { - static constexpr size_t max_images_elements = 64; - std::array image_view_ids; - static_vector image_view_indices; - static_vector samplers; +template +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_index{}; texture_cache.SynchronizeGraphicsDescriptors(); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); - size_t ssbo_index{}; - for (const auto& desc : info.storage_buffers_descriptors) { - ASSERT(desc.count == 1); - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, - desc.cbuf_offset, desc.is_written); - ++ssbo_index; + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; 
const auto read_handle{[&](const auto& desc, u32 index) { @@ -207,33 +296,60 @@ void GraphicsPipeline::Configure(bool is_indexed) { const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + image_view_indices[image_index++] = handle.image; } }}; - std::ranges::for_each(info.texture_buffer_descriptors, add_image); - std::ranges::for_each(info.image_buffer_descriptors, add_image); + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_image(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + image_view_indices[image_index] = handle.image; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers.push_back(sampler->Handle()); + samplers[image_index] = sampler->Handle(); + ++image_index; + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); } } - std::ranges::for_each(info.image_descriptors, add_image); + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_index); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); ImageId* 
texture_buffer_index{image_view_ids.data()}; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { is_written = desc.is_written; } ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; @@ -245,29 +361,75 @@ void GraphicsPipeline::Configure(bool is_indexed) { } }}; const Shader::Info& info{stage_infos[stage]}; - buffer_cache.UnbindGraphicsTextureBuffers(stage); - std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); - std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + if constexpr (Spec::has_texture_buffers || Spec::has_image_buffers) { + buffer_cache.UnbindGraphicsTextureBuffers(stage); + } + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } for (const auto& desc : info.texture_descriptors) { texture_buffer_index += desc.count; } - for (const auto& desc : info.image_descriptors) { - texture_buffer_index += desc.count; + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); } - buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); 
update_descriptor_queue.Acquire(); const VkSampler* samplers_it{samplers.data()}; const ImageId* views_it{image_view_ids.data()}; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { buffer_cache.BindHostStageBuffers(stage); PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, update_descriptor_queue); + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + ConfigureDraw(); +} + +void GraphicsPipeline::ConfigureDraw() { texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); @@ -550,4 +712,23 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa }); } +void GraphicsPipeline::Validate() { + size_t num_images{}; + for (const auto& info : stage_infos) { + for (const auto& desc : info.texture_buffer_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + } + ASSERT(num_images <= MAX_IMAGE_ELEMENTS); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index fd787840b..edab5703f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -75,8 +75,6 @@ public: std::array stages, const std::array& infos); - void Configure(bool is_indexed); - GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept 
= delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; @@ -85,6 +83,10 @@ public: void AddTransition(GraphicsPipeline* transition); + void Configure(bool is_indexed) { + configure_func(this, is_indexed); + } + GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { if (key == current_key) { return this; @@ -94,9 +96,23 @@ public: : nullptr; } + template + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pipeline, bool is_indexed) { + pipeline->ConfigureImpl(is_indexed); + }; + } + private: + template + void ConfigureImpl(bool is_indexed); + + void ConfigureDraw(); + void MakePipeline(const Device& device, VkRenderPass render_pass); + void Validate(); + const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; @@ -105,6 +121,8 @@ private: VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; + void (*configure_func)(GraphicsPipeline*, bool); + std::vector transition_keys; std::vector transitions; -- cgit v1.2.3 From 2f3c3dfc10a318f63862c4976f0608ea50c19387 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Apr 2021 00:15:32 -0300 Subject: vulkan: Rework descriptor allocation algorithm Create multiple descriptor pools on demand. There are some degrees of freedom what is considered a compatible pool to avoid wasting large pools on small descriptors. 
--- src/video_core/renderer_vulkan/blit_image.cpp | 19 +- src/video_core/renderer_vulkan/blit_image.h | 2 +- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_buffer_cache.h | 4 +- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 200 +++++++++++---------- src/video_core/renderer_vulkan/vk_compute_pass.h | 28 +-- .../renderer_vulkan/vk_compute_pipeline.cpp | 4 +- .../renderer_vulkan/vk_compute_pipeline.h | 2 +- .../renderer_vulkan/vk_descriptor_pool.cpp | 172 +++++++++++++----- .../renderer_vulkan/vk_descriptor_pool.h | 62 +++++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 +- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 6 +- src/video_core/renderer_vulkan/vk_rasterizer.h | 2 +- 15 files changed, 314 insertions(+), 197 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 39fe9289b..4058f62cd 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA .bindingCount = 1, .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, }; +template +inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 0, + .texture_buffers = 0, + .image_buffers = 0, + .textures = num_textures, + .images = 0, + .score = 2, +}; constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -326,14 +336,16 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, - StateTracker& state_tracker_, 
VKDescriptorPool& descriptor_pool) + StateTracker& state_tracker_, DescriptorPool& descriptor_pool) : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), - one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), - two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), + one_texture_descriptor_allocator{ + descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)}, + two_textures_descriptor_allocator{ + descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)}, one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( PipelineLayoutCreateInfo(one_texture_set_layout.address()))), two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( @@ -415,7 +427,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { - ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); } diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 0d81a06ed..33ee095c1 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -31,7 +31,7 @@ struct BlitImagePipelineKey { class BlitImageHelper { public: explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, - StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); + StateTracker& state_tracker, DescriptorPool& descriptor_pool); ~BlitImageHelper(); void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& 
src_image_view, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index cdda56ab1..568993c58 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -116,7 +116,7 @@ VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, VKScheduler& scheduler_, StagingBufferPool& staging_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKDescriptorPool& descriptor_pool) + DescriptorPool& descriptor_pool) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index ea17406dc..c52001b5a 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -16,7 +16,7 @@ namespace Vulkan { class Device; -class VKDescriptorPool; +class DescriptorPool; class VKScheduler; class BufferCacheRuntime; @@ -61,7 +61,7 @@ public: explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, VKScheduler& scheduler_, StagingBufferPool& staging_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKDescriptorPool& descriptor_pool); + DescriptorPool& descriptor_pool); void Finish(); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index ec9866605..e2f3d16bf 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; 
constexpr size_t ASTC_NUM_BINDINGS = 4; -VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { - return { - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .offset = 0, - .size = static_cast(size), - }; -} - -std::array BuildInputOutputDescriptorSetBindings() { - return {{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - }}; -} +template +inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = static_cast(size), +}; -std::array BuildASTCDescriptorSetBindings() { - return {{ - { - .binding = ASTC_BINDING_INPUT_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_ENC_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_SWIZZLE_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_OUTPUT_IMAGE, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - }}; -} +constexpr std::array INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + 
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, +}}; + +constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 2, + .texture_buffers = 0, + .image_buffers = 0, + .textures = 0, + .images = 0, + .score = 2, +}; -VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { - return { - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 2, +constexpr std::array ASTC_DESCRIPTOR_SET_BINDINGS{{ + { + .binding = ASTC_BINDING_INPUT_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = 0, - .stride = sizeof(DescriptorUpdateEntry), - }; -} + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_ENC_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_SWIZZLE_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_OUTPUT_IMAGE, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, +}}; + +constexpr DescriptorBankInfo ASTC_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 3, + .texture_buffers = 0, + .image_buffers = 0, + .textures = 0, + .images = 1, + .score = 4, +}; + +constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 2, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), +}; -std::array -BuildASTCPassDescriptorUpdateTemplateEntry() { - 
return {{ +constexpr std::array + ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{ { .dstBinding = ASTC_BINDING_INPUT_BUFFER, .dstArrayElement = 0, @@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() { .stride = sizeof(DescriptorUpdateEntry), }, }}; -} struct AstcPushConstants { std::array blocks_dims; @@ -159,14 +170,13 @@ struct AstcPushConstants { u32 block_height; u32 block_height_mask; }; - } // Anonymous namespace -VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, - vk::Span bindings, - vk::Span templates, - vk::Span push_constants, - std::span code) { +ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool, + vk::Span bindings, + vk::Span templates, + const DescriptorBankInfo& bank_info, + vk::Span push_constants, std::span code) { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -196,8 +206,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .pipelineLayout = *layout, .set = 0, }); - - descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info); } module = device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, @@ -226,23 +235,23 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ }); } -VKComputePass::~VKComputePass() = default; +ComputePass::~ComputePass() = default; -VkDescriptorSet VKComputePass::CommitDescriptorSet( - VKUpdateDescriptorQueue& update_descriptor_queue) { +VkDescriptorSet ComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue) { if (!descriptor_template) { return nullptr; } - const VkDescriptorSet set = descriptor_allocator->Commit(); + const VkDescriptorSet set = descriptor_allocator.Commit(); 
update_descriptor_queue.Send(descriptor_template.address(), set); return set; } -Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool, + StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), + : ComputePass(device, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, + VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -277,12 +286,12 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer } QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), - BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), + : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE, VULKAN_QUAD_INDEXED_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -337,14 +346,13 @@ std::pair QuadIndexedPass::Assemble( } ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& 
staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, MemoryAllocator& memory_allocator_) - : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), - BuildASTCPassDescriptorUpdateTemplateEntry(), - BuildComputePushConstantRange(sizeof(AstcPushConstants)), - ASTC_DECODER_COMP_SPV), + : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, + ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE, ASTC_DECODER_COMP_SPV), device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 5ea187c30..54c1ac4cb 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include @@ -27,13 +26,14 @@ class VKUpdateDescriptorQueue; class Image; struct StagingBufferRef; -class VKComputePass { +class ComputePass { public: - explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, - vk::Span bindings, - vk::Span templates, - vk::Span push_constants, std::span code); - ~VKComputePass(); + explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool, + vk::Span bindings, + vk::Span templates, + const DescriptorBankInfo& bank_info, + vk::Span push_constants, std::span code); + ~ComputePass(); protected: VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); @@ -44,14 +44,14 @@ protected: private: vk::DescriptorSetLayout descriptor_set_layout; - std::optional descriptor_allocator; + DescriptorAllocator descriptor_allocator; vk::ShaderModule module; }; -class Uint8Pass final : public VKComputePass { +class Uint8Pass final : public ComputePass { public: explicit Uint8Pass(const Device& device_, 
VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~Uint8Pass(); @@ -66,10 +66,10 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; -class QuadIndexedPass final : public VKComputePass { +class QuadIndexedPass final : public ComputePass { public: explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadIndexedPass(); @@ -84,10 +84,10 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; -class ASTCDecoderPass final : public VKComputePass { +class ASTCDecoderPass final : public ComputePass { public: explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, MemoryAllocator& memory_allocator_); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 990ead575..54a57c358 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,7 +18,7 @@ namespace Vulkan { -ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, +ComputePipeline::ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, vk::ShaderModule spv_module_) @@ -30,7 +30,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_set_layout = builder.CreateDescriptorSetLayout(); pipeline_layout = 
builder.CreatePipelineLayout(*descriptor_set_layout); descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); - descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); auto func{[this, &device] { const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 8efdc2926..0d4cd37be 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -25,7 +25,7 @@ class VKScheduler; class ComputePipeline { public: - explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* thread_worker, const Shader::Info& info, vk::ShaderModule spv_module); diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 3bea1ff44..8e77e4796 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include +#include #include #include "common/common_types.h" @@ -13,77 +15,149 @@ namespace Vulkan { -// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. 
-constexpr std::size_t SETS_GROW_RATE = 0x20; +// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines +constexpr size_t SETS_GROW_RATE = 16; +constexpr s32 SCORE_THRESHOLD = 3; +constexpr u32 SETS_PER_POOL = 64; -DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, - VkDescriptorSetLayout layout_) - : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{&descriptor_pool_}, layout{layout_} {} +struct DescriptorBank { + DescriptorBankInfo info; + std::vector pools; +}; -VkDescriptorSet DescriptorAllocator::Commit() { - const std::size_t index = CommitResource(); - return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; +bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept { + return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers && + texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers && + textures >= subset.textures && images >= subset.images; } -void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin)); +template +static u32 Accumulate(const Descriptors& descriptors) { + u32 count = 0; + for (const auto& descriptor : descriptors) { + count += descriptor.count; + } + return count; } -VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) - : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ - AllocateNewPool()} {} - -VKDescriptorPool::~VKDescriptorPool() = default; - -vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { - static constexpr u32 num_sets = 0x20000; - static constexpr VkDescriptorPoolSize pool_sizes[] = { - {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, - {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, - {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
num_sets * 64}, - {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, - }; +static DescriptorBankInfo MakeBankInfo(std::span infos) { + DescriptorBankInfo bank; + for (const Shader::Info& info : infos) { + bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors); + bank.storage_buffers += Accumulate(info.storage_buffers_descriptors); + bank.texture_buffers += Accumulate(info.texture_buffer_descriptors); + bank.image_buffers += Accumulate(info.image_buffer_descriptors); + bank.textures += Accumulate(info.texture_descriptors); + bank.images += Accumulate(info.image_descriptors); + } + bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers + + bank.image_buffers + bank.textures + bank.images; + return bank; +} - const VkDescriptorPoolCreateInfo ci{ +static void AllocatePool(const Device& device, DescriptorBank& bank) { + std::array pool_sizes; + size_t pool_cursor{}; + const auto add = [&](VkDescriptorType type, u32 count) { + if (count > 0) { + pool_sizes[pool_cursor++] = { + .type = type, + .descriptorCount = count * SETS_PER_POOL, + }; + } + }; + const auto& info{bank.info}; + add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers); + add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers); + add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers); + add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers); + add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures); + add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images); + bank.pools.push_back(device.GetLogical().CreateDescriptorPool({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, - .maxSets = num_sets, - .poolSizeCount = static_cast(std::size(pool_sizes)), + .maxSets = SETS_PER_POOL, + .poolSizeCount = static_cast(pool_cursor), .pPoolSizes = 
std::data(pool_sizes), - }; - return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); + })); } -vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, - std::size_t count) { - const std::vector layout_copies(count, layout); - VkDescriptorSetAllocateInfo ai{ +DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, + DescriptorBank& bank_, VkDescriptorSetLayout layout_) + : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_}, + layout{layout_} {} + +VkDescriptorSet DescriptorAllocator::Commit() { + const size_t index = CommitResource(); + return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; +} + +void DescriptorAllocator::Allocate(size_t begin, size_t end) { + sets.push_back(AllocateDescriptors(end - begin)); +} + +vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) { + const std::vector layouts(count, layout); + VkDescriptorSetAllocateInfo allocate_info{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .pNext = nullptr, - .descriptorPool = **active_pool, + .descriptorPool = *bank->pools.back(), .descriptorSetCount = static_cast(count), - .pSetLayouts = layout_copies.data(), + .pSetLayouts = layouts.data(), }; - - vk::DescriptorSets sets = active_pool->Allocate(ai); - if (!sets.IsOutOfPoolMemory()) { - return sets; + vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info); + if (!new_sets.IsOutOfPoolMemory()) { + return new_sets; } - // Our current pool is out of memory. 
Allocate a new one and retry - active_pool = AllocateNewPool(); - ai.descriptorPool = **active_pool; - sets = active_pool->Allocate(ai); - if (!sets.IsOutOfPoolMemory()) { - return sets; + AllocatePool(*device, *bank); + allocate_info.descriptorPool = *bank->pools.back(); + new_sets = bank->pools.back().Allocate(allocate_info); + if (!new_sets.IsOutOfPoolMemory()) { + return new_sets; } - // After allocating a new pool, we are out of memory again. We can't handle this from here. throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); } +DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler) + : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {} + +DescriptorPool::~DescriptorPool() = default; + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + std::span infos) { + return Allocator(layout, MakeBankInfo(infos)); +} + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + const Shader::Info& info) { + return Allocator(layout, MakeBankInfo(std::array{info})); +} + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + const DescriptorBankInfo& info) { + return DescriptorAllocator(device, master_semaphore, Bank(info), layout); +} + +DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) { + std::shared_lock read_lock{banks_mutex}; + const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) { + return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs); + }); + if (it != bank_infos.end()) { + return *banks[std::distance(bank_infos.begin(), it)].get(); + } + read_lock.unlock(); + + std::unique_lock write_lock{banks_mutex}; + bank_infos.push_back(reqs); + + auto& bank = *banks.emplace_back(std::make_unique()); + bank.info = reqs; + AllocatePool(device, bank); + return bank; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h 
b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index 2501f9967..59466aac5 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -4,21 +4,38 @@ #pragma once +#include +#include #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKDescriptorPool; class VKScheduler; +struct DescriptorBank; + +struct DescriptorBankInfo { + [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept; + + u32 uniform_buffers{}; ///< Number of uniform buffer descriptors + u32 storage_buffers{}; ///< Number of storage buffer descriptors + u32 texture_buffers{}; ///< Number of texture buffer descriptors + u32 image_buffers{}; ///< Number of image buffer descriptors + u32 textures{}; ///< Number of texture descriptors + u32 images{}; ///< Number of image descriptors + s32 score{}; ///< Number of descriptors in total +}; + class DescriptorAllocator final : public ResourcePool { + friend class DescriptorPool; + public: explicit DescriptorAllocator() = default; - explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); ~DescriptorAllocator() override = default; DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; @@ -29,36 +46,43 @@ public: VkDescriptorSet Commit(); -protected: - void Allocate(std::size_t begin, std::size_t end) override; - private: - VKDescriptorPool* descriptor_pool{}; + explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, + DescriptorBank& bank_, VkDescriptorSetLayout layout_); + + void Allocate(size_t begin, size_t end) override; + + vk::DescriptorSets AllocateDescriptors(size_t count); + + const Device* device{}; + DescriptorBank* bank{}; VkDescriptorSetLayout layout{}; - std::vector descriptors_allocations; + std::vector sets; }; 
-class VKDescriptorPool final { - friend DescriptorAllocator; - +class DescriptorPool { public: - explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); - ~VKDescriptorPool(); + explicit DescriptorPool(const Device& device, VKScheduler& scheduler); + ~DescriptorPool(); - VKDescriptorPool(const VKDescriptorPool&) = delete; - VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; + DescriptorPool& operator=(const DescriptorPool&) = delete; + DescriptorPool(const DescriptorPool&) = delete; -private: - vk::DescriptorPool* AllocateNewPool(); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, + std::span infos); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info); - vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); +private: + DescriptorBank& Bank(const DescriptorBankInfo& reqs); const Device& device; MasterSemaphore& master_semaphore; - std::vector pools; - vk::DescriptorPool* active_pool; + std::shared_mutex banks_mutex; + std::vector bank_infos; + std::vector> banks; }; } // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 674226cb7..0526c197a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -205,7 +205,7 @@ ConfigureFuncPtr ConfigureFunc(const std::array& m GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, - const Device& device, VKDescriptorPool& descriptor_pool, + const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, 
RenderPassCache& render_pass_cache, @@ -220,7 +220,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); auto func{[this, &device, &render_pass_cache, builder] { const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index edab5703f..454fc049e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -67,7 +67,7 @@ public: explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, - const Device& device, VKDescriptorPool& descriptor_pool, + const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2bd870060..9d9729022 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -647,7 +647,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& 
render_pass_cache_, BufferCache& buffer_cache_, TextureCache& texture_cache_) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index ad569acc4..eec17d3fd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -75,10 +75,10 @@ namespace Vulkan { class ComputePipeline; class Device; +class DescriptorPool; class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; -class VKDescriptorPool; class VKScheduler; class VKUpdateDescriptorQueue; @@ -105,7 +105,7 @@ public: Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, + VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, TextureCache& texture_cache); @@ -147,7 +147,7 @@ private: const Device& device; VKScheduler& scheduler; - VKDescriptorPool& descriptor_pool; + DescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; RenderPassCache& render_pass_cache; BufferCache& buffer_cache; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2f1551e65..1302bed02 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -147,7 +147,7 @@ private: VKScheduler& scheduler; StagingBufferPool staging_pool; - VKDescriptorPool descriptor_pool; + DescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; -- cgit v1.2.3 From ac8835659ead30d289ff8b907a2295d87790670f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Apr 2021 01:04:49 -0300 Subject: vulkan: Defer descriptor set work to the Vulkan 
thread Move descriptor lookup and update code to a separate thread. Delaying this removes work from the main GPU thread and allows creating descriptor layouts on another thread. This reduces a bit the workload of the main thread when new pipelines are encountered. --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 45 ++++++++++------------ src/video_core/renderer_vulkan/vk_compute_pass.h | 8 ++-- .../renderer_vulkan/vk_compute_pipeline.cpp | 36 +++++++++-------- .../renderer_vulkan/vk_compute_pipeline.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 40 +++++++++---------- .../renderer_vulkan/vk_graphics_pipeline.h | 5 ++- .../renderer_vulkan/vk_update_descriptor.cpp | 9 ----- .../renderer_vulkan/vk_update_descriptor.h | 4 +- 8 files changed, 69 insertions(+), 79 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index e2f3d16bf..7e5ba283b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -172,11 +172,12 @@ struct AstcPushConstants { }; } // Anonymous namespace -ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool, +ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, vk::Span bindings, vk::Span templates, const DescriptorBankInfo& bank_info, - vk::Span push_constants, std::span code) { + vk::Span push_constants, std::span code) + : device{device_} { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -237,15 +238,6 @@ ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool, ComputePass::~ComputePass() = default; -VkDescriptorSet ComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue) { - if (!descriptor_template) { - return nullptr; - } - const VkDescriptorSet set = 
descriptor_allocator.Commit(); - update_descriptor_queue.Send(descriptor_template.address(), set); - return set; -} - Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) @@ -265,10 +257,11 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + const VkBuffer buffer{staging.buffer}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([this, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -276,6 +269,8 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); @@ -321,10 +316,10 @@ std::pair QuadIndexedPass::Assemble( update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const VkDescriptorSet set = 
CommitDescriptorSet(update_descriptor_queue); + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([this, buffer = staging.buffer, set, num_tri_vertices, base_vertex, + scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ @@ -333,7 +328,9 @@ std::pair QuadIndexedPass::Assemble( .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; - const std::array push_constants = {base_vertex, index_shift}; + const std::array push_constants{base_vertex, index_shift}; + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), @@ -353,7 +350,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, COMPUTE_PUSH_CONSTANT_RANGE, ASTC_DECODER_COMP_SPV), - device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, + scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} ASTCDecoderPass::~ASTCDecoderPass() = default; @@ -451,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), sizeof(SWIZZLE_TABLE)); 
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); - - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); - const VkPipelineLayout vk_layout = *layout; + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; // To unswizzle the ASTC data const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); ASSERT(params.destination == (std::array{0, 0, 0})); - scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, - block_dims, params, set](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, + params, descriptor_data](vk::CommandBuffer cmdbuf) { const AstcPushConstants uniforms{ .blocks_dims = block_dims, .bytes_per_block_log2 = params.bytes_per_block_log2, @@ -470,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, .block_height = params.block_height, .block_height_mask = params.block_height_mask, }; - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); - cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); }); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 54c1ac4cb..114aef2bd 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -36,15 +36,14 @@ public: ~ComputePass(); protected: - VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); - 
+ const Device& device; vk::DescriptorUpdateTemplateKHR descriptor_template; vk::PipelineLayout layout; vk::Pipeline pipeline; - -private: vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; + +private: vk::ShaderModule module; }; @@ -99,7 +98,6 @@ public: private: void MakeDataBuffer(); - const Device& device; VKScheduler& scheduler; StagingBufferPool& staging_buffer_pool; VKUpdateDescriptorQueue& update_descriptor_queue; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 54a57c358..feaace0c5 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,21 +18,22 @@ namespace Vulkan { -ComputePipeline::ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, +ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, vk::ShaderModule spv_module_) - : update_descriptor_queue{update_descriptor_queue_}, info{info_}, + : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { - DescriptorLayoutBuilder builder{device.GetLogical()}; - builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + auto func{[this, &descriptor_pool] { + DescriptorLayoutBuilder builder{device.GetLogical()}; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); - descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); + descriptor_set_layout = builder.CreateDescriptorSetLayout(); + pipeline_layout = 
builder.CreatePipelineLayout(*descriptor_set_layout); + descriptor_update_template = + builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); - auto func{[this, &device] { const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, @@ -166,15 +167,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); } - scheduler.Record([this](vk::CommandBuffer cmdbuf) { + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - }); - if (!descriptor_set_layout) { - return; - } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); - scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + + if (!descriptor_set_layout) { + return; + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, descriptor_set, nullptr); }); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 0d4cd37be..a560e382e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -40,6 +40,7 @@ public: VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); private: + const Device& device; VKUpdateDescriptorQueue& 
update_descriptor_queue; Shader::Info info; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 0526c197a..76080bde1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -205,31 +205,31 @@ ConfigureFuncPtr ConfigureFunc(const std::array& m GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, - const Device& device, DescriptorPool& descriptor_pool, + const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, std::array stages, const std::array& infos) - : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, - buffer_cache{buffer_cache_}, scheduler{scheduler_}, + : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, + texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? 
*info : Shader::Info{}; }); - DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); + auto func{[this, &render_pass_cache, &descriptor_pool] { + DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; + descriptor_set_layout = builder.CreateDescriptorSetLayout(); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); - auto func{[this, &device, &render_pass_cache, builder] { const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; pipeline_layout = builder.CreatePipelineLayout(set_layout); descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; Validate(); - MakePipeline(device, render_pass); + MakePipeline(render_pass); std::lock_guard lock{build_mutex}; is_built = true; @@ -440,24 +440,22 @@ void GraphicsPipeline::ConfigureDraw() { build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); } - if (scheduler.UpdateGraphicsPipeline(this)) { - scheduler.Record([this](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); - }); - } - if (!descriptor_set_layout) { - return; - } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); - - scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + if (!descriptor_set_layout) { + return; + } + const VkDescriptorSet 
descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, descriptor_set, nullptr); }); } -void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { +void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { dynamic = key.state.dynamic_state; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 454fc049e..85e21f611 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -109,19 +109,20 @@ private: void ConfigureDraw(); - void MakePipeline(const Device& device, VkRenderPass render_pass); + void MakePipeline(VkRenderPass render_pass); void Validate(); const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; + const Device& device; TextureCache& texture_cache; BufferCache& buffer_cache; VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; - void (*configure_func)(GraphicsPipeline*, bool); + void (*configure_func)(GraphicsPipeline*, bool){}; std::vector transition_keys; std::vector transitions; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index bea9b8012..ce3427c9b 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -36,13 +36,4 @@ void VKUpdateDescriptorQueue::Acquire() { upload_start = payload_cursor; } -void VKUpdateDescriptorQueue::Send(const VkDescriptorUpdateTemplateKHR* update_template, - VkDescriptorSet set) { - const void* const data = upload_start; - 
const vk::Device* const logical = &device.GetLogical(); - scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { - logical->UpdateDescriptorSet(set, *update_template, data); - }); -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 82bc9920c..d7de4c490 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -39,7 +39,9 @@ public: void Acquire(); - void Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set); + const DescriptorUpdateEntry* UpdateData() const noexcept { + return upload_start; + } void AddSampledImage(VkImageView image_view, VkSampler sampler) { *(payload_cursor++) = VkDescriptorImageInfo{ -- cgit v1.2.3 From 025b20f96ae588777e3ff11083cc4184bf418af6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 26 Apr 2021 03:53:26 -0300 Subject: shader: Move pipeline cache logic to separate files Move code to separate files to be able to reuse it from OpenGL. This greatly simplifies the pipeline cache logic on Vulkan. Transform feedback state is not yet abstracted and it's still intrusively stored inside vk_pipeline_cache. It will be moved when needed on OpenGL. 
--- src/video_core/CMakeLists.txt | 3 + src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 21 +- src/video_core/renderer_opengl/gl_shader_cache.h | 58 +- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 719 +++------------------ src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/shader_cache.cpp | 233 +++++++ src/video_core/shader_cache.h | 198 ++---- src/video_core/shader_environment.cpp | 453 +++++++++++++ src/video_core/shader_environment.h | 198 ++++++ 12 files changed, 1095 insertions(+), 824 deletions(-) create mode 100644 src/video_core/shader_cache.cpp create mode 100644 src/video_core/shader_environment.cpp create mode 100644 src/video_core/shader_environment.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3166a69dc..6e0e4b8f5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -145,7 +145,10 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h + shader_cache.cpp shader_cache.h + shader_environment.cpp + shader_environment.h shader_notify.cpp shader_notify.h surface.cpp diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1f58f8791..2fdcbe4ba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -217,7 +217,7 @@ private: TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - ShaderCacheOpenGL shader_cache; + ShaderCache shader_cache; QueryCache query_cache; AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4dd166156..c3e490b40 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -29,18 +29,13 @@ namespace OpenGL { -Shader::Shader() = default; - -Shader::~Shader() = default; - -ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, - Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_) - : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_}, - maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {} - -ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; +ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, + Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_) + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + emu_window{emu_window_}, gpu{gpu_}, device{device_} {} + +ShaderCache::~ShaderCache() = default; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index ad3d15a76..96520e17c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -36,27 +36,59 @@ class RasterizerOpenGL; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -class Shader { +struct GraphicsProgramKey { + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + }; + + std::array unique_hashes; + std::array color_formats; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, 
u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, u32> tessellation_primitive; + BitField<8, 2, u32> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + u32 padding; + TransformFeedbackState xfb_state; + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsProgramKey); + } else { + return offsetof(GraphicsProgramKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class GraphicsProgram { public: - explicit Shader(); - ~Shader(); +private: }; -class ShaderCacheOpenGL final : public VideoCommon::ShaderCache { +class ShaderCache : public VideoCommon::ShaderCache { public: - explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, - Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_); - ~ShaderCacheOpenGL() override; + explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, + Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_); + ~ShaderCache(); private: Core::Frontend::EmuWindow& emu_window; Tegra::GPU& gpu; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; }; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 85e21f611..e362d13c5 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -23,7 +23,7 @@ namespace Vulkan { struct GraphicsPipelineCacheKey { - std::array unique_hashes; + 
std::array unique_hashes; FixedPipelineState state; size_t Hash() const noexcept; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9d9729022..0822862fe 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -11,7 +11,8 @@ #include "common/bit_cast.h" #include "common/cityhash.h" -#include "common/file_util.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/microprofile.h" #include "common/thread_worker.h" #include "core/core.h" @@ -36,6 +37,7 @@ #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -43,449 +45,19 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -template -auto MakeSpan(Container& container) { - return std::span(container.data(), container.size()); -} - -static u64 MakeCbufKey(u32 index, u32 offset) { - return (static_cast(index) << 32) | offset; -} - -class GenericEnvironment : public Shader::Environment { -public: - explicit GenericEnvironment() = default; - explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, - u32 start_address_) - : gpu_memory{&gpu_memory_}, program_base{program_base_} { - start_address = start_address_; - } - - ~GenericEnvironment() override = default; - - u32 TextureBoundBuffer() const final { - return texture_bound; - } - - u32 LocalMemorySize() const final { - return local_memory_size; - } - - u32 SharedMemorySize() const final { - return shared_memory_size; - } - - std::array WorkgroupSize() const final { - return workgroup_size; - } - - u64 ReadInstruction(u32 address) final { - read_lowest = 
std::min(read_lowest, address); - read_highest = std::max(read_highest, address); - - if (address >= cached_lowest && address < cached_highest) { - return code[(address - cached_lowest) / INST_SIZE]; - } - has_unbound_instructions = true; - return gpu_memory->Read(program_base + address); - } - - std::optional Analyze() { - const std::optional size{TryFindSize()}; - if (!size) { - return std::nullopt; - } - cached_lowest = start_address; - cached_highest = start_address + static_cast(*size); - return Common::CityHash128(reinterpret_cast(code.data()), *size); - } - - void SetCachedSize(size_t size_bytes) { - cached_lowest = start_address; - cached_highest = start_address + static_cast(size_bytes); - code.resize(CachedSize()); - gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); - } - - [[nodiscard]] size_t CachedSize() const noexcept { - return cached_highest - cached_lowest + INST_SIZE; - } - - [[nodiscard]] size_t ReadSize() const noexcept { - return read_highest - read_lowest + INST_SIZE; - } - - [[nodiscard]] bool CanBeSerialized() const noexcept { - return !has_unbound_instructions; - } - - [[nodiscard]] u128 CalculateHash() const { - const size_t size{ReadSize()}; - const auto data{std::make_unique(size)}; - gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); - return Common::CityHash128(data.get(), size); - } - - void Serialize(std::ofstream& file) const { - const u64 code_size{static_cast(CachedSize())}; - const u64 num_texture_types{static_cast(texture_types.size())}; - const u64 num_cbuf_values{static_cast(cbuf_values.size())}; - - file.write(reinterpret_cast(&code_size), sizeof(code_size)) - .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) - .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) - .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) - .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) - 
.write(reinterpret_cast(&start_address), sizeof(start_address)) - .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) - .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) - .write(reinterpret_cast(&stage), sizeof(stage)) - .write(reinterpret_cast(code.data()), code_size); - for (const auto [key, type] : texture_types) { - file.write(reinterpret_cast(&key), sizeof(key)) - .write(reinterpret_cast(&type), sizeof(type)); - } - for (const auto [key, type] : cbuf_values) { - file.write(reinterpret_cast(&key), sizeof(key)) - .write(reinterpret_cast(&type), sizeof(type)); - } - if (stage == Shader::Stage::Compute) { - file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) - .write(reinterpret_cast(&shared_memory_size), - sizeof(shared_memory_size)); - } else { - file.write(reinterpret_cast(&sph), sizeof(sph)); - } - } - -protected: - static constexpr size_t INST_SIZE = sizeof(u64); - - std::optional TryFindSize() { - constexpr size_t BLOCK_SIZE = 0x1000; - constexpr size_t MAXIMUM_SIZE = 0x100000; - - constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; - constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - - GPUVAddr guest_addr{program_base + start_address}; - size_t offset{0}; - size_t size{BLOCK_SIZE}; - while (size <= MAXIMUM_SIZE) { - code.resize(size / INST_SIZE); - u64* const data = code.data() + offset / INST_SIZE; - gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); - for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { - const u64 inst = data[index / INST_SIZE]; - if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { - return offset + index; - } - } - guest_addr += BLOCK_SIZE; - size += BLOCK_SIZE; - offset += BLOCK_SIZE; - } - return std::nullopt; - } - - Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - u32 raw) { - const TextureHandle handle{raw, via_header_index}; - const GPUVAddr descriptor_addr{tic_addr + handle.image * 
sizeof(Tegra::Texture::TICEntry)}; - Tegra::Texture::TICEntry entry; - gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); - - const Shader::TextureType result{[&] { - switch (entry.texture_type) { - case Tegra::Texture::TextureType::Texture1D: - return Shader::TextureType::Color1D; - case Tegra::Texture::TextureType::Texture2D: - case Tegra::Texture::TextureType::Texture2DNoMipmap: - return Shader::TextureType::Color2D; - case Tegra::Texture::TextureType::Texture3D: - return Shader::TextureType::Color3D; - case Tegra::Texture::TextureType::TextureCubemap: - return Shader::TextureType::ColorCube; - case Tegra::Texture::TextureType::Texture1DArray: - return Shader::TextureType::ColorArray1D; - case Tegra::Texture::TextureType::Texture2DArray: - return Shader::TextureType::ColorArray2D; - case Tegra::Texture::TextureType::Texture1DBuffer: - return Shader::TextureType::Buffer; - case Tegra::Texture::TextureType::TextureCubeArray: - return Shader::TextureType::ColorArrayCube; - default: - throw Shader::NotImplementedException("Unknown texture type"); - } - }()}; - texture_types.emplace(raw, result); - return result; - } - - Tegra::MemoryManager* gpu_memory{}; - GPUVAddr program_base{}; - - std::vector code; - std::unordered_map texture_types; - std::unordered_map cbuf_values; - - u32 local_memory_size{}; - u32 texture_bound{}; - u32 shared_memory_size{}; - std::array workgroup_size{}; - - u32 read_lowest = std::numeric_limits::max(); - u32 read_highest = 0; - - u32 cached_lowest = std::numeric_limits::max(); - u32 cached_highest = 0; - - bool has_unbound_instructions = false; -}; - namespace { using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; +using VideoCommon::GraphicsEnvironment; -// TODO: Move this to a separate file -constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 
CACHE_VERSION{2}; - -class GraphicsEnvironment final : public GenericEnvironment { -public: - explicit GraphicsEnvironment() = default; - explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, - GPUVAddr program_base_, u32 start_address_) - : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { - gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); - switch (program) { - case Maxwell::ShaderProgram::VertexA: - stage = Shader::Stage::VertexA; - stage_index = 0; - break; - case Maxwell::ShaderProgram::VertexB: - stage = Shader::Stage::VertexB; - stage_index = 0; - break; - case Maxwell::ShaderProgram::TesselationControl: - stage = Shader::Stage::TessellationControl; - stage_index = 1; - break; - case Maxwell::ShaderProgram::TesselationEval: - stage = Shader::Stage::TessellationEval; - stage_index = 2; - break; - case Maxwell::ShaderProgram::Geometry: - stage = Shader::Stage::Geometry; - stage_index = 3; - break; - case Maxwell::ShaderProgram::Fragment: - stage = Shader::Stage::Fragment; - stage_index = 4; - break; - default: - UNREACHABLE_MSG("Invalid program={}", program); - break; - } - const u64 local_size{sph.LocalMemorySize()}; - ASSERT(local_size <= std::numeric_limits::max()); - local_memory_size = static_cast(local_size); - texture_bound = maxwell3d->regs.tex_cb_index; - } - - ~GraphicsEnvironment() override = default; - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; - ASSERT(cbuf.enabled); - u32 value{}; - if (cbuf_offset < cbuf.size) { - value = gpu_memory->Read(cbuf.address + cbuf_offset); - } - cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); - return value; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto& regs{maxwell3d->regs}; - const bool 
via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); - } - -private: - Tegra::Engines::Maxwell3D* maxwell3d{}; - size_t stage_index{}; -}; - -class ComputeEnvironment final : public GenericEnvironment { -public: - explicit ComputeEnvironment() = default; - explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, - u32 start_address_) - : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ - &kepler_compute_} { - const auto& qmd{kepler_compute->launch_description}; - stage = Shader::Stage::Compute; - local_memory_size = qmd.local_pos_alloc; - texture_bound = kepler_compute->regs.tex_cb_index; - shared_memory_size = qmd.shared_alloc; - workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; - } - - ~ComputeEnvironment() override = default; - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto& qmd{kepler_compute->launch_description}; - ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); - const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; - u32 value{}; - if (cbuf_offset < cbuf.size) { - value = gpu_memory->Read(cbuf.Address() + cbuf_offset); - } - cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); - return value; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto& regs{kepler_compute->regs}; - const auto& qmd{kepler_compute->launch_description}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); - } - -private: - Tegra::Engines::KeplerCompute* kepler_compute{}; -}; - -void SerializePipeline(std::span key, std::span envs, - std::ofstream& file) { - if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { - return; - } - const u32 num_envs{static_cast(envs.size())}; - 
file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); - for (const GenericEnvironment* const env : envs) { - env->Serialize(file); - } - file.write(key.data(), key.size_bytes()); -} - -template -void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) { - try { - std::ofstream file; - file.exceptions(std::ifstream::failbit); - Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app); - if (!file.is_open()) { - LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); - return; - } - if (file.tellp() == 0) { - file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) - .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); - } - const std::span key_span(reinterpret_cast(&key), sizeof(key)); - SerializePipeline(key_span, MakeSpan(envs), file); - - } catch (const std::ios_base::failure& e) { - LOG_ERROR(Common_Filesystem, "{}", e.what()); - if (!Common::FS::Delete(filename)) { - LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename); - } - } +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); } -class FileEnvironment final : public Shader::Environment { -public: - void Deserialize(std::ifstream& file) { - u64 code_size{}; - u64 num_texture_types{}; - u64 num_cbuf_values{}; - file.read(reinterpret_cast(&code_size), sizeof(code_size)) - .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) - .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) - .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) - .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) - .read(reinterpret_cast(&start_address), sizeof(start_address)) - .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) - .read(reinterpret_cast(&read_highest), sizeof(read_highest)) - .read(reinterpret_cast(&stage), sizeof(stage)); - code = std::make_unique(Common::DivCeil(code_size, 
sizeof(u64))); - file.read(reinterpret_cast(code.get()), code_size); - for (size_t i = 0; i < num_texture_types; ++i) { - u32 key; - Shader::TextureType type; - file.read(reinterpret_cast(&key), sizeof(key)) - .read(reinterpret_cast(&type), sizeof(type)); - texture_types.emplace(key, type); - } - for (size_t i = 0; i < num_cbuf_values; ++i) { - u64 key; - u32 value; - file.read(reinterpret_cast(&key), sizeof(key)) - .read(reinterpret_cast(&value), sizeof(value)); - cbuf_values.emplace(key, value); - } - if (stage == Shader::Stage::Compute) { - file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) - .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); - } else { - file.read(reinterpret_cast(&sph), sizeof(sph)); - } - } - - u64 ReadInstruction(u32 address) override { - if (address < read_lowest || address > read_highest) { - throw Shader::LogicError("Out of bounds address {}", address); - } - return code[(address - read_lowest) / sizeof(u64)]; - } - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; - if (it == cbuf_values.end()) { - throw Shader::LogicError("Uncached read texture type"); - } - return it->second; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto it{texture_types.find(handle)}; - if (it == texture_types.end()) { - throw Shader::LogicError("Uncached read texture type"); - } - return it->second; - } - - u32 LocalMemorySize() const override { - return local_memory_size; - } - - u32 SharedMemorySize() const override { - return shared_memory_size; - } - - u32 TextureBoundBuffer() const override { - return texture_bound; - } - - std::array WorkgroupSize() const override { - return workgroup_size; - } - -private: - std::unique_ptr code; - std::unordered_map texture_types; - std::unordered_map cbuf_values; - std::array workgroup_size{}; - u32 local_memory_size{}; - u32 shared_memory_size{}; - u32 
texture_bound{}; - u32 read_lowest{}; - u32 read_highest{}; -}; - Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { switch (comparison) { case Maxwell::ComparisonOp::Never: @@ -518,113 +90,6 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso } } // Anonymous namespace -void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - if (title_id == 0) { - return; - } - std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)}; - std::string base_dir{shader_dir + "/vulkan"}; - std::string transferable_dir{base_dir + "/transferable"}; - std::string precompiled_dir{base_dir + "/precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { - LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); - return; - } - pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); - - struct { - std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; - } state; - - std::ifstream file; - Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate); - if (!file.is_open()) { - return; - } - file.exceptions(std::ifstream::failbit); - const auto end{file.tellg()}; - file.seekg(0, std::ios::beg); - - std::array magic_number; - u32 cache_version; - file.read(magic_number.data(), magic_number.size()) - .read(reinterpret_cast(&cache_version), sizeof(cache_version)); - if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { - file.close(); - if (Common::FS::Delete(pipeline_cache_filename)) { - if (magic_number != MAGIC_NUMBER) { - LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file"); - } - if (cache_version != CACHE_VERSION) { - LOG_INFO(Render_Vulkan, "Deleting old pipeline cache"); - } - 
} else { - LOG_ERROR(Render_Vulkan, - "Invalid pipeline cache file and failed to delete it in \"{}\"", - pipeline_cache_filename); - } - return; - } - while (file.tellg() != end) { - if (stop_loading) { - return; - } - u32 num_envs{}; - file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); - std::vector envs(num_envs); - for (FileEnvironment& env : envs) { - env.Deserialize(file); - } - if (envs.front().ShaderStage() == Shader::Stage::Compute) { - ComputePipelineCacheKey key; - file.read(reinterpret_cast(&key), sizeof(key)); - - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { - ShaderPools pools; - auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); - } else { - GraphicsPipelineCacheKey key; - file.read(reinterpret_cast(&key), sizeof(key)); - - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { - ShaderPools pools; - boost::container::static_vector env_ptrs; - for (auto& env : envs) { - env_ptrs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); - } - ++state.total; - } - { - std::lock_guard lock{state.mutex}; - callback(VideoCore::LoadCallbackStage::Build, 0, state.total); - state.has_loaded = true; - } - workers.WaitForRequests(); -} - size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); return static_cast(hash); @@ -643,17 +108,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c 
return std::memcmp(&rhs, this, Size()) == 0; } -PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, TextureCache& texture_cache_) - : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), @@ -700,7 +163,7 @@ PipelineCache::~PipelineCache() = default; GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - if (!RefreshStages()) { + if (!RefreshStages(graphics_key.unique_hashes)) { current_pipeline = nullptr; return nullptr; } @@ -728,21 +191,14 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; - const auto& qmd{kepler_compute.launch_description}; - const GPUVAddr shader_addr{program_base + qmd.program_start}; - const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; - if (!cpu_shader_addr) { - return nullptr; - } - const 
ShaderInfo* shader{TryGet(*cpu_shader_addr)}; + const ShaderInfo* const shader{ComputeShader()}; if (!shader) { - ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; - shader = MakeShaderInfo(env, *cpu_shader_addr); + return nullptr; } + const auto& qmd{kepler_compute.launch_description}; const ComputePipelineCacheKey key{ - .unique_hash{shader->unique_hash}, - .shared_memory_size{qmd.shared_alloc}, + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; @@ -754,58 +210,75 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { return pipeline.get(); } -bool PipelineCache::RefreshStages() { - auto& dirty{maxwell3d.dirty.flags}; - if (!dirty[VideoCommon::Dirty::Shaders]) { - return last_valid_shaders; +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; } - dirty[VideoCommon::Dirty::Shaders] = false; - - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - graphics_key.unique_hashes[index] = u128{}; - continue; - } - const auto& shader_config{maxwell3d.regs.shader_config[index]}; - const auto program{static_cast(index)}; - const GPUVAddr shader_addr{base_addr + shader_config.offset}; - const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; - if (!cpu_shader_addr) { - LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); - last_valid_shaders = false; - return false; - } - const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; - if (!shader_info) { - const u32 start_address{shader_config.offset}; - GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, 
start_address}; - shader_info = MakeShaderInfo(env, *cpu_shader_addr); - } - shader_infos[index] = shader_info; - graphics_key.unique_hashes[index] = shader_info->unique_hash; + auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + auto base_dir{shader_dir / "vulkan"}; + auto transferable_dir{base_dir / "transferable"}; + auto precompiled_dir{base_dir / "precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; } - last_valid_shaders = true; - return true; -} + pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); -const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { - auto info = std::make_unique(); - if (const std::optional cached_hash{env.Analyze()}) { - info->unique_hash = *cached_hash; - info->size_bytes = env.CachedSize(); - } else { - // Slow path, not really hit on commercial games - // Build a control flow graph to get the real shader size - main_pools.flow_block.ReleaseContents(); - Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()}; - info->unique_hash = env.CalculateHash(); - info->size_bytes = env.ReadSize(); - } - const size_t size_bytes{info->size_bytes}; - const ShaderInfo* const result{info.get()}; - Register(std::move(info), cpu_addr, size_bytes); - return result; + struct { + std::mutex mutex; + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { + ShaderPools pools; + auto pipeline{CreateComputePipeline(pools, key, env, false)}; + + std::lock_guard 
lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const auto load_graphics{[&](std::ifstream& file, std::vector envs) { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { + ShaderPools pools; + boost::container::static_vector env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; + + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics); + + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); + + workers.WaitForRequests(); } std::unique_ptr PipelineCache::CreateGraphicsPipeline( @@ -815,7 +288,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( size_t env_index{0}; std::array programs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == 0) { continue; } Shader::Environment& env{*envs[env_index]}; @@ -830,7 +303,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( u32 binding{0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == 0) { continue; } UNIMPLEMENTED_IF(index == 0); @@ -844,8 +317,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( device.SaveShader(code); modules[stage_index] = BuildShader(device, 
code); if (device.HasDebuggingToolAttached()) { - const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], - key.unique_hashes[index][1])}; + const std::string name{fmt::format("{:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } } @@ -863,7 +335,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (graphics_key.unique_hashes[index] == 0) { continue; } const auto program{static_cast(index)}; @@ -871,7 +343,6 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); } auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; @@ -882,11 +353,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { boost::container::static_vector env_ptrs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] != u128{}) { + if (key.unique_hashes[index] != 0) { env_ptrs.push_back(&envs[index]); } } - SerializePipeline(key, env_ptrs, pipeline_cache_filename); + VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename); }); return pipeline; } @@ -902,8 +373,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { serialization_thread.QueueWork([this, key, env = std::move(env)] { - SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + VideoCommon::SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); }); } return pipeline; @@ -921,7 +392,7 @@ std::unique_ptr 
PipelineCache::CreateComputePipeline( device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { - const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; + const auto name{fmt::format("{:016x}", key.unique_hash)}; spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; @@ -1035,7 +506,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Profile profile{base_profile}; const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != u128{}}; + const bool has_geometry{key.unique_hashes[4] != 0}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast(key.state.point_size)}; switch (stage) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eec17d3fd..4e48b4956 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -42,7 +43,7 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - u128 unique_hash; + u64 unique_hash; u32 shared_memory_size; std::array workgroup_size; @@ -76,16 +77,12 @@ namespace Vulkan { class ComputePipeline; class Device; class DescriptorPool; -class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; class VKScheduler; class VKUpdateDescriptorQueue; -struct ShaderInfo { - u128 unique_hash{}; - size_t size_bytes{}; -}; +using VideoCommon::ShaderInfo; struct ShaderPools { void ReleaseContents() { @@ -99,17 +96,16 @@ struct ShaderPools { Shader::ObjectPool flow_block; }; -class PipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache : public VideoCommon::ShaderCache { public: - explicit 
PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, TextureCache& texture_cache); - ~PipelineCache() override; + ~PipelineCache(); [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); @@ -119,10 +115,6 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: - bool RefreshStages(); - - const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); - std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( @@ -140,11 +132,6 @@ private: Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program); - Tegra::GPU& gpu; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - const Device& device; VKScheduler& scheduler; DescriptorPool& descriptor_pool; @@ -156,16 +143,13 @@ private: GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; - std::array shader_infos{}; - bool last_valid_shaders{}; - std::unordered_map> compute_cache; std::unordered_map> graphics_cache; ShaderPools main_pools; Shader::Profile base_profile; - std::string pipeline_cache_filename; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; Common::ThreadWorker serialization_thread; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7df169c85..fa6daeb3a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -149,7 +149,7 @@ 
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, + pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, texture_cache), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp new file mode 100644 index 000000000..b8b8eace5 --- /dev/null +++ b/src/video_core/shader_cache.cpp @@ -0,0 +1,233 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include + +#include "common/assert.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" + +namespace VideoCommon { + +void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); + RemovePendingShaders(); +} + +void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { + std::lock_guard lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); +} + +void ShaderCache::SyncGuestHost() { + std::scoped_lock lock{invalidation_mutex}; + RemovePendingShaders(); +} + +ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& 
maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_) + : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, + rasterizer{rasterizer_} {} + +bool ShaderCache::RefreshStages(std::array& unique_hashes) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[VideoCommon::Dirty::Shaders]) { + return last_shaders_valid; + } + dirty[VideoCommon::Dirty::Shaders] = false; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { + unique_hashes[index] = 0; + continue; + } + const auto& shader_config{maxwell3d.regs.shader_config[index]}; + const auto program{static_cast(index)}; + const GPUVAddr shader_addr{base_addr + shader_config.offset}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); + last_shaders_valid = false; + return false; + } + const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; + if (!shader_info) { + const u32 start_address{shader_config.offset}; + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + shader_info = MakeShaderInfo(env, *cpu_shader_addr); + } + shader_infos[index] = shader_info; + unique_hashes[index] = shader_info->unique_hash; + } + last_shaders_valid = true; + return true; +} + +const ShaderInfo* ShaderCache::ComputeShader() { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + qmd.program_start}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); + return nullptr; + } + if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) 
{ + return shader; + } + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + return MakeShaderInfo(env, *cpu_shader_addr); +} + +ShaderInfo* ShaderCache::TryGet(VAddr addr) const { + std::scoped_lock lock{lookup_mutex}; + + const auto it = lookup_cache.find(addr); + if (it == lookup_cache.end()) { + return nullptr; + } + return it->second->data; +} + +void ShaderCache::Register(std::unique_ptr data, VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex, lookup_mutex}; + + const VAddr addr_end = addr + size; + Entry* const entry = NewEntry(addr, addr_end, data.get()); + + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + invalidation_cache[page].push_back(entry); + } + + storage.push_back(std::move(data)); + + rasterizer.UpdatePagesCachedCount(addr, size, 1); +} + +void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { + const VAddr addr_end = addr + size; + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + auto it = invalidation_cache.find(page); + if (it == invalidation_cache.end()) { + continue; + } + InvalidatePageEntries(it->second, addr, addr_end); + } +} + +void ShaderCache::RemovePendingShaders() { + if (marked_for_removal.empty()) { + return; + } + // Remove duplicates + std::ranges::sort(marked_for_removal); + marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), + marked_for_removal.end()); + + std::vector removed_shaders; + removed_shaders.reserve(marked_for_removal.size()); + + std::scoped_lock lock{lookup_mutex}; + + for (Entry* const entry : marked_for_removal) { + removed_shaders.push_back(entry->data); + + const auto it = lookup_cache.find(entry->addr_start); + ASSERT(it != lookup_cache.end()); + lookup_cache.erase(it); + } + marked_for_removal.clear(); + + if (!removed_shaders.empty()) { + 
RemoveShadersFromStorage(std::move(removed_shaders)); + } +} + +void ShaderCache::InvalidatePageEntries(std::vector& entries, VAddr addr, VAddr addr_end) { + size_t index = 0; + while (index < entries.size()) { + Entry* const entry = entries[index]; + if (!entry->Overlaps(addr, addr_end)) { + ++index; + continue; + } + + UnmarkMemory(entry); + RemoveEntryFromInvalidationCache(entry); + marked_for_removal.push_back(entry); + } +} + +void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) { + const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { + const auto entries_it = invalidation_cache.find(page); + ASSERT(entries_it != invalidation_cache.end()); + std::vector& entries = entries_it->second; + + const auto entry_it = std::ranges::find(entries, entry); + ASSERT(entry_it != entries.end()); + entries.erase(entry_it); + } +} + +void ShaderCache::UnmarkMemory(Entry* entry) { + if (!entry->is_memory_marked) { + return; + } + entry->is_memory_marked = false; + + const VAddr addr = entry->addr_start; + const size_t size = entry->addr_end - addr; + rasterizer.UpdatePagesCachedCount(addr, size, -1); +} + +void ShaderCache::RemoveShadersFromStorage(std::vector removed_shaders) { + // Remove them from the cache + std::erase_if(storage, [&removed_shaders](const std::unique_ptr& shader) { + return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end(); + }); +} + +ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) { + auto entry = std::make_unique(Entry{addr, addr_end, data}); + Entry* const entry_pointer = entry.get(); + + lookup_cache.emplace(addr, std::move(entry)); + return entry_pointer; +} + +const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { + auto info = std::make_unique(); + if (const std::optional cached_hash{env.Analyze()}) { + info->unique_hash = *cached_hash; + 
info->size_bytes = env.CachedSize(); + } else { + // Slow path, not really hit on commercial games + // Build a control flow graph to get the real shader size + Shader::ObjectPool flow_block; + Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()}; + info->unique_hash = env.CalculateHash(); + info->size_bytes = env.ReadSize(); + } + const size_t size_bytes{info->size_bytes}; + const ShaderInfo* const result{info.get()}; + Register(std::move(info), cpu_addr, size_bytes); + return result; +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 015a789d6..89a4bcc84 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -4,20 +4,28 @@ #pragma once -#include #include #include #include #include #include -#include "common/assert.h" #include "common/common_types.h" #include "video_core/rasterizer_interface.h" +namespace Tegra { +class MemoryManager; +} + namespace VideoCommon { -template +class GenericEnvironment; + +struct ShaderInfo { + u64 unique_hash{}; + size_t size_bytes{}; +}; + class ShaderCache { static constexpr u64 PAGE_BITS = 14; static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; @@ -25,206 +33,100 @@ class ShaderCache { struct Entry { VAddr addr_start; VAddr addr_end; - T* data; + ShaderInfo* data; bool is_memory_marked = true; - constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { + bool Overlaps(VAddr start, VAddr end) const noexcept { return start < addr_end && addr_start < end; } }; public: - virtual ~ShaderCache() = default; - /// @brief Removes shaders inside a given region /// @note Checks for ranges /// @param addr Start address of the invalidation /// @param size Number of bytes of the invalidation - void InvalidateRegion(VAddr addr, std::size_t size) { - std::scoped_lock lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - RemovePendingShaders(); - } + void InvalidateRegion(VAddr addr, size_t size); /// @brief Unmarks a 
memory region as cached and marks it for removal /// @param addr Start address of the CPU write operation /// @param size Number of bytes of the CPU write operation - void OnCPUWrite(VAddr addr, std::size_t size) { - std::lock_guard lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - } + void OnCPUWrite(VAddr addr, size_t size); /// @brief Flushes delayed removal operations - void SyncGuestHost() { - std::scoped_lock lock{invalidation_mutex}; - RemovePendingShaders(); - } + void SyncGuestHost(); + +protected: + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_); + + /// @brief Update the hashes and information of shader stages + /// @param unique_hashes Shader hashes to store into when a stage is enabled + /// @return True on success, false on error + bool RefreshStages(std::array& unique_hashes); + + /// @brief Returns information about the current compute shader + /// @return Pointer to a valid shader, nullptr on error + const ShaderInfo* ComputeShader(); + + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + std::array shader_infos{}; + bool last_shaders_valid = false; + +private: /// @brief Tries to obtain a cached shader starting in a given address /// @note Doesn't check for ranges, the given address has to be the start of the shader /// @param addr Start address of the shader, this doesn't cache for region /// @return Pointer to a valid shader, nullptr when nothing is found - T* TryGet(VAddr addr) const { - std::scoped_lock lock{lookup_mutex}; - - const auto it = lookup_cache.find(addr); - if (it == lookup_cache.end()) { - return nullptr; - } - return it->second->data; - } - -protected: - explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} + ShaderInfo* TryGet(VAddr addr) const; /// @brief
Register in the cache a given entry /// @param data Shader to store in the cache /// @param addr Start address of the shader that will be registered /// @param size Size in bytes of the shader - void Register(std::unique_ptr data, VAddr addr, std::size_t size) { - std::scoped_lock lock{invalidation_mutex, lookup_mutex}; - - const VAddr addr_end = addr + size; - Entry* const entry = NewEntry(addr, addr_end, data.get()); - - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - invalidation_cache[page].push_back(entry); - } - - storage.push_back(std::move(data)); + void Register(std::unique_ptr data, VAddr addr, size_t size); - rasterizer.UpdatePagesCachedCount(addr, size, 1); - } - - /// @brief Called when a shader is going to be removed - /// @param shader Shader that will be removed - /// @pre invalidation_cache is locked - /// @pre lookup_mutex is locked - virtual void OnShaderRemoval([[maybe_unused]] T* shader) {} - -private: /// @brief Invalidate pages in a given region /// @pre invalidation_mutex is locked - void InvalidatePagesInRegion(VAddr addr, std::size_t size) { - const VAddr addr_end = addr + size; - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - auto it = invalidation_cache.find(page); - if (it == invalidation_cache.end()) { - continue; - } - InvalidatePageEntries(it->second, addr, addr_end); - } - } + void InvalidatePagesInRegion(VAddr addr, size_t size); /// @brief Remove shaders marked for deletion /// @pre invalidation_mutex is locked - void RemovePendingShaders() { - if (marked_for_removal.empty()) { - return; - } - // Remove duplicates - std::sort(marked_for_removal.begin(), marked_for_removal.end()); - marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), - marked_for_removal.end()); - - std::vector removed_shaders; - 
removed_shaders.reserve(marked_for_removal.size()); - - std::scoped_lock lock{lookup_mutex}; - - for (Entry* const entry : marked_for_removal) { - removed_shaders.push_back(entry->data); - - const auto it = lookup_cache.find(entry->addr_start); - ASSERT(it != lookup_cache.end()); - lookup_cache.erase(it); - } - marked_for_removal.clear(); - - if (!removed_shaders.empty()) { - RemoveShadersFromStorage(std::move(removed_shaders)); - } - } + void RemovePendingShaders(); /// @brief Invalidates entries in a given range for the passed page /// @param entries Vector of entries in the page, it will be modified on overlaps /// @param addr Start address of the invalidation /// @param addr_end Non-inclusive end address of the invalidation /// @pre invalidation_mutex is locked - void InvalidatePageEntries(std::vector& entries, VAddr addr, VAddr addr_end) { - std::size_t index = 0; - while (index < entries.size()) { - Entry* const entry = entries[index]; - if (!entry->Overlaps(addr, addr_end)) { - ++index; - continue; - } - - UnmarkMemory(entry); - RemoveEntryFromInvalidationCache(entry); - marked_for_removal.push_back(entry); - } - } + void InvalidatePageEntries(std::vector& entries, VAddr addr, VAddr addr_end); /// @brief Removes all references to an entry in the invalidation cache /// @param entry Entry to remove from the invalidation cache /// @pre invalidation_mutex is locked - void RemoveEntryFromInvalidationCache(const Entry* entry) { - const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { - const auto entries_it = invalidation_cache.find(page); - ASSERT(entries_it != invalidation_cache.end()); - std::vector& entries = entries_it->second; - - const auto entry_it = std::find(entries.begin(), entries.end(), entry); - ASSERT(entry_it != entries.end()); - entries.erase(entry_it); - } - } + void RemoveEntryFromInvalidationCache(const Entry* entry); /// @brief Unmarks an entry from the 
rasterizer cache /// @param entry Entry to unmark from memory - void UnmarkMemory(Entry* entry) { - if (!entry->is_memory_marked) { - return; - } - entry->is_memory_marked = false; - - const VAddr addr = entry->addr_start; - const std::size_t size = entry->addr_end - addr; - rasterizer.UpdatePagesCachedCount(addr, size, -1); - } + void UnmarkMemory(Entry* entry); /// @brief Removes a vector of shaders from a list /// @param removed_shaders Shaders to be removed from the storage /// @pre invalidation_mutex is locked /// @pre lookup_mutex is locked - void RemoveShadersFromStorage(std::vector removed_shaders) { - // Notify removals - for (T* const shader : removed_shaders) { - OnShaderRemoval(shader); - } - - // Remove them from the cache - const auto is_removed = [&removed_shaders](const std::unique_ptr& shader) { - return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != - removed_shaders.end(); - }; - std::erase_if(storage, is_removed); - } + void RemoveShadersFromStorage(std::vector removed_shaders); /// @brief Creates a new entry in the lookup cache and returns its pointer /// @pre lookup_mutex is locked - Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { - auto entry = std::make_unique(Entry{addr, addr_end, data}); - Entry* const entry_pointer = entry.get(); + Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data); - lookup_cache.emplace(addr, std::move(entry)); - return entry_pointer; - } + /// @brief Create a new shader entry and register it + const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); VideoCore::RasterizerInterface& rasterizer; @@ -233,7 +135,7 @@ private: std::unordered_map> lookup_cache; std::unordered_map> invalidation_cache; - std::vector> storage; + std::vector> storage; std::vector marked_for_removal; }; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp new file mode 100644 index 000000000..5dccc0097 --- /dev/null +++ 
b/src/video_core/shader_environment.cpp @@ -0,0 +1,453 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/cityhash.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "common/fs/fs.h" +#include "common/logging/log.h" +#include "shader_recompiler/environment.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_environment.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; +constexpr u32 CACHE_VERSION = 3; + +constexpr size_t INST_SIZE = sizeof(u64); + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +static u64 MakeCbufKey(u32 index, u32 offset) { + return (static_cast(index) << 32) | offset; +} + +static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { + switch (entry.texture_type) { + case Tegra::Texture::TextureType::Texture1D: + return Shader::TextureType::Color1D; + case Tegra::Texture::TextureType::Texture2D: + case Tegra::Texture::TextureType::Texture2DNoMipmap: + return Shader::TextureType::Color2D; + case Tegra::Texture::TextureType::Texture3D: + return Shader::TextureType::Color3D; + case Tegra::Texture::TextureType::TextureCubemap: + return Shader::TextureType::ColorCube; + case Tegra::Texture::TextureType::Texture1DArray: + return Shader::TextureType::ColorArray1D; + case Tegra::Texture::TextureType::Texture2DArray: + return Shader::TextureType::ColorArray2D; + case Tegra::Texture::TextureType::Texture1DBuffer: + return Shader::TextureType::Buffer; + case Tegra::Texture::TextureType::TextureCubeArray: + return Shader::TextureType::ColorArrayCube; + default: + throw Shader::NotImplementedException("Unknown texture type"); + } +} + +GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, 
GPUVAddr program_base_, + u32 start_address_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} { + start_address = start_address_; +} + +GenericEnvironment::~GenericEnvironment() = default; + +u32 GenericEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +u32 GenericEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 GenericEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +std::array GenericEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +u64 GenericEnvironment::ReadInstruction(u32 address) { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[(address - cached_lowest) / INST_SIZE]; + } + has_unbound_instructions = true; + return gpu_memory->Read(program_base + address); +} + +std::optional GenericEnvironment::Analyze() { + const std::optional size{TryFindSize()}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast(*size); + return Common::CityHash64(reinterpret_cast(code.data()), *size); +} + +void GenericEnvironment::SetCachedSize(size_t size_bytes) { + cached_lowest = start_address; + cached_highest = start_address + static_cast(size_bytes); + code.resize(CachedSize()); + gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); +} + +size_t GenericEnvironment::CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; +} + +size_t GenericEnvironment::ReadSize() const noexcept { + return read_highest - read_lowest + INST_SIZE; +} + +bool GenericEnvironment::CanBeSerialized() const noexcept { + return !has_unbound_instructions; +} + +u64 GenericEnvironment::CalculateHash() const { + const size_t size{ReadSize()}; + const auto data{std::make_unique(size)}; + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), 
size); + return Common::CityHash64(data.get(), size); +} + +void GenericEnvironment::Serialize(std::ofstream& file) const { + const u64 code_size{static_cast(CachedSize())}; + const u64 num_texture_types{static_cast(texture_types.size())}; + const u64 num_cbuf_values{static_cast(cbuf_values.size())}; + + file.write(reinterpret_cast(&code_size), sizeof(code_size)) + .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) + .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) + .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .write(reinterpret_cast(&start_address), sizeof(start_address)) + .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) + .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) + .write(reinterpret_cast(&stage), sizeof(stage)) + .write(reinterpret_cast(code.data()), code_size); + for (const auto [key, type] : texture_types) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } + for (const auto [key, type] : cbuf_values) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } + if (stage == Shader::Stage::Compute) { + file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.write(reinterpret_cast(&sph), sizeof(sph)); + } +} + +std::optional GenericEnvironment::TryFindSize() { + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + GPUVAddr guest_addr{program_base + start_address}; + size_t offset{0}; + size_t size{BLOCK_SIZE}; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset 
/ INST_SIZE; + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { + const u64 inst = data[index / INST_SIZE]; + if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + index; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; +} + +Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, + bool via_header_index, u32 raw) { + const TextureHandle handle{raw, via_header_index}; + const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + Tegra::Texture::TICEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + const Shader::TextureType result{ConvertType(entry)}; + texture_types.emplace(raw, result); + return result; +} + +GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Maxwell::ShaderProgram program, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + stage_index = 0; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + stage_index = 0; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + stage_index = 1; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + stage_index = 2; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + stage_index = 3; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + stage_index = 4; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + break; + } + const u64 
local_size{sph.LocalMemorySize()}; + ASSERT(local_size <= std::numeric_limits::max()); + local_memory_size = static_cast(local_size); + texture_bound = maxwell3d->regs.tex_cb_index; +} + +u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.address + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); +} + +ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ + &kepler_compute_} { + const auto& qmd{kepler_compute->launch_description}; + stage = Shader::Stage::Compute; + local_memory_size = qmd.local_pos_alloc; + texture_bound = kepler_compute->regs.tex_cb_index; + shared_memory_size = qmd.shared_alloc; + workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; +} + +u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.Address() + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) { + const auto& 
regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); +} + +void FileEnvironment::Deserialize(std::ifstream& file) { + u64 code_size{}; + u64 num_texture_types{}; + u64 num_cbuf_values{}; + file.read(reinterpret_cast(&code_size), sizeof(code_size)) + .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) + .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) + .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .read(reinterpret_cast(&start_address), sizeof(start_address)) + .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) + .read(reinterpret_cast(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast(&stage), sizeof(stage)); + code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast(code.get()), code_size); + for (size_t i = 0; i < num_texture_types; ++i) { + u32 key; + Shader::TextureType type; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&type), sizeof(type)); + texture_types.emplace(key, type); + } + for (size_t i = 0; i < num_cbuf_values; ++i) { + u64 key; + u32 value; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&value), sizeof(value)); + cbuf_values.emplace(key, value); + } + if (stage == Shader::Stage::Compute) { + file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.read(reinterpret_cast(&sph), sizeof(sph)); + } +} + +u64 FileEnvironment::ReadInstruction(u32 address) { + if (address < read_lowest || address > read_highest) { + throw Shader::LogicError("Out of bounds address {}", address); + } + return code[(address - read_lowest) / sizeof(u64)]; +} + +u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, 
u32 cbuf_offset) { + const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == cbuf_values.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; +} + +Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) { + const auto it{texture_types.find(handle)}; + if (it == texture_types.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; +} + +u32 FileEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 FileEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +u32 FileEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +std::array FileEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +void SerializePipeline(std::span key, std::span envs, + const std::filesystem::path& filename) try { + std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); + file.exceptions(std::ifstream::failbit); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + return; + } + if (file.tellp() == 0) { + // Write header + file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) + .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); + } + if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { + return; + } + const u32 num_envs{static_cast(envs.size())}; + file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); + for (const GenericEnvironment* const env : envs) { + env->Serialize(file); + } + file.write(key.data(), key.size_bytes()); + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& 
filename, + Common::UniqueFunction load_compute, + Common::UniqueFunction> load_graphics) try { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return; + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + + std::array magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast(&cache_version), sizeof(cache_version)); + if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + file.close(); + if (Common::FS::RemoveFile(filename)) { + if (magic_number != MAGIC_NUMBER) { + LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); + } + if (cache_version != CACHE_VERSION) { + LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); + } + } else { + LOG_ERROR(Common_Filesystem, + "Invalid pipeline cache file and failed to delete it in \"{}\"", + Common::FS::PathToUTF8String(filename)); + } + return; + } + while (file.tellg() != end) { + if (stop_loading.stop_requested()) { + return; + } + u32 num_envs{}; + file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); + std::vector envs(num_envs); + for (FileEnvironment& env : envs) { + env.Deserialize(file); + } + if (envs.front().ShaderStage() == Shader::Stage::Compute) { + load_compute(file, std::move(envs.front())); + } else { + load_graphics(file, std::move(envs)); + } + } + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h new file mode 100644 index 000000000..37d712045 --- /dev/null +++ b/src/video_core/shader_environment.h @@ -0,0 +1,198 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later 
version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "common/unique_function.h" +#include "shader_recompiler/environment.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/textures/texture.h" + +namespace Tegra { +class Memorymanager; +} + +namespace VideoCommon { + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + if (via_header_index) { + image = data; + sampler = data; + } else { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); + } + } + + u32 image; + u32 sampler; +}; + +class GenericEnvironment : public Shader::Environment { +public: + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~GenericEnvironment() override; + + [[nodiscard]] u32 TextureBoundBuffer() const final; + + [[nodiscard]] u32 LocalMemorySize() const final; + + [[nodiscard]] u32 SharedMemorySize() const final; + + [[nodiscard]] std::array WorkgroupSize() const final; + + [[nodiscard]] u64 ReadInstruction(u32 address) final; + + [[nodiscard]] std::optional Analyze(); + + void SetCachedSize(size_t size_bytes); + + [[nodiscard]] size_t CachedSize() const noexcept; + + [[nodiscard]] size_t ReadSize() const noexcept; + + [[nodiscard]] bool CanBeSerialized() const noexcept; + + [[nodiscard]] u64 CalculateHash() const; + + void Serialize(std::ofstream& file) const; + +protected: + std::optional TryFindSize(); + + Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, + u32 raw); + + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; + + std::vector code; + std::unordered_map 
texture_types; + std::unordered_map cbuf_values; + + u32 local_memory_size{}; + u32 texture_bound{}; + u32 shared_memory_size{}; + std::array workgroup_size{}; + + u32 read_lowest = std::numeric_limits::max(); + u32 read_highest = 0; + + u32 cached_lowest = std::numeric_limits::max(); + u32 cached_highest = 0; + + bool has_unbound_instructions = false; +}; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D::Regs::ShaderProgram program, + GPUVAddr program_base_, u32 start_address_); + + ~GraphicsEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; + size_t stage_index{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~ComputeEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; + +class FileEnvironment final : public Shader::Environment { +public: + FileEnvironment() = default; + ~FileEnvironment() override = default; + + FileEnvironment& operator=(FileEnvironment&&) noexcept = default; + FileEnvironment(FileEnvironment&&) noexcept = default; + + FileEnvironment& operator=(const FileEnvironment&) = delete; + FileEnvironment(const FileEnvironment&) = delete; + + void Deserialize(std::ifstream& file); + + [[nodiscard]] u64 ReadInstruction(u32 address) override; + + [[nodiscard]] u32 ReadCbufValue(u32 
cbuf_index, u32 cbuf_offset) override; + + [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override; + + [[nodiscard]] u32 LocalMemorySize() const override; + + [[nodiscard]] u32 SharedMemorySize() const override; + + [[nodiscard]] u32 TextureBoundBuffer() const override; + + [[nodiscard]] std::array WorkgroupSize() const override; + +private: + std::unique_ptr code; + std::unordered_map texture_types; + std::unordered_map cbuf_values; + std::array workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; + u32 texture_bound{}; + u32 read_lowest{}; + u32 read_highest{}; +}; + +void SerializePipeline(std::span key, std::span envs, + const std::filesystem::path& filename); + +template +void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) { + static_assert(std::is_trivially_copyable_v); + static_assert(std::has_unique_object_representations_v); + SerializePipeline(std::span(reinterpret_cast(&key), sizeof(key)), + std::span(envs.data(), envs.size()), filename); +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, + Common::UniqueFunction load_compute, + Common::UniqueFunction> load_graphics); + +} // namespace VideoCommon -- cgit v1.2.3 From c5425b38c1a4d7eae270780d8b3ba66231015038 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 26 Apr 2021 05:18:26 -0300 Subject: vk_compute_pass: Fix -Wshadow warning --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 7e5ba283b..8e426ce2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -238,10 +238,10 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, ComputePass::~ComputePass() = default; 
-Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool, - StagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_, + DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : ComputePass(device, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, -- cgit v1.2.3 From 53acdda772a8b7650c46ba9d998119b8c8e30844 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 26 Apr 2021 22:11:31 -0300 Subject: vk_scheduler: Allow command submission on worker thread This changes how Scheduler::Flush works. It queues the current command buffer to be sent to the GPU but does not do it immediately. The Vulkan worker thread takes care of that. Users will have to use Scheduler::Flush + Scheduler::WaitWorker to get the previous behavior. Scheduler::Finish is unchanged. To avoid waiting on work never queued, Scheduler::Wait sends the current command buffer if that's what the caller wants to wait. 
--- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 75 +++++++------ src/video_core/renderer_vulkan/vk_blit_screen.cpp | 94 ++++++++-------- src/video_core/renderer_vulkan/vk_query_cache.cpp | 9 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 7 +- src/video_core/renderer_vulkan/vk_scheduler.cpp | 120 +++++++++++---------- src/video_core/renderer_vulkan/vk_scheduler.h | 15 +++ src/video_core/renderer_vulkan/vk_swapchain.cpp | 39 +++---- src/video_core/renderer_vulkan/vk_swapchain.h | 23 ++-- 8 files changed, 200 insertions(+), 182 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index bec3a81d9..7e39b65bd 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,19 +97,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), - telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), - gpu(gpu_), - library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? 
CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), - memory_allocator(device, false), - state_tracker(gpu), - scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), + state_tracker(gpu), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, @@ -130,35 +125,47 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - const auto& layout = render_window.GetFramebufferLayout(); - if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { - const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; - const bool use_accelerated = - rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); - const bool is_srgb = use_accelerated && screen_info.is_srgb; - if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { - swapchain.Create(layout.width, layout.height, is_srgb); - blit_screen.Recreate(); - } - - scheduler.WaitWorker(); - - while (!swapchain.AcquireNextImage()) { - swapchain.Create(layout.width, layout.height, is_srgb); - blit_screen.Recreate(); + SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); + if (!render_window.IsShown()) { + return; + } + const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; + const bool use_accelerated = + rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); + const bool is_srgb = use_accelerated && screen_info.is_srgb; + + const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); + bool has_been_recreated = false; + const auto recreate_swapchain = [&] { + if (!has_been_recreated) { + 
has_been_recreated = true; + scheduler.WaitWorker(); } - const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); - - scheduler.Flush(render_semaphore); - - if (swapchain.Present(render_semaphore)) { - blit_screen.Recreate(); + swapchain.Create(layout.width, layout.height, is_srgb); + }; + if (swapchain.NeedsRecreate() || + swapchain.HasDifferentLayout(layout.width, layout.height, is_srgb)) { + recreate_swapchain(); + } + bool needs_recreate; + do { + needs_recreate = false; + swapchain.AcquireNextImage(); + if (swapchain.NeedsRecreate()) { + recreate_swapchain(); + needs_recreate = true; } - gpu.RendererFrameEndNotify(); - rasterizer.TickFrame(); + } while (needs_recreate); + if (has_been_recreated) { + blit_screen.Recreate(); } + const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); + scheduler.Flush(render_semaphore); + scheduler.WaitWorker(); + swapchain.Present(render_semaphore); - render_window.OnFrameDisplayed(); + gpu.RendererFrameEndNotify(); + rasterizer.TickFrame(); } void RendererVulkan::Report() const { diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 363134129..516f428e7 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -184,47 +184,43 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool .depth = 1, }, }; - scheduler.Record( - [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange = - { - .aspectMask = 
VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - VkImageMemoryBarrier read_barrier = base_barrier; - read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - VkImageMemoryBarrier write_barrier = base_barrier; - write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, read_barrier); - cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); - }); + scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { + const VkImage image = *raw_images[image_index]; + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + VkImageMemoryBarrier read_barrier = base_barrier; + read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkImageMemoryBarrier write_barrier = base_barrier; + write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + 
cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); + }); } - scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], - descriptor_set = descriptor_sets[image_index], buffer = *buffer, - size = swapchain.GetSize(), pipeline = *pipeline, - layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) { const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; @@ -234,8 +230,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool const VkRenderPassBeginInfo renderpass_bi{ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .pNext = nullptr, - .renderPass = renderpass, - .framebuffer = framebuffer, + .renderPass = *renderpass, + .framebuffer = *framebuffers[image_index], .renderArea = { .offset = {0, 0}, @@ -257,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool .extent = size, }; cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); - cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); + cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_sets[image_index], {}); cmdbuf.Draw(4, 1, 0, 0); cmdbuf.EndRenderPass(); }); @@ -304,8 +301,7 @@ void VKBlitScreen::CreateShaders() { void 
VKBlitScreen::CreateSemaphores() { semaphores.resize(image_count); - std::generate(semaphores.begin(), semaphores.end(), - [this] { return device.GetLogical().CreateSemaphore(); }); + std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); }); } void VKBlitScreen::CreateDescriptorPool() { @@ -633,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() { } void VKBlitScreen::ReleaseRawImages() { - for (std::size_t i = 0; i < raw_images.size(); ++i) { - scheduler.Wait(resource_ticks.at(i)); + for (const u64 tick : resource_ticks) { + scheduler.Wait(tick); } raw_images.clear(); raw_buffer_commits.clear(); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 7cadd5147..1dd78328c 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -114,10 +114,13 @@ void HostCounter::EndQuery() { } u64 HostCounter::BlockingQuery() const { - if (tick >= cache.GetScheduler().CurrentTick()) { - cache.GetScheduler().Flush(); + auto& scheduler{cache.GetScheduler()}; + if (tick >= scheduler.CurrentTick()) { + scheduler.Flush(); + // This may not be necessary, but it's better to play it safe and assume drivers don't + // support wait before signal on vkGetQueryPoolResults + scheduler.WaitWorker(); } - u64 data; const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( query.first, query.second, 1, sizeof(data), &data, sizeof(data), diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fa6daeb3a..0f15ad2f7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -452,10 +452,11 @@ void RasterizerVulkan::TiledCacheBarrier() { } void RasterizerVulkan::FlushCommands() { - if (draw_counter > 0) { - draw_counter = 0; - scheduler.Flush(); + if (draw_counter == 0) { + return; } + draw_counter 
= 0; + scheduler.Flush(); } void RasterizerVulkan::TickFrame() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 25a4933e5..81cb330d9 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { command->~Command(); command = next; } - + submit = false; command_offset = 0; first = nullptr; last = nullptr; @@ -42,7 +42,7 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) master_semaphore{std::make_unique(device)}, command_pool{std::make_unique(*master_semaphore, device)} { AcquireNewChunk(); - AllocateNewContext(); + AllocateWorkerCommandBuffer(); worker_thread = std::thread(&VKScheduler::WorkerThread, this); } @@ -60,6 +60,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) { void VKScheduler::Finish(VkSemaphore semaphore) { const u64 presubmit_tick = CurrentTick(); SubmitExecution(semaphore); + WaitWorker(); Wait(presubmit_tick); AllocateNewContext(); } @@ -140,75 +141,82 @@ void VKScheduler::WorkerThread() { if (quit) { continue; } - auto extracted_chunk = std::move(chunk_queue.Front()); - chunk_queue.Pop(); - extracted_chunk->ExecuteAll(current_cmdbuf); - chunk_reserve.Push(std::move(extracted_chunk)); + while (!chunk_queue.Empty()) { + auto extracted_chunk = std::move(chunk_queue.Front()); + chunk_queue.Pop(); + const bool has_submit = extracted_chunk->HasSubmit(); + extracted_chunk->ExecuteAll(current_cmdbuf); + if (has_submit) { + AllocateWorkerCommandBuffer(); + } + chunk_reserve.Push(std::move(extracted_chunk)); + } } while (!quit); } +void VKScheduler::AllocateWorkerCommandBuffer() { + current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); + current_cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = 
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); +} + void VKScheduler::SubmitExecution(VkSemaphore semaphore) { EndPendingOperations(); InvalidateState(); - WaitWorker(); - - std::unique_lock lock{mutex}; - - current_cmdbuf.End(); - - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); - const u32 num_signal_semaphores = semaphore ? 2U : 1U; const u64 signal_value = master_semaphore->CurrentTick(); - const u64 wait_value = signal_value - 1; - const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - master_semaphore->NextTick(); - const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, semaphore}; + Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + cmdbuf.End(); - const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, - .pNext = nullptr, - .waitSemaphoreValueCount = 1, - .pWaitSemaphoreValues = &wait_value, - .signalSemaphoreValueCount = num_signal_semaphores, - .pSignalSemaphoreValues = signal_values.data(), - }; - const VkSubmitInfo submit_info{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &timeline_si, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &timeline_semaphore, - .pWaitDstStageMask = &wait_stage_mask, - .commandBufferCount = 1, - .pCommandBuffers = current_cmdbuf.address(), - .signalSemaphoreCount = num_signal_semaphores, - .pSignalSemaphores = signal_semaphores.data(), - }; - switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { - case VK_SUCCESS: - break; - case VK_ERROR_DEVICE_LOST: - device.ReportLoss(); - [[fallthrough]]; - default: - vk::Check(result); - } -} + const u32 num_signal_semaphores = semaphore ? 
2U : 1U; -void VKScheduler::AllocateNewContext() { - std::unique_lock lock{mutex}; + const u64 wait_value = signal_value - 1; + const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); - current_cmdbuf.Begin({ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .pNext = nullptr, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - .pInheritanceInfo = nullptr, + const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + const std::array signal_values{signal_value, u64(0)}; + const std::array signal_semaphores{timeline_semaphore, semaphore}; + + const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreValueCount = 1, + .pWaitSemaphoreValues = &wait_value, + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_si, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &timeline_semaphore, + .pWaitDstStageMask = &wait_stage_mask, + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), + }; + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { + case VK_SUCCESS: + break; + case VK_ERROR_DEVICE_LOST: + device.ReportLoss(); + [[fallthrough]]; + default: + vk::Check(result); + } }); + chunk->MarkSubmit(); + DispatchWork(); +} +void VKScheduler::AllocateNewContext() { // Enable counters once again. These are disabled when a command buffer is finished. 
if (query_cache) { query_cache->UpdateCounters(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index a40bb8bcd..40215c4c5 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -86,6 +86,10 @@ public: /// Waits for the given tick to trigger on the GPU. void Wait(u64 tick) { + if (tick >= master_semaphore->CurrentTick()) { + // Make sure we are not waiting for the current tick without signalling + Flush(); + } master_semaphore->Wait(tick); } @@ -155,15 +159,24 @@ private: return true; } + void MarkSubmit() { + submit = true; + } + bool Empty() const { return command_offset == 0; } + bool HasSubmit() const { + return submit; + } + private: Command* first = nullptr; Command* last = nullptr; size_t command_offset = 0; + bool submit = false; alignas(std::max_align_t) std::array data{}; }; @@ -176,6 +189,8 @@ private: void WorkerThread(); + void AllocateWorkerCommandBuffer(); + void SubmitExecution(VkSemaphore semaphore); void AllocateNewContext(); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index dfd5c65ba..a71b0b01e 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -65,6 +65,8 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul VKSwapchain::~VKSwapchain() = default; void VKSwapchain::Create(u32 width, u32 height, bool srgb) { + needs_recreate = false; + const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { @@ -82,21 +84,20 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { resource_ticks.resize(image_count); } -bool VKSwapchain::AcquireNextImage() { +void VKSwapchain::AcquireNextImage() { const VkResult result = 
device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits::max(), *present_semaphores[frame_index], {}, &image_index); + needs_recreate |= result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR; scheduler.Wait(resource_ticks[image_index]); - return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR; + resource_ticks[image_index] = scheduler.CurrentTick(); } -bool VKSwapchain::Present(VkSemaphore render_semaphore) { +void VKSwapchain::Present(VkSemaphore render_semaphore) { const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; const std::array semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; - bool recreated = false; - const VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, @@ -107,7 +108,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { .pImageIndices = &image_index, .pResults = nullptr, }; - switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: break; @@ -115,24 +115,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; case VK_ERROR_OUT_OF_DATE_KHR: - if (current_width > 0 && current_height > 0) { - Create(current_width, current_height, current_srgb); - recreated = true; - } + needs_recreate = true; break; default: LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); break; } - - resource_ticks[image_index] = scheduler.CurrentTick(); - frame_index = (frame_index + 1) % static_cast(image_count); - return recreated; -} - -bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const { - // TODO(Rodrigo): Handle framebuffer pixel format changes - return framebuffer.width != current_width || framebuffer.height != current_height; + ++frame_index; + if (frame_index >= image_count) { + frame_index = 0; + } } void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& 
capabilities, u32 width, @@ -148,7 +140,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { requested_image_count = capabilities.maxImageCount; } - VkSwapchainCreateInfoKHR swapchain_ci{ .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, .pNext = nullptr, @@ -169,7 +160,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, .clipped = VK_FALSE, .oldSwapchain = nullptr, }; - const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; const std::array queue_indices{graphics_family, present_family}; @@ -178,7 +168,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.queueFamilyIndexCount = static_cast(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); } - // Request the size again to reduce the possibility of a TOCTOU race condition. 
const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); @@ -186,8 +175,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); extent = swapchain_ci.imageExtent; - current_width = extent.width; - current_height = extent.height; current_srgb = srgb; images = swapchain.GetImages(); @@ -197,8 +184,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, void VKSwapchain::CreateSemaphores() { present_semaphores.resize(image_count); - std::generate(present_semaphores.begin(), present_semaphores.end(), - [this] { return device.GetLogical().CreateSemaphore(); }); + std::ranges::generate(present_semaphores, + [this] { return device.GetLogical().CreateSemaphore(); }); } void VKSwapchain::CreateImageViews() { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index adc8d27cf..b38fd9dc2 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -28,14 +28,20 @@ public: void Create(u32 width, u32 height, bool srgb); /// Acquires the next image in the swapchain, waits as needed. - bool AcquireNextImage(); + void AcquireNextImage(); - /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be - /// recreated. Takes responsability for the ownership of fence. - bool Present(VkSemaphore render_semaphore); + /// Presents the rendered image to the swapchain. + void Present(VkSemaphore render_semaphore); /// Returns true when the framebuffer layout has changed. 
- bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; + bool HasDifferentLayout(u32 width, u32 height, bool is_srgb) const { + return extent.width != width || extent.height != height || current_srgb != is_srgb; + } + + /// Returns true when the image has to be recreated. + bool NeedsRecreate() const { + return needs_recreate; + } VkExtent2D GetSize() const { return extent; @@ -61,10 +67,6 @@ public: return image_format; } - bool GetSrgbState() const { - return current_srgb; - } - private: void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, bool srgb); @@ -92,9 +94,8 @@ private: VkFormat image_format{}; VkExtent2D extent{}; - u32 current_width{}; - u32 current_height{}; bool current_srgb{}; + bool needs_recreate{}; }; } // namespace Vulkan -- cgit v1.2.3 From f4b82b8dd70a57b5a828bcdbecf9aefd1bd240b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 1 May 2021 00:29:31 -0300 Subject: vk_graphics_pipeline: Fix texture buffer descriptors --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 76080bde1..9f5d30fe8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -253,6 +253,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; std::array samplers; + size_t sampler_index{}; size_t image_index{}; texture_cache.SynchronizeGraphicsDescriptors(); @@ -312,11 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index] = handle.image; 
+ image_view_indices[image_index++] = handle.image; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers[image_index] = sampler->Handle(); - ++image_index; + samplers[sampler_index++] = sampler->Handle(); } } if constexpr (Spec::has_images) { @@ -360,10 +360,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { ++texture_buffer_index; } }}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + const Shader::Info& info{stage_infos[stage]}; - if constexpr (Spec::has_texture_buffers || Spec::has_image_buffers) { - buffer_cache.UnbindGraphicsTextureBuffers(stage); - } if constexpr (Spec::has_texture_buffers) { for (const auto& desc : info.texture_buffer_descriptors) { add_buffer(desc); @@ -443,7 +442,9 @@ void GraphicsPipeline::ConfigureDraw() { const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + if (bind_pipeline) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + } if (!descriptor_set_layout) { return; } -- cgit v1.2.3 From b541f5e5e333a8ec8c3569e02d67e59ad14217c2 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 19 Apr 2021 01:03:38 +0200 Subject: shader: Implement VertexA stage --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0822862fe..638475251 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,6 +47,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); namespace { using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::MergeDualVertexPrograms; using 
Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; @@ -287,22 +288,32 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; + bool uses_vertex_a{}; + std::size_t start_value_processing{}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } + uses_vertex_a |= index == 0; Shader::Environment& env{*envs[env_index]}; ++env_index; const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; - Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + continue; + } + Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); + start_value_processing = 1; } std::array infos{}; std::array modules; u32 binding{0}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } -- cgit v1.2.3 From c49d56c931471f21d475a31272164fbfae5ea64a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 1 May 2021 14:56:25 +0200 Subject: shader: Address feedback --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 638475251..634bbb450 100644 --- 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -288,32 +288,32 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; - bool uses_vertex_a{}; - std::size_t start_value_processing{}; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } - uses_vertex_a |= index == 0; Shader::Environment& env{*envs[env_index]}; ++env_index; const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { + // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); - continue; + } else { + // VertexB path when VertexA is present. + Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; - programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); - start_value_processing = 1; } std::array infos{}; std::array modules; u32 binding{0}; - for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) { + for (size_t index = uses_vertex_a && uses_vertex_b ? 
1 : 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } -- cgit v1.2.3 From 48a17298d76cd8ed3bf2b53aca1e1ac097693976 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 03:58:11 -0300 Subject: spirv: Support OpenGL uniform buffers and change bindings --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 634bbb450..1334882b5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -127,6 +127,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw base_profile = Shader::Profile{ .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, + .support_descriptor_aliasing = true, .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == @@ -149,9 +150,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device.IsExtShaderViewportIndexLayerSupported(), .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), + .support_demote_to_helper_invocation = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, + .has_broken_unsigned_image_offsets = false, .generic_input_types{}, .fixed_state_point_size{}, .alpha_test_func{}, @@ -312,7 +315,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; - u32 binding{0}; + 
Shader::Backend::SPIRV::Bindings binding; for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -398,7 +401,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - u32 binding{0}; + Shader::Backend::SPIRV::Bindings binding; const std::vector code{EmitSPIRV(base_profile, program, binding)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; -- cgit v1.2.3 From d621e96d0de212cc16897eadf71e8a1b2e1eb5dc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:28:34 -0300 Subject: shader: Initial OpenGL implementation --- src/video_core/CMakeLists.txt | 4 + src/video_core/buffer_cache/buffer_cache.h | 53 ++-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 37 ++- src/video_core/renderer_opengl/gl_buffer_cache.h | 40 ++- .../renderer_opengl/gl_compute_program.cpp | 178 +++++++++++++ .../renderer_opengl/gl_compute_program.h | 83 ++++++ src/video_core/renderer_opengl/gl_device.cpp | 89 ------- src/video_core/renderer_opengl/gl_device.h | 16 -- .../renderer_opengl/gl_graphics_program.cpp | 296 +++++++++++++++++++++ .../renderer_opengl/gl_graphics_program.h | 105 ++++++++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 23 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 275 ++++++++++++++++++- src/video_core/renderer_opengl/gl_shader_cache.h | 98 ++++--- .../renderer_opengl/gl_shader_manager.cpp | 146 ---------- src/video_core/renderer_opengl/gl_shader_manager.h | 73 +---- .../renderer_opengl/gl_texture_cache.cpp | 257 ++++++------------ src/video_core/renderer_opengl/gl_texture_cache.h | 29 +- src/video_core/renderer_opengl/maxwell_to_gl.h | 108 ++++++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 17 +- 
src/video_core/renderer_opengl/renderer_opengl.h | 5 +- src/video_core/renderer_opengl/util_shaders.cpp | 13 +- src/video_core/renderer_vulkan/pipeline_helper.h | 17 -- src/video_core/renderer_vulkan/vk_buffer_cache.h | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 22 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 22 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 23 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 11 - src/video_core/shader_cache.cpp | 17 ++ src/video_core/shader_cache.h | 23 +- src/video_core/shader_environment.cpp | 4 +- src/video_core/shader_environment.h | 16 -- src/video_core/texture_cache/formatter.cpp | 4 +- src/video_core/texture_cache/formatter.h | 3 +- src/video_core/textures/texture.h | 9 + src/video_core/vulkan_common/vulkan_device.cpp | 2 +- 35 files changed, 1415 insertions(+), 705 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_compute_program.cpp create mode 100644 src/video_core/renderer_opengl/gl_compute_program.h create mode 100644 src/video_core/renderer_opengl/gl_graphics_program.cpp create mode 100644 src/video_core/renderer_opengl/gl_graphics_program.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6e0e4b8f5..b008c37c0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -67,10 +67,14 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h + renderer_opengl/gl_compute_program.cpp + renderer_opengl/gl_compute_program.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h + renderer_opengl/gl_graphics_program.cpp + renderer_opengl/gl_graphics_program.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 
29746f61d..6c92e4c30 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -70,8 +70,8 @@ class BufferCache { P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; + static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -154,7 +154,7 @@ public: void UnbindGraphicsTextureBuffers(size_t stage); void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written); + PixelFormat format, bool is_written, bool is_image); void UnbindComputeStorageBuffers(); @@ -164,7 +164,7 @@ public: void UnbindComputeTextureBuffers(); void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, - bool is_written); + bool is_written, bool is_image); void FlushCachedWrites(); @@ -197,6 +197,7 @@ public: [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); std::mutex mutex; + Runtime& runtime; private: template @@ -366,7 +367,6 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - Runtime& runtime; SlotVector slot_buffers; DelayedDestructionRing delayed_destruction_ring; @@ -394,8 +394,10 @@ private: std::array enabled_texture_buffers{}; std::array written_texture_buffers{}; + std::array image_texture_buffers{}; u32 enabled_compute_texture_buffers = 0; u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; std::array fast_bound_uniform_buffers{}; @@ -431,8 +433,8 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_) - : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, - gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); deletion_iterator = slot_buffers.end(); @@ -703,13 +705,18 @@ template void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { enabled_texture_buffers[stage] = 0; written_texture_buffers[stage] = 0; + image_texture_buffers[stage] = 0; } template void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, - u32 size, PixelFormat format, bool is_written) { + u32 size, PixelFormat format, bool is_written, + bool is_image) { enabled_texture_buffers[stage] |= 1U << tbo_index; written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; + } texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -717,6 +724,7 @@ template void BufferCache

::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; written_compute_storage_buffers = 0; + image_compute_texture_buffers = 0; } template @@ -737,13 +745,17 @@ template void BufferCache

::UnbindComputeTextureBuffers() { enabled_compute_texture_buffers = 0; written_compute_texture_buffers = 0; + image_compute_texture_buffers = 0; } template void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written) { + PixelFormat format, bool is_written, bool is_image) { enabled_compute_texture_buffers |= 1U << tbo_index; written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + } compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -1057,7 +1069,6 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { template void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { - u32 binding_index = 0; ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { const TextureBufferBinding& binding = texture_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1066,9 +1077,12 @@ void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_texture_buffers[stage] >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1139,7 +1153,6 @@ void BufferCache

::BindHostComputeStorageBuffers() { template void BufferCache

::BindHostComputeTextureBuffers() { - u32 binding_index = 0; ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { const TextureBufferBinding& binding = compute_texture_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1148,9 +1161,12 @@ void BufferCache

::BindHostComputeTextureBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_compute_texture_buffers >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1339,11 +1355,10 @@ void BufferCache

::UpdateComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer Binding& binding = compute_storage_buffers[index]; - const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); - binding.buffer_id = buffer_id; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed if (((written_compute_storage_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index c4189fb60..2d0ef1307 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -2,14 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL { namespace { +using VideoCore::Surface::PixelFormat; + struct BindlessSSBO { GLuint64EXT address; GLsizei length; @@ -62,6 +66,26 @@ void Buffer::MakeResident(GLenum access) noexcept { glMakeNamedBufferResidentNV(buffer.handle, access); } +GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return it->texture.handle; + } + OGLTexture texture; + texture.Create(GL_TEXTURE_BUFFER); + const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; + glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size); + views.push_back({ + .offset = offset, + 
.size = size, + .format = format, + .texture = std::move(texture), + }); + return views.back().texture.handle; +} + BufferCacheRuntime::BufferCacheRuntime(const Device& device_) : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, @@ -144,7 +168,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, static_cast(size)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), static_cast(offset), static_cast(size)); @@ -181,7 +205,7 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast(&ssbo)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; + const GLuint base_binding = graphics_base_storage_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), static_cast(offset), static_cast(size)); @@ -213,4 +237,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, static_cast(offset), static_cast(size)); } +void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + PixelFormat format) { + *texture_handles++ = buffer.View(offset, size, format); +} + +void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { + *image_handles++ = buffer.View(offset, size, format); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index ddcce5e97..4986c65fd 100644 --- 
a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -32,6 +32,8 @@ public: void MakeResident(GLenum access) noexcept; + [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { return address; } @@ -41,9 +43,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + OGLTexture texture; + }; + GLuint64EXT address = 0; OGLBuffer buffer; GLenum current_residency_access = GL_NONE; + std::vector views; }; class BufferCacheRuntime { @@ -75,13 +85,19 @@ public: void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + + void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { if (use_assembly_shaders) { const GLuint handle = fast_uniforms[stage][binding_index].handle; const GLsizeiptr gl_size = static_cast(size); glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, fast_uniforms[stage][binding_index].handle, 0, @@ -103,7 +119,7 @@ public: std::span BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { const auto [mapped_span, offset] = stream_buffer->Request(static_cast(size)); - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, 
stream_buffer->Handle(), static_cast(offset), static_cast(size)); @@ -118,6 +134,19 @@ public: return has_fast_buffer_sub_data; } + void SetBaseUniformBindings(const std::array& bindings) { + graphics_base_uniform_bindings = bindings; + } + + void SetBaseStorageBindings(const std::array& bindings) { + graphics_base_storage_bindings = bindings; + } + + void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) { + texture_handles = texture_handles_; + image_handles = image_handles_; + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -133,6 +162,11 @@ private: u32 max_attributes = 0; + std::array graphics_base_uniform_bindings{}; + std::array graphics_base_storage_bindings{}; + GLuint* texture_handles = nullptr; + GLuint* image_handles = nullptr; + std::optional stream_buffer; std::array, @@ -155,8 +189,8 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true; static constexpr bool USE_MEMORY_MAPS = false; + static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; }; using BufferCache = VideoCommon::BufferCache; diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp new file mode 100644 index 000000000..d5ef65439 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -0,0 +1,178 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/cityhash.h" +#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 16; + +size_t ComputeProgramKey::Hash() const noexcept { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof *this)); +} + +bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept { + return std::memcmp(this, &rhs, sizeof *this) == 0; +} + +ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, OGLProgram program_, + const Shader::Info& info_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + kepler_compute{kepler_compute_}, + program_manager{program_manager_}, program{std::move(program_)}, info{info_} { + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers += desc.count; + } + u32 num_textures = num_texture_buffers; + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + ASSERT(num_textures <= MAX_TEXTURES); + + u32 num_images = num_image_buffers; + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + ASSERT(num_images <= MAX_IMAGES); +} + +void ComputeProgram::Configure() { + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, 
+ desc.is_written); + ++ssbo_index; + } + texture_cache.SynchronizeComputeDescriptors(); + + std::array image_view_ids; + boost::container::static_vector image_view_indices; + std::array samplers; + std::array textures; + std::array images; + GLsizei sampler_binding{}; + GLsizei texture_binding{}; + GLsizei image_binding{}; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + samplers[sampler_binding++] = 0; + } + } + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto 
handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers[sampler_binding++] = sampler->Handle(); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + buffer_cache.UnbindComputeTextureBuffers(); + size_t texbuf_index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texbuf_index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + + buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); + buffer_cache.BindHostComputeBuffers(); + + const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + texture_binding += num_texture_buffers; + image_binding += num_image_buffers; + + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + if (texture_binding != 0) { + ASSERT(texture_binding == 
sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } + program_manager.BindProgram(program.handle); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h new file mode 100644 index 000000000..64a75d44d --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.h @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines { +class KeplerCompute; +} + +namespace Shader { +struct Info; +} + +namespace OpenGL { + +class ProgramManager; + +struct ComputeProgramKey { + u64 unique_hash; + u32 shared_memory_size; + std::array workgroup_size; + + size_t Hash() const noexcept; + + bool operator==(const ComputeProgramKey&) const noexcept; + + bool operator!=(const ComputeProgramKey& rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class ComputeProgram { +public: + explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, OGLProgram program_, + const Shader::Info& info_); + + void Configure(); + +private: + TextureCache& texture_cache; + 
BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::KeplerCompute& kepler_compute; + ProgramManager& program_manager; + + OGLProgram program; + Shader::Info info; + + u32 num_texture_buffers{}; + u32 num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3b00614e7..18bbc4c1f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -22,34 +22,11 @@ namespace OpenGL { namespace { -// One uniform block is reserved for emulation purposes -constexpr u32 ReservedUniformBlocks = 1; - -constexpr u32 NumStages = 5; - constexpr std::array LIMIT_UBOS = { GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, }; -constexpr std::array LIMIT_SSBOS = { - GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, - GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, -}; -constexpr std::array LIMIT_SAMPLERS = { - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS, - GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, -}; -constexpr std::array LIMIT_IMAGES = { - GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, - GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, - GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, -}; template T GetInteger(GLenum pname) { @@ -82,15 +59,6 @@ bool 
HasExtension(std::span extensions, std::string_view return std::ranges::find(extensions, extension) != extensions.end(); } -u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { - ASSERT(num >= amount); - if (limit) { - amount = std::min(amount, GetInteger(*limit)); - } - num -= amount; - return std::exchange(base, base + amount); -} - std::array BuildMaxUniformBuffers() noexcept { std::array max; std::ranges::transform(LIMIT_UBOS, max.begin(), @@ -98,62 +66,6 @@ std::array BuildMaxUniformBuffers() noexcep return max; } -std::array BuildBaseBindings() noexcept { - std::array bindings; - - static constexpr std::array stage_swizzle{0, 1, 2, 3, 4}; - const u32 total_ubos = GetInteger(GL_MAX_UNIFORM_BUFFER_BINDINGS); - const u32 total_ssbos = GetInteger(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); - const u32 total_samplers = GetInteger(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); - - u32 num_ubos = total_ubos - ReservedUniformBlocks; - u32 num_ssbos = total_ssbos; - u32 num_samplers = total_samplers; - - u32 base_ubo = ReservedUniformBlocks; - u32 base_ssbo = 0; - u32 base_samplers = 0; - - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - bindings[stage] = { - Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), - Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), - Extract(base_samplers, num_samplers, total_samplers / NumStages, - LIMIT_SAMPLERS[stage])}; - } - - u32 num_images = GetInteger(GL_MAX_IMAGE_UNITS); - u32 base_images = 0; - - // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8. - // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the - // fragment stage, and at least 1 for the rest of the stages. - // So far games are observed to use 1 image binding on vertex and 4 on fragment stages. - - // Reserve at least 4 image bindings on the fragment stage. 
- bindings[4].image = - Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); - - // This is guaranteed to be at least 1. - const u32 total_extracted_images = num_images / (NumStages - 1); - - // Reserve the other image bindings. - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - if (stage == 4) { - continue; - } - bindings[stage].image = - Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); - } - - // Compute doesn't care about any of this. - bindings[5] = {0, 0, 0, 0}; - - return bindings; -} - bool IsASTCSupported() { static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; static constexpr std::array formats = { @@ -225,7 +137,6 @@ Device::Device() { } max_uniform_buffers = BuildMaxUniformBuffers(); - base_bindings = BuildBaseBindings(); uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2c2b13767..152a3acd3 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -12,13 +12,6 @@ namespace OpenGL { class Device { public: - struct BaseBindings { - u32 uniform_buffer{}; - u32 shader_storage_buffer{}; - u32 sampler{}; - u32 image{}; - }; - explicit Device(); explicit Device(std::nullptr_t); @@ -28,14 +21,6 @@ public: return max_uniform_buffers[static_cast(shader_type)]; } - const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { - return base_bindings[stage_index]; - } - - const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { - return GetBaseBindings(static_cast(shader_type)); - } - size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } @@ 
-134,7 +119,6 @@ private: std::string vendor_name; std::array max_uniform_buffers{}; - std::array base_bindings{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp new file mode 100644 index 000000000..fd0958719 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -0,0 +1,296 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_graphics_program.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 8; + +size_t GraphicsProgramKey::Hash() const noexcept { + return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); +} + +bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; +} + +GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + const std::array& infos) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, program{std::move(program_)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? 
*info : Shader::Info{}; }); + + u32 num_textures{}; + u32 num_images{}; + for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { + const auto& info{stage_infos[stage]}; + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + for (const auto& desc : info.constant_buffer_descriptors) { + base_uniform_bindings[stage + 1] += desc.count; + } + for (const auto& desc : info.storage_buffers_descriptors) { + base_storage_bindings[stage + 1] += desc.count; + } + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers[stage] += desc.count; + num_textures += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers[stage] += desc.count; + num_images += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); +} + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +void GraphicsProgram::Configure(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_view_index{}; + GLsizei sampler_binding{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); + buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) { + const Shader::Info& info{stage_infos[stage]}; + 
buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + } + }}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + samplers[sampler_binding++] = 0; + } + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : 
info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_binding++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_view_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } 
+ if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); + + const ImageId* views_it{image_view_ids.data()}; + GLsizei texture_binding = 0; + GLsizei image_binding = 0; + std::array textures; + std::array images; + const auto prepare_stage{[&](size_t stage) { + buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); + buffer_cache.BindHostStageBuffers(stage); + + texture_binding += num_texture_buffers[stage]; + image_binding += num_image_buffers[stage]; + + const auto& info{stage_infos[stage]}; + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, 
texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } + texture_cache.UpdateRenderTargets(false); + + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + program_manager.BindProgram(program.handle); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h new file mode 100644 index 000000000..5adf3f41e --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace OpenGL { + +class ProgramManager; + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +struct GraphicsProgramKey { + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + }; + + std::array unique_hashes; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; + BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + std::array padding; + TransformFeedbackState xfb_state; + + size_t Hash() const noexcept; + + bool 
operator==(const GraphicsProgramKey&) const noexcept; + + bool operator!=(const GraphicsProgramKey& rhs) const noexcept { + return !operator==(rhs); + } + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsProgramKey); + } else { + return offsetof(GraphicsProgramKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class GraphicsProgram { +public: + explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, const std::array& infos); + + void Configure(bool is_indexed); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + ProgramManager& program_manager; + StateTracker& state_tracker; + + OGLProgram program; + std::array stage_infos{}; + std::array base_uniform_bindings{}; + std::array base_storage_bindings{}; + std::array num_texture_buffers{}; + std::array num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index dd1937863..e527b76ba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -98,7 +98,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, 
buffer_cache_runtime), - shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), + shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, + buffer_cache, program_manager, state_tracker), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} @@ -246,12 +247,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); - // Setup shaders and their used resources. - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()}; - texture_cache.UpdateRenderTargets(false); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - program_manager.BindGraphicsPipeline(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + program->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); BeginTransformFeedback(primitive_mode); @@ -293,7 +292,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { num_instances, base_instance); } } - EndTransformFeedback(); ++num_queued_commands; @@ -302,7 +300,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute() { - UNREACHABLE_MSG("Not implemented"); + ComputeProgram* const program{shader_cache.CurrentComputeProgram()}; + if (!program) { + return; + } + program->Configure(); + const auto& qmd{kepler_compute.launch_description}; + glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); + ++num_queued_commands; } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -515,7 +520,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); // ASSERT_MSG(image_view->size.height == 
config.height, "Framebuffer height is different"); - screen_info.display_texture = image_view->Handle(ImageViewType::e2D); + screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c3e490b40..c9ca1f005 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -16,6 +16,11 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" @@ -25,17 +30,281 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" namespace OpenGL { +namespace { +// FIXME: Move this somewhere else +const Shader::Profile profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + 
.support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = true, + .support_viewport_mask = true, + .support_typeless_image_loads = true, + .support_demote_to_helper_invocation = false, + .warp_size_potentially_larger_than_guest = true, + .support_int64_atomics = false, + .lower_left_origin_mode = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + + .generic_input_types = {}, + .convert_depth_mode = false, + .force_early_z = false, + + .tess_primitive = {}, + .tess_spacing = {}, + .tess_clockwise = false, + + .input_topology = Shader::InputTopology::Triangles, + + .fixed_state_point_size = std::nullopt, + + .alpha_test_func = Shader::CompareFunction::Always, + .alpha_test_reference = 0.0f, + + .y_negate = false, + + .xfb_varyings = {}, +}; + +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::GraphicsEnvironment; + +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} + +void AddShader(GLenum stage, GLuint program, std::span code) { + OGLShader shader; + shader.handle = glCreateShader(stage); + + glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), + static_cast(code.size_bytes())); + glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); + glAttachShader(program, shader.handle); + if (!Settings::values.renderer_debug) { + return; + } + GLint shader_status{}; + glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "Failed to build shader"); + } + GLint log_length{}; + glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length); + if (log_length 
== 0) { + return; + } + std::string log(log_length, 0); + glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data()); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +void LinkProgram(GLuint program) { + glLinkProgram(program); + if (!Settings::values.renderer_debug) { + return; + } + GLint link_status{}; + glGetProgramiv(program, GL_LINK_STATUS, &link_status); + + GLint log_length{}; + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +GLenum Stage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_SHADER; + case 1: + return GL_TESS_CONTROL_SHADER; + case 2: + return GL_TESS_EVALUATION_SHADER; + case 3: + return GL_GEOMETRY_SHADER; + case 4: + return GL_FRAGMENT_SHADER; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} +} // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, - Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_) + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, - emu_window{emu_window_}, gpu{gpu_}, device{device_} {} + emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ + state_tracker_} {} 
ShaderCache::~ShaderCache() = default; +GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { + if (!RefreshStages(graphics_key.unique_hashes)) { + return nullptr; + } + const auto& regs{maxwell3d.regs}; + graphics_key.raw = 0; + graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 + ? regs.draw.topology.Value() + : Maxwell::PrimitiveTopology{}); + graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); + graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); + graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); + + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& program{pair->second}; + if (is_new) { + program = CreateGraphicsProgram(); + } + return program.get(); +} + +ComputeProgram* ShaderCache::CurrentComputeProgram() { + const VideoCommon::ShaderInfo* const shader{ComputeShader()}; + if (!shader) { + return nullptr; + } + const auto& qmd{kepler_compute.launch_description}; + const ComputeProgramKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return pipeline.get(); + } + pipeline = CreateComputeProgram(key, shader); + return pipeline.get(); +} + +std::unique_ptr ShaderCache::CreateGraphicsProgram() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); + + main_pools.ReleaseContents(); + return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true); +} + +std::unique_ptr ShaderCache::CreateGraphicsProgram( + ShaderPools& pools, const GraphicsProgramKey& key, std::span envs, + bool build_in_parallel) { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + size_t env_index{0}; + 
std::array programs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + } + std::array infos{}; + + OGLProgram gl_program; + gl_program.handle = glCreateProgram(); + + Shader::Backend::SPIRV::Bindings binding; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + Shader::IR::Program& program{programs[index]}; + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + + const std::vector code{EmitSPIRV(profile, program, binding)}; + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + AddShader(Stage(stage_index), gl_program.handle, code); + } + LinkProgram(gl_program.handle); + + return std::make_unique(texture_cache, buffer_cache, gpu_memory, maxwell3d, + program_manager, state_tracker, std::move(gl_program), + infos); +} + +std::unique_ptr ShaderCache::CreateComputeProgram( + const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + + main_pools.ReleaseContents(); + return CreateComputeProgram(main_pools, key, env, true); +} + +std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& pools, + const ComputeProgramKey& key, + Shader::Environment& env, + bool build_in_parallel) { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + + 
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + Shader::Backend::SPIRV::Bindings binding; + const std::vector code{EmitSPIRV(profile, program, binding)}; + OGLProgram gl_program; + gl_program.handle = glCreateProgram(); + AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); + LinkProgram(gl_program.handle); + return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, + program_manager, std::move(gl_program), program.info); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 96520e17c..b479d073a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,20 +5,18 @@ #pragma once #include -#include -#include -#include -#include -#include #include -#include -#include #include #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_graphics_program.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -32,64 +30,62 @@ class EmuWindow; namespace OpenGL { class Device; +class ProgramManager; class RasterizerOpenGL; -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsProgramKey { - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - }; - - std::array unique_hashes; - std::array color_formats; - union { - u32 raw; - BitField<0, 1, u32> 
xfb_enabled; - BitField<1, 1, u32> early_z; - BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; - BitField<6, 2, u32> tessellation_primitive; - BitField<8, 2, u32> tessellation_spacing; - BitField<10, 1, u32> tessellation_clockwise; - }; - u32 padding; - TransformFeedbackState xfb_state; - - [[nodiscard]] size_t Size() const noexcept { - if (xfb_enabled != 0) { - return sizeof(GraphicsProgramKey); - } else { - return offsetof(GraphicsProgramKey, padding); - } +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); -class GraphicsProgram { -public: -private: + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; }; class ShaderCache : public VideoCommon::ShaderCache { public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, - Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_); + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); + [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram(); + + [[nodiscard]] ComputeProgram* CurrentComputeProgram(); + private: + std::unique_ptr CreateGraphicsProgram(); + + std::unique_ptr CreateGraphicsProgram( + ShaderPools& pools, const GraphicsProgramKey& key, + std::span envs, bool build_in_parallel); + + std::unique_ptr CreateComputeProgram(const ComputeProgramKey& key, + const VideoCommon::ShaderInfo* shader); + + std::unique_ptr CreateComputeProgram(ShaderPools& pools, + const ComputeProgramKey& key, + 
Shader::Environment& env, + bool build_in_parallel); + Core::Frontend::EmuWindow& emu_window; - Tegra::GPU& gpu; const Device& device; + TextureCache& texture_cache; + BufferCache& buffer_cache; + ProgramManager& program_manager; + StateTracker& state_tracker; + + GraphicsProgramKey graphics_key{}; + + ShaderPools main_pools; + std::unordered_map> graphics_cache; + std::unordered_map> compute_cache; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 553e6e8d6..399959afb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,149 +1,3 @@ // Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. - -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" - -namespace OpenGL { - -namespace { - -void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { - if (current == old) { - return; - } - if (current == 0) { - if (enabled) { - enabled = false; - glDisable(stage); - } - return; - } - if (!enabled) { - enabled = true; - glEnable(stage); - } - glBindProgramARB(stage, current); -} - -} // Anonymous namespace - -ProgramManager::ProgramManager(const Device& device) - : use_assembly_programs{device.UseAssemblyShaders()} { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - } else { - graphics_pipeline.Create(); - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -ProgramManager::~ProgramManager() = default; - -void ProgramManager::BindCompute(GLuint program) { - if (use_assembly_programs) { - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); - } else { - is_graphics_bound = false; - glUseProgram(program); - } -} - -void 
ProgramManager::BindGraphicsPipeline() { - if (!use_assembly_programs) { - UpdateSourcePrograms(); - } -} - -void ProgramManager::BindHostPipeline(GLuint pipeline) { - if (use_assembly_programs) { - if (geometry_enabled) { - geometry_enabled = false; - old_state.geometry = 0; - glDisable(GL_GEOMETRY_PROGRAM_NV); - } - } else { - if (!is_graphics_bound) { - glUseProgram(0); - } - } - glBindProgramPipeline(pipeline); -} - -void ProgramManager::RestoreGuestPipeline() { - if (use_assembly_programs) { - glBindProgramPipeline(0); - } else { - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -void ProgramManager::BindHostCompute(GLuint program) { - if (use_assembly_programs) { - glDisable(GL_COMPUTE_PROGRAM_NV); - } - glUseProgram(program); - is_graphics_bound = false; -} - -void ProgramManager::RestoreGuestCompute() { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - glUseProgram(0); - } -} - -void ProgramManager::UseVertexShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); - } - current_state.vertex = program; -} - -void ProgramManager::UseGeometryShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); - } - current_state.geometry = program; -} - -void ProgramManager::UseFragmentShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); - } - current_state.fragment = program; -} - -void ProgramManager::UpdateSourcePrograms() { - if (!is_graphics_bound) { - is_graphics_bound = true; - glUseProgram(0); - } - - const GLuint handle = graphics_pipeline.handle; - const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { - if (current == old) { - return; - } - glUseProgramStages(handle, stage, current); - }; - update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, 
old_state.vertex); - update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); - update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); - - old_state = current_state; -} - -void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { - const auto& regs = maxwell.regs; - - // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. - y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index ad42cce74..70781d6f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,79 +4,24 @@ #pragma once -#include - #include -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" - namespace OpenGL { -class Device; - -/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at -/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -/// Not following that rule will cause problems on some AMD drivers. -struct alignas(16) MaxwellUniformData { - void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); - - GLfloat y_direction; -}; -static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); -static_assert(sizeof(MaxwellUniformData) < 16384, - "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); - class ProgramManager { public: - explicit ProgramManager(const Device& device); - ~ProgramManager(); - - /// Binds a compute program - void BindCompute(GLuint program); - - /// Updates bound programs. - void BindGraphicsPipeline(); - - /// Binds an OpenGL pipeline object unsynchronized with the guest state. 
- void BindHostPipeline(GLuint pipeline); + void BindProgram(GLuint program) { + if (bound_program == program) { + return; + } + bound_program = program; + glUseProgram(program); + } - /// Rewinds BindHostPipeline state changes. - void RestoreGuestPipeline(); - - /// Binds an OpenGL GLSL program object unsynchronized with the guest state. - void BindHostCompute(GLuint program); - - /// Rewinds BindHostCompute state changes. - void RestoreGuestCompute(); - - void UseVertexShader(GLuint program); - void UseGeometryShader(GLuint program); - void UseFragmentShader(GLuint program); + void RestoreGuestCompute() {} private: - struct PipelineState { - GLuint vertex = 0; - GLuint geometry = 0; - GLuint fragment = 0; - }; - - /// Update GLSL programs. - void UpdateSourcePrograms(); - - OGLPipeline graphics_pipeline; - - PipelineState current_state; - PipelineState old_state; - - bool use_assembly_programs = false; - - bool is_graphics_bound = true; - - bool vertex_enabled = false; - bool geometry_enabled = false; - bool fragment_enabled = false; + GLuint bound_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a8bf84218..7053be161 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -24,9 +24,7 @@ #include "video_core/textures/decoders.h" namespace OpenGL { - namespace { - using Tegra::Texture::SwizzleSource; using Tegra::Texture::TextureMipmapFilter; using Tegra::Texture::TextureType; @@ -59,107 +57,6 @@ struct CopyRegion { GLsizei depth; }; -struct FormatTuple { - GLenum internal_format; - GLenum format = GL_NONE; - GLenum type = GL_NONE; -}; - -constexpr std::array FORMAT_TABLE = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, 
GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, 
GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - 
{GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, - GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT -}}; - constexpr std::array ACCELERATED_FORMATS{ GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, @@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{ GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, }; -const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); - return FORMAT_TABLE[static_cast(pixel_format)]; -} - GLenum ImageTarget(const VideoCommon::ImageInfo& info) { switch (info.type) { case ImageType::e1D: @@ -195,26 +87,24 @@ GLenum ImageTarget(const 
VideoCommon::ImageInfo& info) { return GL_NONE; } -GLenum ImageTarget(ImageViewType type, int num_samples = 1) { +GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) { const bool is_multisampled = num_samples > 1; switch (type) { - case ImageViewType::e1D: + case Shader::TextureType::Color1D: return GL_TEXTURE_1D; - case ImageViewType::e2D: + case Shader::TextureType::Color2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; - case ImageViewType::Cube: + case Shader::TextureType::ColorCube: return GL_TEXTURE_CUBE_MAP; - case ImageViewType::e3D: + case Shader::TextureType::Color3D: return GL_TEXTURE_3D; - case ImageViewType::e1DArray: + case Shader::TextureType::ColorArray1D: return GL_TEXTURE_1D_ARRAY; - case ImageViewType::e2DArray: + case Shader::TextureType::ColorArray2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; - case ImageViewType::CubeArray: + case Shader::TextureType::ColorArrayCube: return GL_TEXTURE_CUBE_MAP_ARRAY; - case ImageViewType::Rect: - return GL_TEXTURE_RECTANGLE; - case ImageViewType::Buffer: + case Shader::TextureType::Buffer: return GL_TEXTURE_BUFFER; } UNREACHABLE_MSG("Invalid image view type={}", type); @@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::arrayflags & VideoCommon::ImageViewFlagBits::Slice)) { - const GLuint texture = image_view->DefaultHandle(); - glNamedFramebufferTexture(fbo, attachment, texture, 0); + glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0); return; } - const GLuint texture = image_view->Handle(ImageViewType::e3D); + const GLuint texture = image_view->Handle(Shader::TextureType::Color3D); if (image_view->range.extent.layers > 1) { // TODO: OpenGL doesn't support rendering to a fixed number of slices glNamedFramebufferTexture(fbo, attachment, texture, 0); @@ -453,7 +342,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& static constexpr std::array 
TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; - for (const FormatTuple& tuple : FORMAT_TABLE) { + for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) { const GLenum format = tuple.internal_format; GLint compat_class; GLint compat_type; @@ -475,11 +364,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); null_image_3d.Create(GL_TEXTURE_3D); - null_image_rect.Create(GL_TEXTURE_RECTANGLE); glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); - glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); std::array new_handles; glGenTextures(static_cast(new_handles.size()), new_handles.data()); @@ -496,29 +383,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, GL_R8, 0, 1, 0, 6); const std::array texture_handles{ - null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, - null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, - null_image_view_2d_array.handle, null_image_view_cube.handle, + null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, + null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle, + null_image_view_cube.handle, }; for (const GLuint handle : texture_handles) { static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); } - const auto set_view = [this](ImageViewType type, GLuint handle) { + const auto set_view = [this](Shader::TextureType 
type, GLuint handle) { if (device.HasDebuggingToolAttached()) { const std::string name = fmt::format("NullImage {}", type); glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); } null_image_views[static_cast(type)] = handle; }; - set_view(ImageViewType::e1D, null_image_view_1d.handle); - set_view(ImageViewType::e2D, null_image_view_2d.handle); - set_view(ImageViewType::Cube, null_image_view_cube.handle); - set_view(ImageViewType::e3D, null_image_3d.handle); - set_view(ImageViewType::e1DArray, null_image_1d_array.handle); - set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); - set_view(ImageViewType::CubeArray, null_image_cube_array.handle); - set_view(ImageViewType::Rect, null_image_rect.handle); + set_view(Shader::TextureType::Color1D, null_image_view_1d.handle); + set_view(Shader::TextureType::Color2D, null_image_view_2d.handle); + set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle); + set_view(Shader::TextureType::Color3D, null_image_3d.handle); + set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); + set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); + set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); } TextureCacheRuntime::~TextureCacheRuntime() = default; @@ -710,7 +596,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; } else { - const auto& tuple = GetFormatTuple(info.format); + const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); gl_internal_format = tuple.internal_format; gl_format = tuple.format; gl_type = tuple.type; @@ -750,8 +636,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); break; case GL_TEXTURE_BUFFER: - buffer.Create(); - glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); + 
UNREACHABLE(); break; default: UNREACHABLE_MSG("Invalid target=0x{:x}", target); @@ -789,14 +674,6 @@ void Image::UploadMemory(const ImageBufferMap& map, } } -void Image::UploadMemory(const ImageBufferMap& map, - std::span copies) { - for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, - copy.dst_offset, copy.size); - } -} - void Image::DownloadMemory(ImageBufferMap& map, std::span copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API @@ -958,7 +835,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI if (True(image.flags & ImageFlagBits::Converted)) { internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; } else { - internal_format = GetFormatTuple(format).internal_format; + internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; } VideoCommon::SubresourceRange flatten_range = info.range; std::array handles; @@ -970,8 +847,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI [[fallthrough]]; case ImageViewType::e1D: glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); + SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range); break; case ImageViewType::e2DArray: flatten_range.extent.layers = 1; @@ -985,37 +862,65 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI .extent = {.levels = 1, .layers = 1}, }; glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); - break; + SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range); + } else { + glGenTextures(2, 
handles.data()); + SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info, + info.range); } - glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); break; case ImageViewType::e3D: glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); + SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range); break; case ImageViewType::CubeArray: flatten_range.extent.layers = 6; [[fallthrough]]; case ImageViewType::Cube: glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); + SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range); break; case ImageViewType::Rect: - glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); + UNIMPLEMENTED(); break; case ImageViewType::Buffer: - glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); - SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); + UNREACHABLE(); + break; + } + switch (info.type) { + case ImageViewType::e1D: + default_handle = Handle(Shader::TextureType::Color1D); + break; + case ImageViewType::e1DArray: + default_handle = Handle(Shader::TextureType::ColorArray1D); + break; + case ImageViewType::e2D: + default_handle = Handle(Shader::TextureType::Color2D); + break; + case ImageViewType::e2DArray: + default_handle = Handle(Shader::TextureType::ColorArray2D); + break; + case ImageViewType::e3D: + default_handle = 
Handle(Shader::TextureType::Color3D); + break; + case ImageViewType::Cube: + default_handle = Handle(Shader::TextureType::ColorCube); + break; + case ImageViewType::CubeArray: + default_handle = Handle(Shader::TextureType::ColorArrayCube); + break; + default: break; } - default_handle = Handle(info.type); } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info) : VideoCommon::ImageViewBase{info, view_info} {} @@ -1023,24 +928,18 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} -void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, +void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, const VideoCommon::ImageViewInfo& info, VideoCommon::SubresourceRange view_range) { - if (info.type == ImageViewType::Buffer) { - // TODO: Take offset from buffer cache - glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, - image.guest_size_bytes); - } else { - const GLuint parent = image.texture.handle; - const GLenum target = ImageTarget(view_type, image.info.num_samples); - glTextureView(handle, target, parent, internal_format, view_range.base.level, - view_range.extent.levels, view_range.base.layer, view_range.extent.layers); - if (!info.IsRenderTarget()) { - ApplySwizzle(handle, format, info.Swizzle()); - } + const GLuint parent = image.texture.handle; + const GLenum target = ImageTarget(view_type, image.info.num_samples); + 
glTextureView(handle, target, parent, internal_format, view_range.base.level, + view_range.extent.levels, view_range.base.layer, view_range.extent.layers); + if (!info.IsRenderTarget()) { + ApplySwizzle(handle, format, info.Swizzle()); } if (device.HasDebuggingToolAttached()) { - const std::string name = VideoCommon::Name(*this, view_type); + const std::string name = VideoCommon::Name(*this); glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); } stored_views.emplace_back().handle = handle; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 817b0e650..2e3e02b79 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -9,6 +9,7 @@ #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/texture_cache.h" @@ -127,13 +128,12 @@ private: OGLTexture null_image_1d_array; OGLTexture null_image_cube_array; OGLTexture null_image_3d; - OGLTexture null_image_rect; OGLTextureView null_image_view_1d; OGLTextureView null_image_view_2d; OGLTextureView null_image_view_2d_array; OGLTextureView null_image_view_cube; - std::array null_image_views; + std::array null_image_views{}; }; class Image : public VideoCommon::ImageBase { @@ -154,8 +154,6 @@ public: void UploadMemory(const ImageBufferMap& map, std::span copies); - void UploadMemory(const ImageBufferMap& map, std::span copies); - void DownloadMemory(ImageBufferMap& map, std::span copies); GLuint StorageHandle() noexcept; @@ -170,7 +168,6 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); OGLTexture texture; - OGLBuffer buffer; OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; @@ -182,12 +179,14 @@ class ImageView : public 
VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); - [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { - return views[static_cast(query_type)]; + [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { + return views[static_cast(handle_type)]; } [[nodiscard]] GLuint DefaultHandle() const noexcept { @@ -198,15 +197,25 @@ public: return internal_format; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: - void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, + void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, const VideoCommon::ImageViewInfo& info, VideoCommon::SubresourceRange view_range); - std::array views{}; + std::array views{}; std::vector stored_views; - GLuint default_handle = 0; GLenum internal_format = GL_NONE; + GLuint default_handle = 0; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index f7ad8f370..672f94bfc 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -5,12 +5,120 @@ #pragma once #include + #include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" namespace OpenGL::MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct FormatTuple { + GLenum 
internal_format; + GLenum format = GL_NONE; + GLenum type = GL_NONE; +}; + +constexpr std::array FORMAT_TABLE = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + 
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, 
GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT +}}; + +inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) { + ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); + return FORMAT_TABLE[static_cast(pixel_format)]; +} + inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { case 
Maxwell::VertexAttribute::Type::UnsignedNorm: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c12929de6..4e77ef808 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -130,7 +130,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, std::unique_ptr context_) : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, - program_manager{device}, rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); @@ -236,12 +235,7 @@ void RendererOpenGL::InitOpenGLObjects() { OGLShader fragment_shader; fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - vertex_program.Create(true, false, vertex_shader.handle); - fragment_program.Create(true, false, fragment_shader.handle); - - pipeline.Create(); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); + present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle); // Generate presentation sampler present_sampler.Create(); @@ -342,8 +336,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, - std::data(ortho_matrix)); + program_manager.BindProgram(present_program.handle); + glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); const auto& texcoords = screen_info.display_texcoords; auto left = 
texcoords.left; @@ -404,8 +398,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyClipControl(); state_tracker.NotifyAlphaTest(); - program_manager.BindHostPipeline(pipeline.handle); - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { @@ -453,7 +445,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - program_manager.RestoreGuestPipeline(); + // TODO + // program_manager.RestoreGuestPipeline(); } void RendererOpenGL::RenderScreenshot() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 0b66f8332..b3ee55665 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -12,7 +12,6 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" namespace Core { @@ -111,9 +110,7 @@ private: // OpenGL object IDs OGLSampler present_sampler; OGLBuffer vertex_buffer; - OGLProgram vertex_program; - OGLProgram fragment_program; - OGLPipeline pipeline; + OGLProgram present_program; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8fb5be393..51e72b705 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -16,7 +16,6 @@ #include "video_core/host_shaders/opengl_copy_bc4_comp.h" #include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" -#include 
"video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" @@ -86,7 +85,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; - program_manager.BindHostCompute(astc_decoder_program.handle); + program_manager.BindProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); @@ -134,7 +133,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); + program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); @@ -173,7 +172,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); + program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); @@ -222,7 +221,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), "Non-power of two 
images are not implemented"); - program_manager.BindHostCompute(pitch_unswizzle_program.handle); + program_manager.BindProgram(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); @@ -250,7 +249,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index feaace0c5..168ffa7e9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,6 +18,9 @@ namespace Vulkan { +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; + ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, @@ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, secondary_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - return TextureHandle{lhs_raw | rhs_raw, via_header_index}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { 
- const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers.push_back(sampler->Handle()); } } @@ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++texture_buffer_ids; ++index; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9f5d30fe8..e5f54a84f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -19,7 +19,7 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" -#ifdef _MSC_VER +#if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] #else #define LAMBDA_FORCEINLINE @@ -30,6 +30,7 @@ namespace { using boost::container::small_vector; using boost::container::static_vector; using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; @@ -289,15 +290,15 @@ void 
GraphicsPipeline::ConfigureImpl(bool is_indexed) { const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; const u32 raw{lhs_raw | rhs_raw}; - return TextureHandle{raw, via_header_index}; + return TexturePair(raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; } }}; if constexpr (Spec::has_texture_buffers) { @@ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; samplers[sampler_index++] = sampler->Handle(); } } @@ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++index; 
++texture_buffer_index; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1334882b5..30b71bdbc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -342,28 +342,15 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { - main_pools.ReleaseContents(); - - std::array graphics_envs; - boost::container::static_vector envs; + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == 0) { - continue; - } - const auto program{static_cast(index)}; - auto& env{graphics_envs[index]}; - const u32 start_address{maxwell3d.regs.shader_config[index].offset}; - env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; - env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; + main_pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; if (pipeline_cache_filename.empty()) { return pipeline; } - serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { + serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { boost::container::static_vector env_ptrs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0f15ad2f7..ef14e91e7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ 
b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index b8b8eace5..78bf90c48 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -91,6 +91,23 @@ const ShaderInfo* ShaderCache::ComputeShader() { return MakeShaderInfo(env, *cpu_shader_addr); } +void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array& unique_hashes) { + size_t env_index{}; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < NUM_PROGRAMS; ++index) { + if (unique_hashes[index] == 0) { + continue; + } + const auto program{static_cast(index)}; + auto& env{result.envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + env.SetCachedSize(shader_infos[index]->size_bytes); + result.env_ptrs[env_index++] = &env; + } +} + ShaderInfo* ShaderCache::TryGet(VAddr addr) const { std::scoped_lock lock{lookup_mutex}; diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 89a4bcc84..136fe294c 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -4,14 +4,18 @@ #pragma once +#include +#include #include #include +#include #include #include #include #include "common/common_types.h" #include "video_core/rasterizer_interface.h" +#include "video_core/shader_environment.h" namespace 
Tegra { class MemoryManager; @@ -30,6 +34,8 @@ class ShaderCache { static constexpr u64 PAGE_BITS = 14; static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; + static constexpr size_t NUM_PROGRAMS = 6; + struct Entry { VAddr addr_start; VAddr addr_end; @@ -58,6 +64,15 @@ public: void SyncGuestHost(); protected: + struct GraphicsEnvironments { + std::array envs; + std::array env_ptrs; + + std::span Span() const noexcept { + return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); + } + }; + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_); @@ -65,17 +80,21 @@ protected: /// @brief Update the hashes and information of shader stages /// @param unique_hashes Shader hashes to store into when a stage is enabled /// @return True no success, false on error - bool RefreshStages(std::array& unique_hashes); + bool RefreshStages(std::array& unique_hashes); /// @brief Returns information about the current compute shader /// @return Pointer to a valid shader, nullptr on error const ShaderInfo* ComputeShader(); + /// @brief Collect the current graphics environments + void GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array& unique_hashes); + Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; - std::array shader_infos{}; + std::array shader_infos{}; bool last_shaders_valid = false; private: diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 5dccc0097..c93174519 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -187,8 +187,8 @@ std::optional GenericEnvironment::TryFindSize() { Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, u32 raw) { - const TextureHandle handle{raw, 
via_header_index}; - const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; + const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); const Shader::TextureType result{ConvertType(entry)}; diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 37d712045..d26dbfaab 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -29,22 +29,6 @@ class Memorymanager; namespace VideoCommon { -struct TextureHandle { - explicit TextureHandle(u32 data, bool via_header_index) { - if (via_header_index) { - image = data; - sampler = data; - } else { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - } - - u32 image; - u32 sampler; -}; - class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index d10ba4ccd..249cc4d0f 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) { return "Invalid"; } -std::string Name(const ImageViewBase& image_view, std::optional type) { +std::string Name(const ImageViewBase& image_view) { const u32 width = image_view.size.width; const u32 height = image_view.size.height; const u32 depth = image_view.size.depth; @@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional t const u32 num_layers = image_view.range.extent.layers; const std::string level = num_levels > 1 ? 
fmt::format(":{}", num_levels) : ""; - switch (type.value_or(image_view.type)) { + switch (image_view.type) { case ImageViewType::e1D: return fmt::format("ImageView 1D {}{}", width, level); case ImageViewType::e2D: diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index a48413983..c6cf0583f 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -255,8 +255,7 @@ struct RenderTargets; [[nodiscard]] std::string Name(const ImageBase& image); -[[nodiscard]] std::string Name(const ImageViewBase& image_view, - std::optional type = std::nullopt); +[[nodiscard]] std::string Name(const ImageViewBase& image_view); [[nodiscard]] std::string Name(const RenderTargets& render_targets); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c1d14335e..1a9399455 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -154,6 +154,15 @@ union TextureHandle { }; static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); +[[nodiscard]] inline std::pair TexturePair(u32 raw, bool via_header_index) { + if (via_header_index) { + return {raw, raw}; + } else { + const Tegra::Texture::TextureHandle handle{raw}; + return {handle.tic_id, via_header_index ? 
handle.tic_id : handle.tsc_id}; + } +} + struct TICEntry { union { struct { diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2318c1bda..e27a2b51e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -282,7 +282,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, .pNext = nullptr, - .storageBuffer16BitAccess = false, + .storageBuffer16BitAccess = true, .uniformAndStorageBuffer16BitAccess = true, .storagePushConstant16 = false, .storageInputOutput16 = false, -- cgit v1.2.3 From bed090807afd3364ed6ef18a031a0ffd95a1b89b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 3 May 2021 20:53:00 -0300 Subject: Move SPIR-V emission functions to their own header --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 5 ++--- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c9ca1f005..6585817bc 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -254,7 +254,7 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( OGLProgram gl_program; gl_program.handle = glCreateProgram(); - Shader::Backend::SPIRV::Bindings binding; + Shader::Backend::Bindings binding; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -297,8 +297,7 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; 
- Shader::Backend::SPIRV::Bindings binding; - const std::vector code{EmitSPIRV(profile, program, binding)}; + const std::vector code{EmitSPIRV(profile, program)}; OGLProgram gl_program; gl_program.handle = glCreateProgram(); AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30b71bdbc..a5edcd072 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,8 +315,9 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; - Shader::Backend::SPIRV::Bindings binding; - for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { + Shader::Backend::Bindings binding; + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; + ++index) { if (key.unique_hashes[index] == 0) { continue; } @@ -388,8 +389,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - Shader::Backend::SPIRV::Bindings binding; - const std::vector code{EmitSPIRV(base_profile, program, binding)}; + const std::vector code{EmitSPIRV(base_profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { -- cgit v1.2.3 From bfa47539f6d5779a80d6fb23ae49c1d34e01ae93 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 5 May 2021 01:08:16 -0300 Subject: gl_shader_cache: Remove code unintentionally committed --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 
6585817bc..9bbdfeb62 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -266,9 +266,6 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( infos[stage_index] = &program.info; const std::vector code{EmitSPIRV(profile, program, binding)}; - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); AddShader(Stage(stage_index), gl_program.handle, code); } LinkProgram(gl_program.handle); -- cgit v1.2.3 From a51503660435f1279ce0fa449f9cf76e74b45d74 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 00:29:08 -0300 Subject: vk_master_semaphore: Use fetch_add to increase master semaphore tick --- src/video_core/renderer_vulkan/vk_master_semaphore.h | 6 +++--- src/video_core/renderer_vulkan/vk_scheduler.cpp | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index ee3cd35d0..4f8688118 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -39,9 +39,9 @@ public: return KnownGpuTick() >= tick; } - /// Advance to the logical tick. 
- void NextTick() noexcept { - ++current_tick; + /// Advance to the logical tick and return the old one + [[nodiscard]] u64 NextTick() noexcept { + return current_tick.fetch_add(1, std::memory_order::relaxed); } /// Refresh the known GPU tick diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 81cb330d9..fcb6a5911 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -168,9 +168,7 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { EndPendingOperations(); InvalidateState(); - const u64 signal_value = master_semaphore->CurrentTick(); - master_semaphore->NextTick(); - + const u64 signal_value = master_semaphore->NextTick(); Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { cmdbuf.End(); -- cgit v1.2.3 From 56c47951c5d92d5e6145060469528301c67e0754 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 00:29:37 -0300 Subject: vk_query_cache: Wait before reading queries --- src/video_core/renderer_vulkan/vk_query_cache.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 1dd78328c..c9cb32d71 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -114,17 +114,10 @@ void HostCounter::EndQuery() { } u64 HostCounter::BlockingQuery() const { - auto& scheduler{cache.GetScheduler()}; - if (tick >= scheduler.CurrentTick()) { - scheduler.Flush(); - // This may not be necessary, but it's better to play it safe and assume drivers don't - // support wait before signal on vkGetQueryPoolResults - scheduler.WaitWorker(); - } + cache.GetScheduler().Wait(tick); u64 data; const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( - query.first, query.second, 1, sizeof(data), 
&data, sizeof(data), - VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT); switch (query_result) { case VK_SUCCESS: -- cgit v1.2.3 From 36f158626726f940d9dba22a2b03ebbb5aa41c5e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 06:26:12 -0300 Subject: vk_scheduler: Use locks instead of SPSC a queue This tries to fix a data race where we'd wait forever for the GPU. --- src/video_core/renderer_vulkan/vk_scheduler.cpp | 58 ++++++++++++++----------- src/video_core/renderer_vulkan/vk_scheduler.h | 16 ++++--- 2 files changed, 42 insertions(+), 32 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index fcb6a5911..4840962de 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -47,8 +47,11 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) } VKScheduler::~VKScheduler() { - quit = true; - cv.notify_all(); + { + std::lock_guard lock{work_mutex}; + quit = true; + } + work_cv.notify_all(); worker_thread.join(); } @@ -69,20 +72,19 @@ void VKScheduler::WaitWorker() { MICROPROFILE_SCOPE(Vulkan_WaitForWorker); DispatchWork(); - bool finished = false; - do { - cv.notify_all(); - std::unique_lock lock{mutex}; - finished = chunk_queue.Empty(); - } while (!finished); + std::unique_lock lock{work_mutex}; + wait_cv.wait(lock, [this] { return work_queue.empty(); }); } void VKScheduler::DispatchWork() { if (chunk->Empty()) { return; } - chunk_queue.Push(std::move(chunk)); - cv.notify_all(); + { + std::lock_guard lock{work_mutex}; + work_queue.push(std::move(chunk)); + } + work_cv.notify_one(); AcquireNewChunk(); } @@ -135,22 +137,27 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { void VKScheduler::WorkerThread() { Common::SetCurrentThreadName("yuzu:VulkanWorker"); 
- std::unique_lock lock{mutex}; do { - cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); - if (quit) { - continue; + if (work_queue.empty()) { + wait_cv.notify_all(); } - while (!chunk_queue.Empty()) { - auto extracted_chunk = std::move(chunk_queue.Front()); - chunk_queue.Pop(); - const bool has_submit = extracted_chunk->HasSubmit(); - extracted_chunk->ExecuteAll(current_cmdbuf); - if (has_submit) { - AllocateWorkerCommandBuffer(); + std::unique_ptr work; + { + std::unique_lock lock{work_mutex}; + work_cv.wait(lock, [this] { return !work_queue.empty() || quit; }); + if (quit) { + continue; } - chunk_reserve.Push(std::move(extracted_chunk)); + work = std::move(work_queue.front()); + work_queue.pop(); + } + const bool has_submit = work->HasSubmit(); + work->ExecuteAll(current_cmdbuf); + if (has_submit) { + AllocateWorkerCommandBuffer(); } + std::lock_guard reserve_lock{reserve_mutex}; + chunk_reserve.push_back(std::move(work)); } while (!quit); } @@ -269,12 +276,13 @@ void VKScheduler::EndRenderPass() { } void VKScheduler::AcquireNewChunk() { - if (chunk_reserve.Empty()) { + std::lock_guard lock{reserve_mutex}; + if (chunk_reserve.empty()) { chunk = std::make_unique(); return; } - chunk = std::move(chunk_reserve.Front()); - chunk_reserve.Pop(); + chunk = std::move(chunk_reserve.back()); + chunk_reserve.pop_back(); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 40215c4c5..6600fb142 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -6,14 +6,14 @@ #include #include +#include #include #include -#include #include #include + #include "common/alignment.h" #include "common/common_types.h" -#include "common/threadsafe_queue.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -220,11 +220,13 @@ private: std::array renderpass_images{}; std::array 
renderpass_image_ranges{}; - Common::SPSCQueue> chunk_queue; - Common::SPSCQueue> chunk_reserve; - std::mutex mutex; - std::condition_variable cv; - bool quit = false; + std::queue> work_queue; + std::vector> chunk_reserve; + std::mutex reserve_mutex; + std::mutex work_mutex; + std::condition_variable work_cv; + std::condition_variable wait_cv; + std::atomic_bool quit{}; }; } // namespace Vulkan -- cgit v1.2.3 From 2c81ad831192a8234e26a61706f18b460999c89f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 8 May 2021 16:34:41 -0300 Subject: glasm: Initial GLASM compute implementation for testing --- .../renderer_opengl/gl_compute_program.cpp | 17 +++++++--- .../renderer_opengl/gl_compute_program.h | 7 ++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 37 ++++++++++++++++++---- 3 files changed, 47 insertions(+), 14 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp index d5ef65439..fb54618a4 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.cpp +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -29,11 +29,11 @@ bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, OGLProgram program_, - const Shader::Info& info_) + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - kepler_compute{kepler_compute_}, - program_manager{program_manager_}, program{std::move(program_)}, info{info_} { + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, + source_program{std::move(source_program_)}, 
assembly_program{std::move(assembly_program_)} { for (const auto& desc : info.texture_buffer_descriptors) { num_texture_buffers += desc.count; } @@ -124,6 +124,14 @@ void ComputeProgram::Configure() { const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); + if (assembly_program.handle != 0) { + // FIXME: State track this + glEnable(GL_COMPUTE_PROGRAM_NV); + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, assembly_program.handle); + program_manager.BindProgram(0); + } else { + program_manager.BindProgram(source_program.handle); + } buffer_cache.UnbindComputeTextureBuffers(); size_t texbuf_index{}; const auto add_buffer{[&](const auto& desc) { @@ -172,7 +180,6 @@ void ComputeProgram::Configure() { if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); } - program_manager.BindProgram(program.handle); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h index 64a75d44d..ddb00dc1d 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.h +++ b/src/video_core/renderer_opengl/gl_compute_program.h @@ -52,8 +52,8 @@ public: explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, OGLProgram program_, - const Shader::Info& info_); + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_); void Configure(); @@ -64,8 +64,9 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; ProgramManager& program_manager; - OGLProgram program; Shader::Info info; + OGLProgram source_program; + OGLAssemblyProgram assembly_program; u32 num_texture_buffers{}; u32 num_image_buffers{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9bbdfeb62..d9f0bca78 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -16,6 +16,7 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" +#include "shader_recompiler/backend/glasm/emit_glasm.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -89,6 +90,7 @@ const Shader::Profile profile{ .xfb_varyings = {}, }; +using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; @@ -151,6 +153,22 @@ void LinkProgram(GLuint program) { } } +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { + OGLAssemblyProgram program; + glGenProgramsARB(1, &program.handle); + glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, + static_cast(code.size()), code.data()); + if (!Settings::values.renderer_debug) { + return program; + } + const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + LOG_CRITICAL(Render_OpenGL, "{}", err); + LOG_INFO(Render_OpenGL, "{}", code); + } + return program; +} + GLenum Stage(size_t stage_index) { switch (stage_index) { case 0: @@ -294,13 +312,20 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - const std::vector code{EmitSPIRV(profile, program)}; - OGLProgram gl_program; - gl_program.handle = glCreateProgram(); - AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); - LinkProgram(gl_program.handle); + OGLAssemblyProgram asm_program; + OGLProgram source_program; + if (device.UseAssemblyShaders()) { + const 
std::string code{EmitGLASM(profile, program)}; + asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + } else { + const std::vector code{EmitSPIRV(profile, program)}; + source_program.handle = glCreateProgram(); + AddShader(GL_COMPUTE_SHADER, source_program.handle, code); + LinkProgram(source_program.handle); + } return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, - program_manager, std::move(gl_program), program.info); + program_manager, program.info, + std::move(source_program), std::move(asm_program)); } } // namespace OpenGL -- cgit v1.2.3 From dc02cb92e43d2ef05197e4edb2573116d7ae58c1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 8 May 2021 16:36:51 -0300 Subject: gl_rasterizer: Flush L2 caches before glFlush on GLASM --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e527b76ba..4834d58f0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -473,6 +473,14 @@ void RasterizerOpenGL::FlushCommands() { return; } num_queued_commands = 0; + + // Make sure memory stored from the previous GL command stream is visible + // This is only needed on assembly shaders where we write to GPU memory with raw pointers + // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used + // and prefer using NV_shader_storage_buffer_object when possible + if (Settings::values.use_assembly_shaders.GetValue()) { + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + } glFlush(); } -- cgit v1.2.3 From 01e18581b9bdba021e6a4ae8fa0f0987ceea3e49 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 03:09:55 -0300 Subject: vk_pipeline_cache: Enable int8 and int16 types on Vulkan --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ 1 file 
changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a5edcd072..7830c0194 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -128,6 +128,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_descriptor_aliasing = true, + .support_int8 = true, + .support_int16 = true, .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == -- cgit v1.2.3 From 568d813eeadfc7888bad72d4f5d695c9d745cfe5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 00:43:19 -0300 Subject: vk_update_descriptor: Properly initialize payload on the update descriptor queue --- src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index ce3427c9b..0df3a7fe9 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -15,7 +15,9 @@ namespace Vulkan { VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) - : device{device_}, scheduler{scheduler_} {} + : device{device_}, scheduler{scheduler_} { + payload_cursor = payload.data(); +} VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; -- cgit v1.2.3 From 258f2dec1bc6f1f9d966579c1efb96f76d947060 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 May 2021 18:19:08 -0300 Subject: opengl: Initial (broken) support to GLASM shaders --- 
.../renderer_opengl/gl_graphics_program.cpp | 15 ++++++- .../renderer_opengl/gl_graphics_program.h | 6 ++- src/video_core/renderer_opengl/gl_shader_cache.cpp | 46 ++++++++++++++++------ 3 files changed, 53 insertions(+), 14 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index fd0958719..7c0bf7bc8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -33,10 +33,12 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, + std::array assembly_programs_, const std::array& infos) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, - state_tracker{state_tracker_}, program{std::move(program_)} { + state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( + assembly_programs_)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? 
*info : Shader::Info{}; }); @@ -290,7 +292,16 @@ void GraphicsProgram::Configure(bool is_indexed) { texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - program_manager.BindProgram(program.handle); + if (assembly_programs[0].handle != 0) { + // TODO: State track this + glEnable(GL_VERTEX_PROGRAM_NV); + glEnable(GL_FRAGMENT_PROGRAM_NV); + glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); + glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); + program_manager.BindProgram(0); + } else { + program_manager.BindProgram(program.handle); + } } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h index 5adf3f41e..58aa4b0bc 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -73,7 +73,9 @@ public: Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, const std::array& infos); + OGLProgram program_, + std::array assembly_programs_, + const std::array& infos); void Configure(bool is_indexed); @@ -86,6 +88,8 @@ private: StateTracker& state_tracker; OGLProgram program; + std::array assembly_programs; + std::array stage_infos{}; std::array base_uniform_bindings{}; std::array base_storage_bindings{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d9f0bca78..c10ea2f60 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -185,6 +185,23 @@ GLenum Stage(size_t stage_index) { UNREACHABLE_MSG("{}", stage_index); return GL_NONE; } + +GLenum AssemblyStage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_PROGRAM_NV; + case 1: + return 
GL_TESS_CONTROL_PROGRAM_NV; + case 2: + return GL_TESS_EVALUATION_PROGRAM_NV; + case 3: + return GL_GEOMETRY_PROGRAM_NV; + case 4: + return GL_FRAGMENT_PROGRAM_NV; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} } // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, @@ -269,10 +286,12 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( } std::array infos{}; - OGLProgram gl_program; - gl_program.handle = glCreateProgram(); - + OGLProgram source_program; + std::array assembly_programs; Shader::Backend::Bindings binding; + if (!device.UseAssemblyShaders()) { + source_program.handle = glCreateProgram(); + } for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -282,15 +301,20 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( Shader::IR::Program& program{programs[index]}; const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - - const std::vector code{EmitSPIRV(profile, program, binding)}; - AddShader(Stage(stage_index), gl_program.handle, code); + if (device.UseAssemblyShaders()) { + const std::string code{EmitGLASM(profile, program)}; + assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); + } else { + const std::vector code{EmitSPIRV(profile, program, binding)}; + AddShader(Stage(stage_index), source_program.handle, code); + } } - LinkProgram(gl_program.handle); - - return std::make_unique(texture_cache, buffer_cache, gpu_memory, maxwell3d, - program_manager, state_tracker, std::move(gl_program), - infos); + if (!device.UseAssemblyShaders()) { + LinkProgram(source_program.handle); + } + return std::make_unique( + texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + std::move(source_program), std::move(assembly_programs), infos); } std::unique_ptr ShaderCache::CreateComputeProgram( -- cgit v1.2.3 From 
8b7d5912d61d56f65fb7e3a03bba544a4c40bfa6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:04:09 -0300 Subject: glasm: Support textures used in more than one stage --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c10ea2f60..b84b36b9d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -302,7 +302,7 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program)}; + const std::string code{EmitGLASM(profile, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { const std::vector code{EmitSPIRV(profile, program, binding)}; -- cgit v1.2.3 From 85fc7e584ef9d64bae3269e7993bbf919bd10640 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:07:18 -0300 Subject: HACK: Bind stages before and after bindings Works around a bug where program parameters are only applied to the current stage, and this one wasn't bound at the moment. Affects all SSBO usages on GLASM. 
--- src/video_core/renderer_opengl/gl_graphics_program.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index 7c0bf7bc8..4ac026502 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -240,6 +240,17 @@ void GraphicsProgram::Configure(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); + // FIXME: Unhack this + if (assembly_programs[0].handle != 0) { + // TODO: State track this + glEnable(GL_VERTEX_PROGRAM_NV); + glEnable(GL_FRAGMENT_PROGRAM_NV); + glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); + glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); + program_manager.BindProgram(0); + } else { + program_manager.BindProgram(program.handle); + } const ImageId* views_it{image_view_ids.data()}; GLsizei texture_binding = 0; GLsizei image_binding = 0; -- cgit v1.2.3 From c5ca4fe451c398542f4f6c5e468e0bb96866175d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 16:53:51 -0300 Subject: renderer_opengl: State track assembly programs --- .../renderer_opengl/gl_graphics_program.cpp | 27 +++--------- .../renderer_opengl/gl_graphics_program.h | 1 + src/video_core/renderer_opengl/gl_shader_manager.h | 51 ++++++++++++++++++++-- 3 files changed, 56 insertions(+), 23 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index 4ac026502..b5d75aa13 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -42,6 +42,9 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff std::ranges::transform(infos, 
stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + for (size_t stage = 0; stage < 5; ++stage) { + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + } u32 num_textures{}; u32 num_images{}; for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { @@ -182,6 +185,9 @@ void GraphicsProgram::Configure(bool is_indexed) { const std::span indices_span(image_view_indices.data(), image_view_index); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + texture_cache.UpdateRenderTargets(false); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + ImageId* texture_buffer_index{image_view_ids.data()}; const auto bind_stage_info{[&](size_t stage) { size_t index{}; @@ -240,14 +246,8 @@ void GraphicsProgram::Configure(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); - // FIXME: Unhack this if (assembly_programs[0].handle != 0) { - // TODO: State track this - glEnable(GL_VERTEX_PROGRAM_NV); - glEnable(GL_FRAGMENT_PROGRAM_NV); - glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); - glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); - program_manager.BindProgram(0); + program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { program_manager.BindProgram(program.handle); } @@ -300,19 +300,6 @@ void GraphicsProgram::Configure(bool is_indexed) { if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); } - texture_cache.UpdateRenderTargets(false); - - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - if (assembly_programs[0].handle != 0) { - // TODO: State track this - glEnable(GL_VERTEX_PROGRAM_NV); - glEnable(GL_FRAGMENT_PROGRAM_NV); - glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); - glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); - 
program_manager.BindProgram(0); - } else { - program_manager.BindProgram(program.handle); - } } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h index 58aa4b0bc..18292bb16 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -89,6 +89,7 @@ private: OGLProgram program; std::array assembly_programs; + u32 enabled_stages_mask{}; std::array stage_infos{}; std::array base_uniform_bindings{}; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 70781d6f5..48669b3cd 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,24 +4,69 @@ #pragma once +#include +#include + #include +#include "video_core/renderer_opengl/gl_resource_manager.h" + +#pragma optimize("", off) + namespace OpenGL { class ProgramManager { + static constexpr size_t NUM_STAGES = 5; + + static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, + }; + public: void BindProgram(GLuint program) { - if (bound_program == program) { + if (current_source_program == program) { return; } - bound_program = program; + current_source_program = program; glUseProgram(program); } + void BindAssemblyPrograms(std::span programs, + u32 stage_mask) { + const u32 changed_mask = current_assembly_mask ^ stage_mask; + current_assembly_mask = stage_mask; + + if (changed_mask != 0) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (((changed_mask >> stage) & 1) != 0) { + if (((stage_mask >> stage) & 1) != 0) { + glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } else { + glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } + } + } + } + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if 
(current_assembly_programs[stage] != programs[stage].handle) { + current_assembly_programs[stage] = programs[stage].handle; + glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); + } + } + if (current_source_program != 0) { + current_source_program = 0; + glUseProgram(0); + } + } + void RestoreGuestCompute() {} private: - GLuint bound_program = 0; + GLuint current_source_program = 0; + + u32 current_assembly_mask = 0; + std::array current_assembly_programs; }; } // namespace OpenGL -- cgit v1.2.3 From 690b1841e6a1437335c0aae6d934f3fdcdb1680c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 16:59:05 -0300 Subject: renderer_opengl: State track compute assembly programs --- src/video_core/renderer_opengl/gl_compute_program.cpp | 5 +---- src/video_core/renderer_opengl/gl_shader_manager.h | 19 +++++++++++++++++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 1 + 3 files changed, 21 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp index fb54618a4..ce52a0052 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.cpp +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -125,10 +125,7 @@ void ComputeProgram::Configure() { texture_cache.FillComputeImageViews(indices_span, image_view_ids); if (assembly_program.handle != 0) { - // FIXME: State track this - glEnable(GL_COMPUTE_PROGRAM_NV); - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, assembly_program.handle); - program_manager.BindProgram(0); + program_manager.BindComputeAssemblyProgram(assembly_program.handle); } else { program_manager.BindProgram(source_program.handle); } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 48669b3cd..df7e1f644 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ 
-10,6 +10,7 @@ #include #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_device.h" #pragma optimize("", off) @@ -24,6 +25,12 @@ class ProgramManager { }; public: + explicit ProgramManager(const Device& device) { + if (device.UseAssemblyShaders()) { + glEnable(GL_COMPUTE_PROGRAM_NV); + } + } + void BindProgram(GLuint program) { if (current_source_program == program) { return; @@ -32,6 +39,17 @@ public: glUseProgram(program); } + void BindComputeAssemblyProgram(GLuint program) { + if (current_compute_assembly_program != program) { + current_compute_assembly_program = program; + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); + } + if (current_source_program != 0) { + current_source_program = 0; + glUseProgram(0); + } + } + void BindAssemblyPrograms(std::span programs, u32 stage_mask) { const u32 changed_mask = current_assembly_mask ^ stage_mask; @@ -67,6 +85,7 @@ private: u32 current_assembly_mask = 0; std::array current_assembly_programs; + GLuint current_compute_assembly_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 4e77ef808..a4805f3da 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -130,6 +130,7 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, std::unique_ptr context_) : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, + program_manager{device}, rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); -- cgit v1.2.3 From c0e4074721825e2af7be4f1a70408f5edb06597d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:00:55 -0300 Subject: 
gl_shader_manager: Remove unintentionally committed #pragma --- src/video_core/renderer_opengl/gl_shader_manager.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index df7e1f644..c922bcf82 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -12,8 +12,6 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_device.h" -#pragma optimize("", off) - namespace OpenGL { class ProgramManager { -- cgit v1.2.3 From 54decced922aaa73f4c30d696679f3602c930204 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:01:41 -0300 Subject: gl_shader_manager: Zero initialize current assembly programs --- src/video_core/renderer_opengl/gl_shader_manager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index c922bcf82..5ec57d707 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -82,7 +82,7 @@ private: GLuint current_source_program = 0; u32 current_assembly_mask = 0; - std::array current_assembly_programs; + std::array current_assembly_programs{}; GLuint current_compute_assembly_program = 0; }; -- cgit v1.2.3 From 9e7b6622c25aa858b96bf0f1c7f94223a2f449a2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 02:12:32 -0300 Subject: shader: Split profile and runtime information in separate structs --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 26 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 418 ++++++++++----------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 5 +- 3 files changed, 212 insertions(+), 237 deletions(-) (limited to 'src/video_core') diff --git 
a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b84b36b9d..d7efbdd01 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -61,33 +61,15 @@ const Shader::Profile profile{ .support_viewport_mask = true, .support_typeless_image_loads = true, .support_demote_to_helper_invocation = false, - .warp_size_potentially_larger_than_guest = true, .support_int64_atomics = false, + + .warp_size_potentially_larger_than_guest = true, .lower_left_origin_mode = true, .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, .has_broken_signed_operations = true, .ignore_nan_fp_comparisons = true, - - .generic_input_types = {}, - .convert_depth_mode = false, - .force_early_z = false, - - .tess_primitive = {}, - .tess_spacing = {}, - .tess_clockwise = false, - - .input_topology = Shader::InputTopology::Triangles, - - .fixed_state_point_size = std::nullopt, - - .alpha_test_func = Shader::CompareFunction::Always, - .alpha_test_reference = 0.0f, - - .y_negate = false, - - .xfb_varyings = {}, }; using Shader::Backend::GLASM::EmitGLASM; @@ -302,10 +284,10 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program, binding)}; + const std::string code{EmitGLASM(profile, {}, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector code{EmitSPIRV(profile, program, binding)}; + const std::vector code{EmitSPIRV(profile, {}, program, binding)}; AddShader(Stage(stage_index), source_program.handle, code); } } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7830c0194..88db10b75 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ 
b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -89,6 +89,208 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); return {}; } + +static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + if (attr.enabled == 0) { + return Shader::AttributeType::Disabled; + } + switch (attr.Type()) { + case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: + return Shader::AttributeType::Float; + case Maxwell::VertexAttribute::Type::SignedInt: + return Shader::AttributeType::SignedInt; + case Maxwell::VertexAttribute::Type::UnsignedInt: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Float; +} + +std::vector MakeTransformFeedbackVaryings( + const GraphicsPipelineCacheKey& key) { + static constexpr std::array VECTORS{ + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 
212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] + }; + std::vector xfb(256); + for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { + const auto& locations = key.state.xfb_state.varyings[buffer]; + const auto& layout = key.state.xfb_state.layouts[buffer]; + const u32 varying_count = layout.varying_count; + u32 highest = 0; + for (u32 offset = 0; offset < varying_count; ++offset) { + const u32 base_offset = offset; + const u8 location = locations[offset]; + + Shader::TransformFeedbackVarying varying; + varying.buffer = layout.stream; + varying.stride = layout.stride; + varying.offset = offset * 4; + varying.components = 1; + + if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + xfb[location] = varying; + highest = std::max(highest, (base_offset + varying.components) * 4); + } + UNIMPLEMENTED_IF(highest != layout.stride); + } + return xfb; +} + +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, + const Shader::IR::Program& program) { + Shader::RuntimeInfo info; + + const Shader::Stage stage{program.stage}; + const bool has_geometry{key.unique_hashes[4] != 0}; + const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; + const float point_size{Common::BitCast(key.state.point_size)}; + switch (stage) { + case Shader::Stage::VertexB: + if (!has_geometry) { + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled != 0) { + info.xfb_varyings = MakeTransformFeedbackVaryings(key); + } + info.convert_depth_mode = gl_ndc; + } + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); + break; + case 
Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... + info.tess_clockwise = key.state.tessellation_clockwise == 0; + info.tess_primitive = [&key] { + const u32 raw{key.state.tessellation_primitive.Value()}; + switch (static_cast(raw)) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + const u32 raw{key.state.tessellation_spacing}; + switch (static_cast(raw)) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Geometry: + if (program.output_topology == Shader::OutputTopology::PointList) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled != 0) { + info.xfb_varyings = MakeTransformFeedbackVaryings(key); + } + info.convert_depth_mode = gl_ndc; + break; + case Shader::Stage::Fragment: + info.alpha_test_func = MaxwellToCompareFunction( + key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); + info.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); + break; + default: + break; + } + switch (key.state.topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case 
Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; + } + info.force_early_z = key.state.early_z != 0; + info.y_negate = key.state.y_negate != 0; + return info; +} } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -124,7 +326,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; - base_profile = Shader::Profile{ + profile = Shader::Profile{ .supported_spirv = device.IsKhrSpirv1_4Supported() ? 
0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_descriptor_aliasing = true, @@ -153,14 +355,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, - .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, - .generic_input_types{}, - .fixed_state_point_size{}, - .alpha_test_func{}, - .xfb_varyings{}, }; } @@ -329,8 +527,8 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::Profile profile{MakeProfile(key, program)}; - const std::vector code{EmitSPIRV(profile, program, binding)}; + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { @@ -391,7 +589,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - const std::vector code{EmitSPIRV(base_profile, program)}; + const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { @@ -403,206 +601,4 @@ std::unique_ptr PipelineCache::CreateComputePipeline( thread_worker, program.info, std::move(spv_module)); 
} -static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { - if (attr.enabled == 0) { - return Shader::AttributeType::Disabled; - } - switch (attr.Type()) { - case Maxwell::VertexAttribute::Type::SignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - case Maxwell::VertexAttribute::Type::Float: - return Shader::AttributeType::Float; - case Maxwell::VertexAttribute::Type::SignedInt: - return Shader::AttributeType::SignedInt; - case Maxwell::VertexAttribute::Type::UnsignedInt: - return Shader::AttributeType::UnsignedInt; - } - return Shader::AttributeType::Float; -} - -static std::vector MakeTransformFeedbackVaryings( - const GraphicsPipelineCacheKey& key) { - static constexpr std::array VECTORS{ - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] - }; - std::vector xfb(256); - for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = 
key.state.xfb_state.varyings[buffer]; - const auto& layout = key.state.xfb_state.layouts[buffer]; - const u32 varying_count = layout.varying_count; - u32 highest = 0; - for (u32 offset = 0; offset < varying_count; ++offset) { - const u32 base_offset = offset; - const u8 location = locations[offset]; - - Shader::TransformFeedbackVarying varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * 4; - varying.components = 1; - - if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - xfb[location] = varying; - highest = std::max(highest, (base_offset + varying.components) * 4); - } - UNIMPLEMENTED_IF(highest != layout.stride); - } - return xfb; -} - -Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program) { - Shader::Profile profile{base_profile}; - - const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; - const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; - const float point_size{Common::BitCast(key.state.point_size)}; - switch (stage) { - case Shader::Stage::VertexB: - if (!has_geometry) { - if (key.state.topology == Maxwell::PrimitiveTopology::Points) { - profile.fixed_state_point_size = point_size; - } - if (key.state.xfb_enabled != 0) { - profile.xfb_varyings = MakeTransformFeedbackVaryings(key); - } - profile.convert_depth_mode = gl_ndc; - } - std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), - &CastAttributeType); - break; - case Shader::Stage::TessellationEval: - // We have to flip tessellation clockwise for some reason... 
- profile.tess_clockwise = key.state.tessellation_clockwise == 0; - profile.tess_primitive = [&key] { - const u32 raw{key.state.tessellation_primitive.Value()}; - switch (static_cast(raw)) { - case Maxwell::TessellationPrimitive::Isolines: - return Shader::TessPrimitive::Isolines; - case Maxwell::TessellationPrimitive::Triangles: - return Shader::TessPrimitive::Triangles; - case Maxwell::TessellationPrimitive::Quads: - return Shader::TessPrimitive::Quads; - } - UNREACHABLE(); - return Shader::TessPrimitive::Triangles; - }(); - profile.tess_spacing = [&] { - const u32 raw{key.state.tessellation_spacing}; - switch (static_cast(raw)) { - case Maxwell::TessellationSpacing::Equal: - return Shader::TessSpacing::Equal; - case Maxwell::TessellationSpacing::FractionalOdd: - return Shader::TessSpacing::FractionalOdd; - case Maxwell::TessellationSpacing::FractionalEven: - return Shader::TessSpacing::FractionalEven; - } - UNREACHABLE(); - return Shader::TessSpacing::Equal; - }(); - break; - case Shader::Stage::Geometry: - if (program.output_topology == Shader::OutputTopology::PointList) { - profile.fixed_state_point_size = point_size; - } - if (key.state.xfb_enabled != 0) { - profile.xfb_varyings = MakeTransformFeedbackVaryings(key); - } - profile.convert_depth_mode = gl_ndc; - break; - case Shader::Stage::Fragment: - profile.alpha_test_func = MaxwellToCompareFunction( - key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); - profile.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); - break; - default: - break; - } - switch (key.state.topology) { - case Maxwell::PrimitiveTopology::Points: - profile.input_topology = Shader::InputTopology::Points; - break; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - profile.input_topology = Shader::InputTopology::Lines; - break; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - 
case Maxwell::PrimitiveTopology::TriangleFan: - case Maxwell::PrimitiveTopology::Quads: - case Maxwell::PrimitiveTopology::QuadStrip: - case Maxwell::PrimitiveTopology::Polygon: - case Maxwell::PrimitiveTopology::Patches: - profile.input_topology = Shader::InputTopology::Triangles; - break; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - profile.input_topology = Shader::InputTopology::LinesAdjacency; - break; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - profile.input_topology = Shader::InputTopology::TrianglesAdjacency; - break; - } - profile.force_early_z = key.state.early_z != 0; - profile.y_negate = key.state.y_negate != 0; - return profile; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 4e48b4956..4116cc73f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -129,9 +129,6 @@ private: Shader::Environment& env, bool build_in_parallel); - Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program); - const Device& device; VKScheduler& scheduler; DescriptorPool& descriptor_pool; @@ -148,7 +145,7 @@ private: ShaderPools main_pools; - Shader::Profile base_profile; + Shader::Profile profile; std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; -- cgit v1.2.3 From c07cc9d6a560d14e25ec59974ae5a15a7842d779 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 02:57:52 -0300 Subject: gl_shader_cache: Pass shader runtime information --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 76 +++++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d7efbdd01..b4f26dd74 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -184,6 +184,76 @@ GLenum AssemblyStage(size_t stage_index) { UNREACHABLE_MSG("{}", stage_index); return GL_NONE; } + +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, + const Shader::IR::Program& program) { + UNIMPLEMENTED_IF_MSG(key.xfb_enabled != 0, "Transform feedbacks"); + + Shader::RuntimeInfo info; + switch (program.stage) { + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... + info.tess_clockwise = key.tessellation_clockwise == 0; + info.tess_primitive = [&key] { + switch (key.tessellation_primitive) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + switch (key.tessellation_spacing) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Geometry: + + break; + default: + break; + } + switch (key.gs_input_topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + 
case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; + } + return info; +} } // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, @@ -283,11 +353,13 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( Shader::IR::Program& program{programs[index]}; const size_t stage_index{index - 1}; infos[stage_index] = &program.info; + + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, {}, program, binding)}; + const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector code{EmitSPIRV(profile, {}, program, binding)}; + const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; AddShader(Stage(stage_index), source_program.handle, code); } } -- cgit v1.2.3 From 69b910e9e7c2b9c361f4389cb1d136105b991bc0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 17:19:35 -0300 Subject: video_core: Abstract transform feedback translation utility --- src/video_core/CMakeLists.txt | 2 + .../renderer_vulkan/fixed_pipeline_state.cpp | 25 +++--- .../renderer_vulkan/fixed_pipeline_state.h | 15 +--- 
.../renderer_vulkan/vk_pipeline_cache.cpp | 86 +------------------ src/video_core/transform_feedback.cpp | 98 ++++++++++++++++++++++ src/video_core/transform_feedback.h | 30 +++++++ 6 files changed, 145 insertions(+), 111 deletions(-) create mode 100644 src/video_core/transform_feedback.cpp create mode 100644 src/video_core/transform_feedback.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index b008c37c0..8250f736c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -187,6 +187,8 @@ add_library(video_core STATIC textures/decoders.h textures/texture.cpp textures/texture.h + transform_feedback.cpp + transform_feedback.h video_core.cpp video_core.h vulkan_common/vulkan_debug_callback.cpp diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 24834e0f7..3a43c329f 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -15,9 +15,7 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" namespace Vulkan { - namespace { - constexpr size_t POINT = 0; constexpr size_t LINE = 1; constexpr size_t POLYGON = 2; @@ -39,6 +37,16 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { POLYGON, // Patches }; +void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { + return VideoCommon::TransformFeedbackState::Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + state.varyings = regs.tfb_varying_locs; +} } // Anonymous namespace void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, @@ -121,7 +129,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, dynamic_state.Refresh(regs); } if (xfb_enabled != 0) { - 
xfb_state.Refresh(regs); + RefreshXfbState(xfb_state, regs); } } @@ -164,17 +172,6 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t enable.Assign(1); } -void FixedPipelineState::TransformFeedbackState::Refresh(const Maxwell& regs) { - std::ranges::transform(regs.tfb_layouts, layouts.begin(), [](const auto& layout) { - return Layout{ - .stream = layout.stream, - .varying_count = layout.varying_count, - .stride = layout.stride, - }; - }); - varyings = regs.tfb_varying_locs; -} - void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 31de6b2c8..0f1eff9cd 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -12,6 +12,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" +#include "video_core/transform_feedback.h" namespace Vulkan { @@ -130,18 +131,6 @@ struct FixedPipelineState { } }; - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - - void Refresh(const Maxwell& regs); - }; - struct DynamicState { union { u32 raw1; @@ -213,7 +202,7 @@ struct FixedPipelineState { std::array attachments; std::array viewport_swizzles; DynamicState dynamic_state; - TransformFeedbackState xfb_state; + VideoCommon::TransformFeedbackState xfb_state; void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 88db10b75..f86bf9c30 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ 
b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -109,88 +109,6 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } -std::vector MakeTransformFeedbackVaryings( - const GraphicsPipelineCacheKey& key) { - static constexpr std::array VECTORS{ - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] - }; - std::vector xfb(256); - for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = key.state.xfb_state.varyings[buffer]; - const auto& layout = key.state.xfb_state.layouts[buffer]; - const u32 varying_count = layout.varying_count; - u32 highest = 0; - for (u32 offset = 0; offset < varying_count; ++offset) { - const u32 base_offset = offset; - const u8 location = locations[offset]; - - Shader::TransformFeedbackVarying varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * 4; - varying.components = 1; - - if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { - 
UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - xfb[location] = varying; - highest = std::max(highest, (base_offset + varying.components) * 4); - } - UNIMPLEMENTED_IF(highest != layout.stride); - } - return xfb; -} - Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::RuntimeInfo info; @@ -206,7 +124,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled != 0) { - info.xfb_varyings = MakeTransformFeedbackVaryings(key); + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); } info.convert_depth_mode = gl_ndc; } @@ -248,7 +166,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled != 0) { - info.xfb_varyings = MakeTransformFeedbackVaryings(key); + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); } info.convert_depth_mode = gl_ndc; break; diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp new file mode 100644 index 000000000..db52fff93 --- /dev/null +++ b/src/video_core/transform_feedback.cpp @@ -0,0 +1,98 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include + +#include "common/alignment.h" +#include "common/assert.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/transform_feedback.h" + +namespace VideoCommon { + +std::vector MakeTransformFeedbackVaryings( + const TransformFeedbackState& state) { + static constexpr std::array VECTORS{ + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] + }; + std::vector xfb(256); + for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { + const auto& locations = state.varyings[buffer]; + const auto& layout = state.layouts[buffer]; + const u32 varying_count = layout.varying_count; + u32 highest = 0; + for (u32 offset = 0; offset < varying_count; ++offset) { + const u32 base_offset = offset; + const u8 location = locations[offset]; + + Shader::TransformFeedbackVarying varying{ + .buffer = layout.stream, + .stride = layout.stride, + .offset = offset * 4, + .components = 1, + }; + if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned 
TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + xfb[location] = varying; + highest = std::max(highest, (base_offset + varying.components) * 4); + } + UNIMPLEMENTED_IF(highest != layout.stride); + } + return xfb; +} + +} // namespace VideoCommon diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h new file mode 100644 index 000000000..6832c6db1 --- /dev/null +++ b/src/video_core/transform_feedback.h @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/profile.h" +#include "video_core/engines/maxwell_3d.h" + +namespace VideoCommon { + +struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> + varyings; +}; + +std::vector MakeTransformFeedbackVaryings( + const TransformFeedbackState& state); + +} // namespace VideoCommon -- cgit v1.2.3 From 6bc54e12a0d274beee0cb7584f73429112ec98b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 18:17:53 -0300 Subject: glasm: Set transform feedback state --- .../renderer_opengl/gl_graphics_program.cpp | 90 +++++++++++++++++++- .../renderer_opengl/gl_graphics_program.h | 32 ++++--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 98 ++-------------------- src/video_core/renderer_opengl/gl_rasterizer.h | 6 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 19 ++++- 5 files changed, 132 insertions(+), 113 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index b5d75aa13..9677a3ed6 100644 --- 
a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -12,7 +12,7 @@ #include "video_core/texture_cache/texture_cache.h" namespace OpenGL { - +namespace { using Shader::ImageBufferDescriptor; using Tegra::Texture::TexturePair; using VideoCommon::ImageId; @@ -20,6 +20,35 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; +/// Translates hardware transform feedback indices +/// @param location Hardware location +/// @return Pair of ARB_transform_feedback3 token stream first and third arguments +/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt +std::pair TransformFeedbackEnum(u8 location) { + const u8 index = location / 4; + if (index >= 8 && index <= 39) { + return {GL_GENERIC_ATTRIB_NV, index - 8}; + } + if (index >= 48 && index <= 55) { + return {GL_TEXTURE_COORD_NV, index - 48}; + } + switch (index) { + case 7: + return {GL_POSITION, 0}; + case 40: + return {GL_PRIMARY_COLOR_NV, 0}; + case 41: + return {GL_SECONDARY_COLOR_NV, 0}; + case 42: + return {GL_BACK_PRIMARY_COLOR_NV, 0}; + case 43: + return {GL_BACK_SECONDARY_COLOR_NV, 0}; + } + UNIMPLEMENTED_MSG("index={}", index); + return {GL_POSITION, 0}; +} +} // Anonymous namespace + size_t GraphicsProgramKey::Hash() const noexcept { return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); } @@ -34,7 +63,8 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, std::array assembly_programs_, - const std::array& infos) + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( @@ 
-74,6 +104,10 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); + + if (assembly_programs[0].handle != 0 && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } } struct Spec { @@ -302,4 +336,56 @@ void GraphicsProgram::Configure(bool is_indexed) { } } +void GraphicsProgram::GenerateTransformFeedbackState( + const VideoCommon::TransformFeedbackState& xfb_state) { + // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal + // when this is required. + const auto& regs{maxwell3d.regs}; + + GLint* cursor{xfb_attribs.data()}; + GLint* current_stream{xfb_streams.data()}; + + for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { + const auto& layout = regs.tfb_layouts[feedback]; + UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); + if (layout.varying_count == 0) { + continue; + } + *current_stream = static_cast(feedback); + if (current_stream != xfb_streams.data()) { + // When stepping one stream, push the expected token + cursor[0] = GL_NEXT_BUFFER_NV; + cursor[1] = 0; + cursor[2] = 0; + cursor += XFB_ENTRY_STRIDE; + } + ++current_stream; + + const auto& locations = regs.tfb_varying_locs[feedback]; + std::optional current_index; + for (u32 offset = 0; offset < layout.varying_count; ++offset) { + const u8 location = locations[offset]; + const u8 index = location / 4; + + if (current_index == index) { + // Increase number of components of the previous attachment + ++cursor[-2]; + continue; + } + current_index = index; + + std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); + cursor[1] = 1; + cursor += XFB_ENTRY_STRIDE; + } + } + num_xfb_attribs = static_cast((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); + num_xfb_strides = static_cast(current_stream - xfb_streams.data()); +} + +void GraphicsProgram::ConfigureTransformFeedbackImpl() 
const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h index 18292bb16..53a57ede5 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -16,6 +16,7 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/transform_feedback.h" namespace OpenGL { @@ -24,16 +25,6 @@ class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct GraphicsProgramKey { - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - }; - std::array unique_hashes; union { u32 raw; @@ -45,7 +36,7 @@ struct GraphicsProgramKey { BitField<10, 1, u32> tessellation_clockwise; }; std::array padding; - TransformFeedbackState xfb_state; + VideoCommon::TransformFeedbackState xfb_state; size_t Hash() const noexcept; @@ -75,11 +66,22 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, std::array assembly_programs_, - const std::array& infos); + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state); void Configure(bool is_indexed); + void ConfigureTransformFeedback() const { + if (num_xfb_attribs != 0) { + ConfigureTransformFeedbackImpl(); + } + } + private: + void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + + void ConfigureTransformFeedbackImpl() const; + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -96,6 +98,12 @@ private: std::array 
base_storage_bindings{}; std::array num_texture_buffers{}; std::array num_image_buffers{}; + + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; + GLsizei num_xfb_attribs{}; + GLsizei num_xfb_strides{}; + std::array xfb_attribs{}; + std::array xfb_streams{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4834d58f0..51ff42ee9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -51,37 +51,8 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); namespace { - constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -/// Translates hardware transform feedback indices -/// @param location Hardware location -/// @return Pair of ARB_transform_feedback3 token stream first and third arguments -/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt -std::pair TransformFeedbackEnum(u8 location) { - const u8 index = location / 4; - if (index >= 8 && index <= 39) { - return {GL_GENERIC_ATTRIB_NV, index - 8}; - } - if (index >= 48 && index <= 55) { - return {GL_TEXTURE_COORD_NV, index - 48}; - } - switch (index) { - case 7: - return {GL_POSITION, 0}; - case 40: - return {GL_PRIMARY_COLOR_NV, 0}; - case 41: - return {GL_SECONDARY_COLOR_NV, 0}; - case 42: - return {GL_BACK_PRIMARY_COLOR_NV, 0}; - case 43: - return {GL_BACK_SECONDARY_COLOR_NV, 0}; - } - UNIMPLEMENTED_MSG("index={}", index); - return {GL_POSITION, 0}; -} - void oglEnable(GLenum cap, bool state) { (state ? 
glEnable : glDisable)(cap); } @@ -253,7 +224,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { program->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - BeginTransformFeedback(primitive_mode); + BeginTransformFeedback(program, primitive_mode); const GLuint base_instance = static_cast(maxwell3d.regs.vb_base_instance); const GLsizei num_instances = @@ -1025,68 +996,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); } -void RasterizerOpenGL::SyncTransformFeedback() { - // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal - // when this is required. - const auto& regs = maxwell3d.regs; - - static constexpr std::size_t STRIDE = 3; - std::array attribs; - std::array streams; - - GLint* cursor = attribs.data(); - GLint* current_stream = streams.data(); - - for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; - UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); - if (layout.varying_count == 0) { - continue; - } - - *current_stream = static_cast(feedback); - if (current_stream != streams.data()) { - // When stepping one stream, push the expected token - cursor[0] = GL_NEXT_BUFFER_NV; - cursor[1] = 0; - cursor[2] = 0; - cursor += STRIDE; - } - ++current_stream; - - const auto& locations = regs.tfb_varying_locs[feedback]; - std::optional current_index; - for (u32 offset = 0; offset < layout.varying_count; ++offset) { - const u8 location = locations[offset]; - const u8 index = location / 4; - - if (current_index == index) { - // Increase number of components of the previous attachment - ++cursor[-2]; - continue; - } - current_index = index; - - std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); - cursor[1] = 1; - cursor += STRIDE; - } - } - - 
const GLsizei num_attribs = static_cast((cursor - attribs.data()) / STRIDE); - const GLsizei num_strides = static_cast(current_stream - streams.data()); - glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(), - GL_INTERLEAVED_ATTRIBS); -} - -void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { +void RasterizerOpenGL::BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode) { const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } - if (device.UseAssemblyShaders()) { - SyncTransformFeedback(); - } + program->ConfigureTransformFeedback(); + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); @@ -1100,11 +1016,9 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { } void RasterizerOpenGL::EndTransformFeedback() { - const auto& regs = maxwell3d.regs; - if (regs.tfb_enabled == 0) { - return; + if (maxwell3d.regs.tfb_enabled != 0) { + glEndTransformFeedback(); } - glEndTransformFeedback(); } AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2fdcbe4ba..08f509c19 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -193,12 +193,8 @@ private: /// Syncs vertex instances to match the guest state void SyncVertexInstances(); - /// Syncs transform feedback state to match guest state - /// @note Only valid on assembly shaders - void SyncTransformFeedback(); - /// Begin a transform feedback - void BeginTransformFeedback(GLenum primitive_mode); + void BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode); /// End a transform feedback void EndTransformFeedback(); diff 
--git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b4f26dd74..0a0f1324f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -254,6 +254,17 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, } return info; } + +void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { + return VideoCommon::TransformFeedbackState::Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + state.varyings = regs.tfb_varying_locs; +} } // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, @@ -282,7 +293,10 @@ GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); - + graphics_key.xfb_enabled.Assign(regs.tfb_enabled != 0 ? 1 : 0); + if (graphics_key.xfb_enabled) { + SetXfbState(graphics_key.xfb_state, regs); + } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& program{pair->second}; if (is_new) { @@ -368,7 +382,8 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( } return std::make_unique( texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - std::move(source_program), std::move(assembly_programs), infos); + std::move(source_program), std::move(assembly_programs), infos, + key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); } std::unique_ptr ShaderCache::CreateComputeProgram( -- cgit v1.2.3 From 84feabac881443d27f84f8fec5eba6dc3b13b620 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 18:27:37 -0300 Subject: glasm: Implement forced early Z --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0a0f1324f..e678b4bb2 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -219,8 +219,8 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, return Shader::TessSpacing::Equal; }(); break; - case Shader::Stage::Geometry: - + case Shader::Stage::Fragment: + info.force_early_z = key.early_z != 0; break; default: break; -- cgit v1.2.3 From df406246d9117ba1c428d81ba7466ba0291ece3c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 20:36:30 -0300 Subject: gl_shader_cache: Improve GLASM error print logic --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e678b4bb2..747a133fb 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -140,13 +140,16 @@ OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { glGenProgramsARB(1, &program.handle); glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, static_cast(code.size()), code.data()); - if (!Settings::values.renderer_debug) { - return program; - } - const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - 
LOG_CRITICAL(Render_OpenGL, "{}", err); - LOG_INFO(Render_OpenGL, "{}", code); + if (Settings::values.renderer_debug) { + const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + if (std::strstr(err, "error")) { + LOG_CRITICAL(Render_OpenGL, "\n{}", err); + LOG_INFO(Render_OpenGL, "\n{}", code); + } else { + LOG_WARNING(Render_OpenGL, "\n{}", err); + } + } } return program; } -- cgit v1.2.3 From c31521512fd49603ea42c93e2a6eac5d7985cd78 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 20:46:40 -0300 Subject: gl_shader_cache,glasm: Conditionally use typeless image reads extension --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 74 +++++++++++----------- src/video_core/renderer_opengl/gl_shader_cache.h | 2 + 2 files changed, 39 insertions(+), 37 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 747a133fb..2c0510f11 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -36,42 +36,6 @@ namespace OpenGL { namespace { -// FIXME: Move this somewhere else -const Shader::Profile profile{ - .supported_spirv = 0x00010000, - - .unified_descriptor_binding = false, - .support_descriptor_aliasing = false, - .support_int8 = false, - .support_int16 = false, - .support_vertex_instance_id = true, - .support_float_controls = false, - .support_separate_denorm_behavior = false, - .support_separate_rounding_mode = false, - .support_fp16_denorm_preserve = false, - .support_fp32_denorm_preserve = false, - .support_fp16_denorm_flush = false, - .support_fp32_denorm_flush = false, - .support_fp16_signed_zero_nan_preserve = false, - .support_fp32_signed_zero_nan_preserve = false, - .support_fp64_signed_zero_nan_preserve = false, - .support_explicit_workgroup_layout = false, - .support_vote = true, - .support_viewport_index_layer_non_geometry = 
true, - .support_viewport_mask = true, - .support_typeless_image_loads = true, - .support_demote_to_helper_invocation = false, - .support_int64_atomics = false, - - .warp_size_potentially_larger_than_guest = true, - .lower_left_origin_mode = true, - - .has_broken_spirv_clamp = true, - .has_broken_unsigned_image_offsets = true, - .has_broken_signed_operations = true, - .ignore_nan_fp_comparisons = true, -}; - using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; @@ -279,7 +243,43 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ - state_tracker_} {} + state_tracker_} { + profile = Shader::Profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = + device.HasNvViewportArray2() || device.HasVertexViewportLayer(), + .support_viewport_mask = true, + .support_typeless_image_loads = device.HasImageLoadFormatted(), + .support_demote_to_helper_invocation = false, + .support_int64_atomics = false, + + .warp_size_potentially_larger_than_guest = true, + 
.lower_left_origin_mode = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + }; +} ShaderCache::~ShaderCache() = default; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b479d073a..b49cd0ac7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -86,6 +86,8 @@ private: ShaderPools main_pools; std::unordered_map> graphics_cache; std::unordered_map> compute_cache; + + Shader::Profile profile; }; } // namespace OpenGL -- cgit v1.2.3 From 1bccb43cbecdbf069f5c86086670a8d5440408e3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 20:47:48 -0300 Subject: gl_shader_cache: Conditionally use viewport mask --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2c0510f11..cf03280fa 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -266,7 +266,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_vote = true, .support_viewport_index_layer_non_geometry = device.HasNvViewportArray2() || device.HasVertexViewportLayer(), - .support_viewport_mask = true, + .support_viewport_mask = device.HasNvViewportArray2(), .support_typeless_image_loads = device.HasImageLoadFormatted(), .support_demote_to_helper_invocation = false, .support_int64_atomics = false, -- cgit v1.2.3 From 80884e32701e1e93fded045be4c235ff143d6ea0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 21:24:24 -0300 Subject: gl_graphics_program: Fix texture buffer bindings --- .../renderer_opengl/gl_graphics_program.cpp | 59 
+++++++++++++--------- 1 file changed, 35 insertions(+), 24 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index 9677a3ed6..7c3d23f85 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "common/cityhash.h" @@ -14,12 +15,24 @@ namespace OpenGL { namespace { using Shader::ImageBufferDescriptor; +using Shader::ImageDescriptor; +using Shader::TextureBufferDescriptor; +using Shader::TextureDescriptor; using Tegra::Texture::TexturePair; using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; +template +u32 AccumulateCount(Range&& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -77,30 +90,25 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff } u32 num_textures{}; u32 num_images{}; - for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { const auto& info{stage_infos[stage]}; - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - for (const auto& desc : info.constant_buffer_descriptors) { - base_uniform_bindings[stage + 1] += desc.count; - } - for (const auto& desc : info.storage_buffers_descriptors) { - base_storage_bindings[stage + 1] += desc.count; - } - for (const auto& desc : info.texture_buffer_descriptors) { - num_texture_buffers[stage] += desc.count; - num_textures += desc.count; 
- } - for (const auto& desc : info.image_buffer_descriptors) { - num_image_buffers[stage] += desc.count; - num_images += desc.count; - } - for (const auto& desc : info.texture_descriptors) { - num_textures += desc.count; - } - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); } + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); @@ -151,8 +159,8 @@ void GraphicsProgram::Configure(bool is_indexed) { const u32 index_offset{index << desc.size_shift}; const u32 offset{desc.cbuf_offset + index_offset}; const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; - if constexpr (std::is_same_v || - std::is_same_v) { + if constexpr (std::is_same_v || + std::is_same_v) { if (desc.has_secondary) { ASSERT(cbufs[desc.secondary_cbuf_index].enabled); const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; @@ -297,6 +305,9 @@ void GraphicsProgram::Configure(bool is_indexed) { texture_binding += num_texture_buffers[stage]; image_binding += num_image_buffers[stage]; + views_it += num_texture_buffers[stage]; + views_it += num_image_buffers[stage]; + const auto& info{stage_infos[stage]}; for (const auto& desc : 
info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { -- cgit v1.2.3 From 40179282137370380387cab2610dcf21bd709efa Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 May 2021 03:24:19 -0300 Subject: gl_shader_cache: Do not flip tessellation on OpenGL --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cf03280fa..ceec83a8a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -159,8 +159,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: - // We have to flip tessellation clockwise for some reason... - info.tess_clockwise = key.tessellation_clockwise == 0; + info.tess_clockwise = key.tessellation_clockwise != 0; info.tess_primitive = [&key] { switch (key.tessellation_primitive) { case Maxwell::TessellationPrimitive::Isolines: -- cgit v1.2.3 From eacf18cce9a05a28f50e916a752c04b0c9337707 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 18:58:28 -0300 Subject: gl_shader_cache: Rename Program abstractions into Pipeline --- src/video_core/CMakeLists.txt | 8 +- .../renderer_opengl/gl_compute_pipeline.cpp | 182 ++++++++++ .../renderer_opengl/gl_compute_pipeline.h | 84 +++++ .../renderer_opengl/gl_compute_program.cpp | 182 ---------- .../renderer_opengl/gl_compute_program.h | 84 ----- .../renderer_opengl/gl_graphics_pipeline.cpp | 402 +++++++++++++++++++++ .../renderer_opengl/gl_graphics_pipeline.h | 118 ++++++ .../renderer_opengl/gl_graphics_program.cpp | 402 --------------------- .../renderer_opengl/gl_graphics_program.h | 118 ------ src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 +- src/video_core/renderer_opengl/gl_rasterizer.h | 2 
+- src/video_core/renderer_opengl/gl_shader_cache.cpp | 42 +-- src/video_core/renderer_opengl/gl_shader_cache.h | 32 +- src/video_core/renderer_opengl/gl_shader_manager.h | 2 +- 14 files changed, 834 insertions(+), 834 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_compute_pipeline.cpp create mode 100644 src/video_core/renderer_opengl/gl_compute_pipeline.h delete mode 100644 src/video_core/renderer_opengl/gl_compute_program.cpp delete mode 100644 src/video_core/renderer_opengl/gl_compute_program.h create mode 100644 src/video_core/renderer_opengl/gl_graphics_pipeline.cpp create mode 100644 src/video_core/renderer_opengl/gl_graphics_pipeline.h delete mode 100644 src/video_core/renderer_opengl/gl_graphics_program.cpp delete mode 100644 src/video_core/renderer_opengl/gl_graphics_program.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 8250f736c..1ef3a6189 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -67,14 +67,14 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h - renderer_opengl/gl_compute_program.cpp - renderer_opengl/gl_compute_program.h + renderer_opengl/gl_compute_pipeline.cpp + renderer_opengl/gl_compute_pipeline.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h - renderer_opengl/gl_graphics_program.cpp - renderer_opengl/gl_graphics_program.h + renderer_opengl/gl_graphics_pipeline.cpp + renderer_opengl/gl_graphics_pipeline.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp new file mode 100644 index 000000000..700ebd8b8 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -0,0 +1,182 @@ +// 
Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "video_core/renderer_opengl/gl_compute_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 16; + +size_t ComputePipelineKey::Hash() const noexcept { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof *this)); +} + +bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, sizeof *this) == 0; +} + +ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, + source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers += desc.count; + } + u32 num_textures = num_texture_buffers; + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + ASSERT(num_textures <= MAX_TEXTURES); + + u32 num_images = num_image_buffers; + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + ASSERT(num_images <= MAX_IMAGES); +} + +void ComputePipeline::Configure() { + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + 
buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); + ++ssbo_index; + } + texture_cache.SynchronizeComputeDescriptors(); + + std::array image_view_ids; + boost::container::static_vector image_view_indices; + std::array samplers; + std::array textures; + std::array images; + GLsizei sampler_binding{}; + GLsizei texture_binding{}; + GLsizei image_binding{}; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + 
image_view_indices.push_back(handle.first); + samplers[sampler_binding++] = 0; + } + } + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers[sampler_binding++] = sampler->Handle(); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + if (assembly_program.handle != 0) { + program_manager.BindComputeAssemblyProgram(assembly_program.handle); + } else { + program_manager.BindProgram(source_program.handle); + } + buffer_cache.UnbindComputeTextureBuffers(); + size_t texbuf_index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texbuf_index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + + buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); + buffer_cache.BindHostComputeBuffers(); + + const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + texture_binding += num_texture_buffers; + image_binding += num_image_buffers; + + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < 
desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h new file mode 100644 index 000000000..e3b94e2f3 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -0,0 +1,84 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines { +class KeplerCompute; +} + +namespace Shader { +struct Info; +} + +namespace OpenGL { + +class ProgramManager; + +struct ComputePipelineKey { + u64 unique_hash; + u32 shared_memory_size; + std::array workgroup_size; + + size_t Hash() const noexcept; + + bool operator==(const ComputePipelineKey&) const noexcept; + + bool operator!=(const ComputePipelineKey& rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class ComputePipeline { +public: + explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_); + + void Configure(); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::KeplerCompute& kepler_compute; + ProgramManager& program_manager; + + Shader::Info info; + OGLProgram source_program; + OGLAssemblyProgram assembly_program; + + u32 num_texture_buffers{}; + u32 num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp deleted file mode 100644 
index ce52a0052..000000000 --- a/src/video_core/renderer_opengl/gl_compute_program.cpp +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "common/cityhash.h" -#include "video_core/renderer_opengl/gl_compute_program.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" - -namespace OpenGL { - -using Shader::ImageBufferDescriptor; -using Tegra::Texture::TexturePair; -using VideoCommon::ImageId; - -constexpr u32 MAX_TEXTURES = 64; -constexpr u32 MAX_IMAGES = 16; - -size_t ComputeProgramKey::Hash() const noexcept { - return static_cast( - Common::CityHash64(reinterpret_cast(this), sizeof *this)); -} - -bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept { - return std::memcmp(this, &rhs, sizeof *this) == 0; -} - -ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, - source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { - for (const auto& desc : info.texture_buffer_descriptors) { - num_texture_buffers += desc.count; - } - for (const auto& desc : info.image_buffer_descriptors) { - num_image_buffers += desc.count; - } - u32 num_textures = num_texture_buffers; - for (const auto& desc : info.texture_descriptors) { - num_textures += desc.count; - } - ASSERT(num_textures <= MAX_TEXTURES); - - u32 num_images = num_image_buffers; - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; - } - ASSERT(num_images <= MAX_IMAGES); -} - 
-void ComputeProgram::Configure() { - buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); - buffer_cache.UnbindComputeStorageBuffers(); - size_t ssbo_index{}; - for (const auto& desc : info.storage_buffers_descriptors) { - ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, - desc.is_written); - ++ssbo_index; - } - texture_cache.SynchronizeComputeDescriptors(); - - std::array image_view_ids; - boost::container::static_vector image_view_indices; - std::array samplers; - std::array textures; - std::array images; - GLsizei sampler_binding{}; - GLsizei texture_binding{}; - GLsizei image_binding{}; - - const auto& qmd{kepler_compute.launch_description}; - const auto& cbufs{qmd.const_buffer_config}; - const bool via_header_index{qmd.linked_tsc != 0}; - const auto read_handle{[&](const auto& desc, u32 index) { - ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); - const u32 index_offset{index << desc.size_shift}; - const u32 offset{desc.cbuf_offset + index_offset}; - const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; - if constexpr (std::is_same_v || - std::is_same_v) { - if (desc.has_secondary) { - ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); - const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; - const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + - secondary_offset}; - const u32 lhs_raw{gpu_memory.Read(addr)}; - const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - return TexturePair(lhs_raw | rhs_raw, via_header_index); - } - } - return TexturePair(gpu_memory.Read(addr), via_header_index); - }}; - const auto add_image{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); - } - }}; - for (const auto& desc : info.texture_buffer_descriptors) { - for (u32 index = 0; index < 
desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); - samplers[sampler_binding++] = 0; - } - } - std::ranges::for_each(info.image_buffer_descriptors, add_image); - for (const auto& desc : info.texture_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); - - Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); - samplers[sampler_binding++] = sampler->Handle(); - } - } - std::ranges::for_each(info.image_descriptors, add_image); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - if (assembly_program.handle != 0) { - program_manager.BindComputeAssemblyProgram(assembly_program.handle); - } else { - program_manager.BindProgram(source_program.handle); - } - buffer_cache.UnbindComputeTextureBuffers(); - size_t texbuf_index{}; - const auto add_buffer{[&](const auto& desc) { - constexpr bool is_image = std::is_same_v; - for (u32 i = 0; i < desc.count; ++i) { - bool is_written{false}; - if constexpr (is_image) { - is_written = desc.is_written; - } - ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; - buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format, - is_written, is_image); - ++texbuf_index; - } - }}; - std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); - std::ranges::for_each(info.image_buffer_descriptors, add_buffer); - - buffer_cache.UpdateComputeBuffers(); - - buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); - buffer_cache.BindHostComputeBuffers(); - - const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; - texture_binding += num_texture_buffers; - image_binding += num_image_buffers; - - for (const 
auto& desc : info.texture_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - textures[texture_binding++] = image_view.Handle(desc.type); - } - } - for (const auto& desc : info.image_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); - } - } - if (texture_binding != 0) { - ASSERT(texture_binding == sampler_binding); - glBindTextures(0, texture_binding, textures.data()); - glBindSamplers(0, sampler_binding, samplers.data()); - } - if (image_binding != 0) { - glBindImageTextures(0, image_binding, images.data()); - } -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h deleted file mode 100644 index ddb00dc1d..000000000 --- a/src/video_core/renderer_opengl/gl_compute_program.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include - -#include "common/common_types.h" -#include "shader_recompiler/shader_info.h" -#include "video_core/renderer_opengl/gl_buffer_cache.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" - -namespace Tegra { -class MemoryManager; -} - -namespace Tegra::Engines { -class KeplerCompute; -} - -namespace Shader { -struct Info; -} - -namespace OpenGL { - -class ProgramManager; - -struct ComputeProgramKey { - u64 unique_hash; - u32 shared_memory_size; - std::array workgroup_size; - - size_t Hash() const noexcept; - - bool operator==(const ComputeProgramKey&) const noexcept; - - bool operator!=(const ComputeProgramKey& rhs) const noexcept { - return !operator==(rhs); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - -class ComputeProgram { -public: - explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_); - - void Configure(); - -private: - TextureCache& texture_cache; - BufferCache& buffer_cache; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::KeplerCompute& kepler_compute; - ProgramManager& program_manager; - - Shader::Info info; - OGLProgram source_program; - OGLAssemblyProgram assembly_program; - - u32 num_texture_buffers{}; - u32 num_image_buffers{}; -}; - -} // namespace OpenGL - -namespace std { -template <> -struct hash { - size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept { - return k.Hash(); - } -}; -} // namespace std diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp new file mode 100644 index 
000000000..32df35202 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -0,0 +1,402 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "common/cityhash.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_graphics_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace OpenGL { +namespace { +using Shader::ImageBufferDescriptor; +using Shader::ImageDescriptor; +using Shader::TextureBufferDescriptor; +using Shader::TextureDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 8; + +template +u32 AccumulateCount(Range&& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + +/// Translates hardware transform feedback indices +/// @param location Hardware location +/// @return Pair of ARB_transform_feedback3 token stream first and third arguments +/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt +std::pair TransformFeedbackEnum(u8 location) { + const u8 index = location / 4; + if (index >= 8 && index <= 39) { + return {GL_GENERIC_ATTRIB_NV, index - 8}; + } + if (index >= 48 && index <= 55) { + return {GL_TEXTURE_COORD_NV, index - 48}; + } + switch (index) { + case 7: + return {GL_POSITION, 0}; + case 40: + return {GL_PRIMARY_COLOR_NV, 0}; + case 41: + return {GL_SECONDARY_COLOR_NV, 0}; + case 42: + return {GL_BACK_PRIMARY_COLOR_NV, 0}; + case 43: + return {GL_BACK_SECONDARY_COLOR_NV, 0}; + } + UNIMPLEMENTED_MSG("index={}", index); + return {GL_POSITION, 0}; +} +} // Anonymous namespace + +size_t GraphicsPipelineKey::Hash() const noexcept { + return 
static_cast(Common::CityHash64(reinterpret_cast(this), Size())); +} + +bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; +} + +GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + std::array assembly_programs_, + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( + assembly_programs_)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + + for (size_t stage = 0; stage < 5; ++stage) { + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 
1 : 0) << stage; + } + u32 num_textures{}; + u32 num_images{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + const auto& info{stage_infos[stage]}; + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + } + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); + + if (assembly_programs[0].handle != 0 && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } +} + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +void GraphicsPipeline::Configure(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_view_index{}; + GLsizei sampler_binding{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); + buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == 
Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + } + }}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + samplers[sampler_binding++] = 0; + } + } + } + if constexpr 
(Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_binding++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_view_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + texture_cache.UpdateRenderTargets(false); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : 
info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); + + if (assembly_programs[0].handle != 0) { + program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); + } else { + program_manager.BindProgram(program.handle); + } + const ImageId* views_it{image_view_ids.data()}; + GLsizei texture_binding = 0; + GLsizei image_binding = 0; + std::array textures; + std::array images; + const auto prepare_stage{[&](size_t stage) { + buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); + buffer_cache.BindHostStageBuffers(stage); + + texture_binding += num_texture_buffers[stage]; + image_binding += num_image_buffers[stage]; + + views_it += num_texture_buffers[stage]; + views_it += num_image_buffers[stage]; + + const auto& info{stage_infos[stage]}; + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& 
image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } +} + +void GraphicsPipeline::GenerateTransformFeedbackState( + const VideoCommon::TransformFeedbackState& xfb_state) { + // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal + // when this is required. + const auto& regs{maxwell3d.regs}; + + GLint* cursor{xfb_attribs.data()}; + GLint* current_stream{xfb_streams.data()}; + + for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { + const auto& layout = regs.tfb_layouts[feedback]; + UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); + if (layout.varying_count == 0) { + continue; + } + *current_stream = static_cast(feedback); + if (current_stream != xfb_streams.data()) { + // When stepping one stream, push the expected token + cursor[0] = GL_NEXT_BUFFER_NV; + cursor[1] = 0; + cursor[2] = 0; + cursor += XFB_ENTRY_STRIDE; + } + ++current_stream; + + const auto& locations = regs.tfb_varying_locs[feedback]; + std::optional current_index; + for (u32 offset = 0; offset < layout.varying_count; ++offset) { + const u8 location = locations[offset]; + const u8 index = location / 4; + + if (current_index == index) { + // Increase number of components of the previous attachment + ++cursor[-2]; + 
continue; + } + current_index = index; + + std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); + cursor[1] = 1; + cursor += XFB_ENTRY_STRIDE; + } + } + num_xfb_attribs = static_cast((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); + num_xfb_strides = static_cast(current_stream - xfb_streams.data()); +} + +void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h new file mode 100644 index 000000000..62f700cf5 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -0,0 +1,118 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/transform_feedback.h" + +namespace OpenGL { + +class ProgramManager; + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +struct GraphicsPipelineKey { + std::array unique_hashes; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; + BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + std::array padding; + VideoCommon::TransformFeedbackState xfb_state; + + size_t 
Hash() const noexcept; + + bool operator==(const GraphicsPipelineKey&) const noexcept; + + bool operator!=(const GraphicsPipelineKey& rhs) const noexcept { + return !operator==(rhs); + } + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsPipelineKey); + } else { + return offsetof(GraphicsPipelineKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class GraphicsPipeline { +public: + explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + std::array assembly_programs_, + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state); + + void Configure(bool is_indexed); + + void ConfigureTransformFeedback() const { + if (num_xfb_attribs != 0) { + ConfigureTransformFeedbackImpl(); + } + } + +private: + void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + + void ConfigureTransformFeedbackImpl() const; + + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + ProgramManager& program_manager; + StateTracker& state_tracker; + + OGLProgram program; + std::array assembly_programs; + u32 enabled_stages_mask{}; + + std::array stage_infos{}; + std::array base_uniform_bindings{}; + std::array base_storage_bindings{}; + std::array num_texture_buffers{}; + std::array num_image_buffers{}; + + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; + GLsizei num_xfb_attribs{}; + GLsizei num_xfb_strides{}; + std::array xfb_attribs{}; + std::array xfb_streams{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const 
OpenGL::GraphicsPipelineKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp deleted file mode 100644 index 7c3d23f85..000000000 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ /dev/null @@ -1,402 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "common/cityhash.h" -#include "shader_recompiler/shader_info.h" -#include "video_core/renderer_opengl/gl_graphics_program.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" -#include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/texture_cache/texture_cache.h" - -namespace OpenGL { -namespace { -using Shader::ImageBufferDescriptor; -using Shader::ImageDescriptor; -using Shader::TextureBufferDescriptor; -using Shader::TextureDescriptor; -using Tegra::Texture::TexturePair; -using VideoCommon::ImageId; - -constexpr u32 MAX_TEXTURES = 64; -constexpr u32 MAX_IMAGES = 8; - -template -u32 AccumulateCount(Range&& range) { - u32 num{}; - for (const auto& desc : range) { - num += desc.count; - } - return num; -} - -/// Translates hardware transform feedback indices -/// @param location Hardware location -/// @return Pair of ARB_transform_feedback3 token stream first and third arguments -/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt -std::pair TransformFeedbackEnum(u8 location) { - const u8 index = location / 4; - if (index >= 8 && index <= 39) { - return {GL_GENERIC_ATTRIB_NV, index - 8}; - } - if (index >= 48 && index <= 55) { - return {GL_TEXTURE_COORD_NV, index - 48}; - } - switch (index) { - case 7: - return {GL_POSITION, 0}; - case 40: - return {GL_PRIMARY_COLOR_NV, 0}; - case 41: - return {GL_SECONDARY_COLOR_NV, 0}; - case 42: - return {GL_BACK_PRIMARY_COLOR_NV, 
0}; - case 43: - return {GL_BACK_SECONDARY_COLOR_NV, 0}; - } - UNIMPLEMENTED_MSG("index={}", index); - return {GL_POSITION, 0}; -} -} // Anonymous namespace - -size_t GraphicsProgramKey::Hash() const noexcept { - return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); -} - -bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept { - return std::memcmp(this, &rhs, Size()) == 0; -} - -GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::Maxwell3D& maxwell3d_, - ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, - const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, - gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, - state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( - assembly_programs_)} { - std::ranges::transform(infos, stage_infos.begin(), - [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - - for (size_t stage = 0; stage < 5; ++stage) { - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 
1 : 0) << stage; - } - u32 num_textures{}; - u32 num_images{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); - } - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; - - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; - - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); - - if (assembly_programs[0].handle != 0 && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); - } -} - -struct Spec { - static constexpr std::array enabled_stages{true, true, true, true, true}; - static constexpr bool has_storage_buffers = true; - static constexpr bool has_texture_buffers = true; - static constexpr bool has_image_buffers = true; - static constexpr bool has_images = true; -}; - -void GraphicsProgram::Configure(bool is_indexed) { - std::array image_view_ids; - std::array image_view_indices; - std::array samplers; - size_t image_view_index{}; - GLsizei sampler_binding{}; - - texture_cache.SynchronizeGraphicsDescriptors(); - - buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); - buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); - - const auto& regs{maxwell3d.regs}; - const bool via_header_index{regs.sampler_index == 
Maxwell::SamplerIndex::ViaHeaderIndex}; - const auto config_stage{[&](size_t stage) { - const Shader::Info& info{stage_infos[stage]}; - buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); - buffer_cache.UnbindGraphicsStorageBuffers(stage); - if constexpr (Spec::has_storage_buffers) { - size_t ssbo_index{}; - for (const auto& desc : info.storage_buffers_descriptors) { - ASSERT(desc.count == 1); - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, - desc.cbuf_offset, desc.is_written); - ++ssbo_index; - } - } - const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](const auto& desc, u32 index) { - ASSERT(cbufs[desc.cbuf_index].enabled); - const u32 index_offset{index << desc.size_shift}; - const u32 offset{desc.cbuf_offset + index_offset}; - const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; - if constexpr (std::is_same_v || - std::is_same_v) { - if (desc.has_secondary) { - ASSERT(cbufs[desc.secondary_cbuf_index].enabled); - const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; - const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + - second_offset}; - const u32 lhs_raw{gpu_memory.Read(addr)}; - const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - const u32 raw{lhs_raw | rhs_raw}; - return TexturePair(raw, via_header_index); - } - } - return TexturePair(gpu_memory.Read(addr), via_header_index); - }}; - const auto add_image{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; - } - }}; - if constexpr (Spec::has_texture_buffers) { - for (const auto& desc : info.texture_buffer_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; - samplers[sampler_binding++] = 0; - } - } - } - if constexpr 
(Spec::has_image_buffers) { - for (const auto& desc : info.image_buffer_descriptors) { - add_image(desc); - } - } - for (const auto& desc : info.texture_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; - - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; - samplers[sampler_binding++] = sampler->Handle(); - } - } - if constexpr (Spec::has_images) { - for (const auto& desc : info.image_descriptors) { - add_image(desc); - } - } - }}; - if constexpr (Spec::enabled_stages[0]) { - config_stage(0); - } - if constexpr (Spec::enabled_stages[1]) { - config_stage(1); - } - if constexpr (Spec::enabled_stages[2]) { - config_stage(2); - } - if constexpr (Spec::enabled_stages[3]) { - config_stage(3); - } - if constexpr (Spec::enabled_stages[4]) { - config_stage(4); - } - const std::span indices_span(image_view_indices.data(), image_view_index); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - texture_cache.UpdateRenderTargets(false); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - - ImageId* texture_buffer_index{image_view_ids.data()}; - const auto bind_stage_info{[&](size_t stage) { - size_t index{}; - const auto add_buffer{[&](const auto& desc) { - constexpr bool is_image = std::is_same_v; - for (u32 i = 0; i < desc.count; ++i) { - bool is_written{false}; - if constexpr (is_image) { - is_written = desc.is_written; - } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; - buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format, - is_written, is_image); - ++index; - ++texture_buffer_index; - } - }}; - const Shader::Info& info{stage_infos[stage]}; - buffer_cache.UnbindGraphicsTextureBuffers(stage); - - if constexpr (Spec::has_texture_buffers) { - for (const auto& desc : 
info.texture_buffer_descriptors) { - add_buffer(desc); - } - } - if constexpr (Spec::has_image_buffers) { - for (const auto& desc : info.image_buffer_descriptors) { - add_buffer(desc); - } - } - for (const auto& desc : info.texture_descriptors) { - texture_buffer_index += desc.count; - } - if constexpr (Spec::has_images) { - for (const auto& desc : info.image_descriptors) { - texture_buffer_index += desc.count; - } - } - }}; - if constexpr (Spec::enabled_stages[0]) { - bind_stage_info(0); - } - if constexpr (Spec::enabled_stages[1]) { - bind_stage_info(1); - } - if constexpr (Spec::enabled_stages[2]) { - bind_stage_info(2); - } - if constexpr (Spec::enabled_stages[3]) { - bind_stage_info(3); - } - if constexpr (Spec::enabled_stages[4]) { - bind_stage_info(4); - } - buffer_cache.UpdateGraphicsBuffers(is_indexed); - buffer_cache.BindHostGeometryBuffers(is_indexed); - - if (assembly_programs[0].handle != 0) { - program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); - } else { - program_manager.BindProgram(program.handle); - } - const ImageId* views_it{image_view_ids.data()}; - GLsizei texture_binding = 0; - GLsizei image_binding = 0; - std::array textures; - std::array images; - const auto prepare_stage{[&](size_t stage) { - buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); - buffer_cache.BindHostStageBuffers(stage); - - texture_binding += num_texture_buffers[stage]; - image_binding += num_image_buffers[stage]; - - views_it += num_texture_buffers[stage]; - views_it += num_image_buffers[stage]; - - const auto& info{stage_infos[stage]}; - for (const auto& desc : info.texture_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - textures[texture_binding++] = image_view.Handle(desc.type); - } - } - for (const auto& desc : info.image_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - ImageView& 
image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); - } - } - }}; - if constexpr (Spec::enabled_stages[0]) { - prepare_stage(0); - } - if constexpr (Spec::enabled_stages[1]) { - prepare_stage(1); - } - if constexpr (Spec::enabled_stages[2]) { - prepare_stage(2); - } - if constexpr (Spec::enabled_stages[3]) { - prepare_stage(3); - } - if constexpr (Spec::enabled_stages[4]) { - prepare_stage(4); - } - if (texture_binding != 0) { - ASSERT(texture_binding == sampler_binding); - glBindTextures(0, texture_binding, textures.data()); - glBindSamplers(0, sampler_binding, samplers.data()); - } - if (image_binding != 0) { - glBindImageTextures(0, image_binding, images.data()); - } -} - -void GraphicsProgram::GenerateTransformFeedbackState( - const VideoCommon::TransformFeedbackState& xfb_state) { - // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal - // when this is required. - const auto& regs{maxwell3d.regs}; - - GLint* cursor{xfb_attribs.data()}; - GLint* current_stream{xfb_streams.data()}; - - for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; - UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); - if (layout.varying_count == 0) { - continue; - } - *current_stream = static_cast(feedback); - if (current_stream != xfb_streams.data()) { - // When stepping one stream, push the expected token - cursor[0] = GL_NEXT_BUFFER_NV; - cursor[1] = 0; - cursor[2] = 0; - cursor += XFB_ENTRY_STRIDE; - } - ++current_stream; - - const auto& locations = regs.tfb_varying_locs[feedback]; - std::optional current_index; - for (u32 offset = 0; offset < layout.varying_count; ++offset) { - const u8 location = locations[offset]; - const u8 index = location / 4; - - if (current_index == index) { - // Increase number of components of the previous attachment - ++cursor[-2]; - 
continue; - } - current_index = index; - - std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); - cursor[1] = 1; - cursor += XFB_ENTRY_STRIDE; - } - } - num_xfb_attribs = static_cast((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); - num_xfb_strides = static_cast(current_stream - xfb_streams.data()); -} - -void GraphicsProgram::ConfigureTransformFeedbackImpl() const { - glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, - xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h deleted file mode 100644 index 53a57ede5..000000000 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "common/bit_field.h" -#include "common/common_types.h" -#include "shader_recompiler/shader_info.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_buffer_cache.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/transform_feedback.h" - -namespace OpenGL { - -class ProgramManager; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsProgramKey { - std::array unique_hashes; - union { - u32 raw; - BitField<0, 1, u32> xfb_enabled; - BitField<1, 1, u32> early_z; - BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; - BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; - BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; - BitField<10, 1, u32> tessellation_clockwise; - }; - std::array padding; - VideoCommon::TransformFeedbackState xfb_state; - - size_t 
Hash() const noexcept; - - bool operator==(const GraphicsProgramKey&) const noexcept; - - bool operator!=(const GraphicsProgramKey& rhs) const noexcept { - return !operator==(rhs); - } - - [[nodiscard]] size_t Size() const noexcept { - if (xfb_enabled != 0) { - return sizeof(GraphicsProgramKey); - } else { - return offsetof(GraphicsProgramKey, padding); - } - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - -class GraphicsProgram { -public: - explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::Maxwell3D& maxwell3d_, - ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, - const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state); - - void Configure(bool is_indexed); - - void ConfigureTransformFeedback() const { - if (num_xfb_attribs != 0) { - ConfigureTransformFeedbackImpl(); - } - } - -private: - void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); - - void ConfigureTransformFeedbackImpl() const; - - TextureCache& texture_cache; - BufferCache& buffer_cache; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::Maxwell3D& maxwell3d; - ProgramManager& program_manager; - StateTracker& state_tracker; - - OGLProgram program; - std::array assembly_programs; - u32 enabled_stages_mask{}; - - std::array stage_infos{}; - std::array base_uniform_bindings{}; - std::array base_storage_bindings{}; - std::array num_texture_buffers{}; - std::array num_image_buffers{}; - - static constexpr std::size_t XFB_ENTRY_STRIDE = 3; - GLsizei num_xfb_attribs{}; - GLsizei num_xfb_strides{}; - std::array xfb_attribs{}; - std::array xfb_streams{}; -}; - -} // namespace OpenGL - -namespace std { -template <> -struct hash { - size_t operator()(const 
OpenGL::GraphicsProgramKey& k) const noexcept { - return k.Hash(); - } -}; -} // namespace std diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 51ff42ee9..72a6dfd2a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -218,13 +218,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); - GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()}; + GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - program->Configure(is_indexed); + pipeline->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - BeginTransformFeedback(program, primitive_mode); + BeginTransformFeedback(pipeline, primitive_mode); const GLuint base_instance = static_cast(maxwell3d.regs.vb_base_instance); const GLsizei num_instances = @@ -271,7 +271,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute() { - ComputeProgram* const program{shader_cache.CurrentComputeProgram()}; + ComputePipeline* const program{shader_cache.CurrentComputePipeline()}; if (!program) { return; } @@ -996,7 +996,7 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); } -void RasterizerOpenGL::BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode) { +void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) { const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 08f509c19..afd43b2ee 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -194,7 
+194,7 @@ private: void SyncVertexInstances(); /// Begin a transform feedback - void BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode); + void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode); /// End a transform feedback void EndTransformFeedback(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index ceec83a8a..33757938a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -152,7 +152,7 @@ GLenum AssemblyStage(size_t stage_index) { return GL_NONE; } -Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program) { UNIMPLEMENTED_IF_MSG(key.xfb_enabled != 0, "Transform feedbacks"); @@ -282,7 +282,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo ShaderCache::~ShaderCache() = default; -GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { +GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (!RefreshStages(graphics_key.unique_hashes)) { return nullptr; } @@ -302,18 +302,18 @@ GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& program{pair->second}; if (is_new) { - program = CreateGraphicsProgram(); + program = CreateGraphicsPipeline(); } return program.get(); } -ComputeProgram* ShaderCache::CurrentComputeProgram() { +ComputePipeline* ShaderCache::CurrentComputePipeline() { const VideoCommon::ShaderInfo* const shader{ComputeShader()}; if (!shader) { return nullptr; } const auto& qmd{kepler_compute.launch_description}; - const ComputeProgramKey key{ + const ComputePipelineKey key{ .unique_hash = shader->unique_hash, .shared_memory_size = qmd.shared_alloc, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, @@ -323,20 +323,20 @@ 
ComputeProgram* ShaderCache::CurrentComputeProgram() { if (!is_new) { return pipeline.get(); } - pipeline = CreateComputeProgram(key, shader); + pipeline = CreateComputePipeline(key, shader); return pipeline.get(); } -std::unique_ptr ShaderCache::CreateGraphicsProgram() { +std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GraphicsEnvironments environments; GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true); + return CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true); } -std::unique_ptr ShaderCache::CreateGraphicsProgram( - ShaderPools& pools, const GraphicsProgramKey& key, std::span envs, +std::unique_ptr ShaderCache::CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{0}; @@ -382,27 +382,27 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( if (!device.UseAssemblyShaders()) { LinkProgram(source_program.handle); } - return std::make_unique( + return std::make_unique( texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); } -std::unique_ptr ShaderCache::CreateComputeProgram( - const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) { +std::unique_ptr ShaderCache::CreateComputePipeline( + const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - return CreateComputeProgram(main_pools, key, env, true); + return CreateComputePipeline(main_pools, key, env, true); } -std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& pools, - const ComputeProgramKey& key, - Shader::Environment& env, - bool build_in_parallel) { +std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, + const ComputePipelineKey& key, + Shader::Environment& env, + bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -418,9 +418,9 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p AddShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } - return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, - program_manager, program.info, - std::move(source_program), std::move(asm_program)); + return std::make_unique(texture_cache, buffer_cache, gpu_memory, + kepler_compute, program_manager, program.info, + std::move(source_program), std::move(asm_program)); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b49cd0ac7..a56559ea9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -15,8 +15,8 @@ #include 
"shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_compute_program.h" -#include "video_core/renderer_opengl/gl_graphics_program.h" +#include "video_core/renderer_opengl/gl_compute_pipeline.h" +#include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -55,24 +55,24 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); - [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram(); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); - [[nodiscard]] ComputeProgram* CurrentComputeProgram(); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: - std::unique_ptr CreateGraphicsProgram(); + std::unique_ptr CreateGraphicsPipeline(); - std::unique_ptr CreateGraphicsProgram( - ShaderPools& pools, const GraphicsProgramKey& key, + std::unique_ptr CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, bool build_in_parallel); - std::unique_ptr CreateComputeProgram(const ComputeProgramKey& key, - const VideoCommon::ShaderInfo* shader); + std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, + const VideoCommon::ShaderInfo* shader); - std::unique_ptr CreateComputeProgram(ShaderPools& pools, - const ComputeProgramKey& key, - Shader::Environment& env, - bool build_in_parallel); + std::unique_ptr CreateComputePipeline(ShaderPools& pools, + const ComputePipelineKey& key, + Shader::Environment& env, + bool build_in_parallel); Core::Frontend::EmuWindow& emu_window; const Device& device; @@ -81,11 +81,11 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; - GraphicsProgramKey graphics_key{}; + GraphicsPipelineKey graphics_key{}; ShaderPools main_pools; - std::unordered_map> graphics_cache; - std::unordered_map> compute_cache; + std::unordered_map> graphics_cache; + 
std::unordered_map> compute_cache; Shader::Profile profile; }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 5ec57d707..88b734bcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -9,8 +9,8 @@ #include -#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { -- cgit v1.2.3 From a49532c8eb29807814214ab326ff970f5a964a03 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 18:58:52 -0300 Subject: video_core,shader: Clang-format fixes --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13 +++++++++---- src/video_core/renderer_vulkan/vk_scheduler.h | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7e39b65bd..d50647ba7 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,14 +97,19 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), + telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), + gpu(gpu_), + library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? 
CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), - state_tracker(gpu), scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), + memory_allocator(device, false), + state_tracker(gpu), + scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 6600fb142..cf39a2363 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -6,11 +6,11 @@ #include #include -#include #include #include #include #include +#include #include "common/alignment.h" #include "common/common_types.h" -- cgit v1.2.3 From a41b2ed3917f9ca5af30773e4671f4829380dceb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 20:39:55 -0300 Subject: gl_shader_cache: Add disk shader cache --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 113 +++++++++++++++++++-- src/video_core/renderer_opengl/gl_shader_cache.h | 10 +- 3 files changed, 116 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 72a6dfd2a..eec01e8c2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -140,7 +140,9 @@ void RasterizerOpenGL::SyncVertexInstances() { } void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) {} + const VideoCore::DiskResourceLoadCallback& callback) { + 
shader_cache.LoadDiskResources(title_id, stop_loading, callback); +} void RasterizerOpenGL::Clear() { MICROPROFILE_SCOPE(OpenGL_Clears); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 33757938a..3aa5ac31d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -3,17 +3,19 @@ // Refer to the license.txt file included. #include +#include #include #include -#include #include #include -#include #include "common/alignment.h" #include "common/assert.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/logging/log.h" #include "common/scope_exit.h" +#include "common/thread_worker.h" #include "core/core.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" @@ -40,6 +42,8 @@ using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; template @@ -154,8 +158,6 @@ GLenum AssemblyStage(size_t stage_index) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program) { - UNIMPLEMENTED_IF_MSG(key.xfb_enabled != 0, "Transform feedbacks"); - Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: @@ -282,6 +284,89 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo ShaderCache::~ShaderCache() = default; +void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; + } + auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + auto base_dir{shader_dir / "new_opengl"}; + auto transferable_dir{base_dir / "transferable"}; + 
auto precompiled_dir{base_dir / "precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; + } + shader_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + + struct Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; + }; + Common::StatefulThreadWorker workers( + std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); + + struct { + std::mutex mutex; + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + workers.QueueWork( + [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { + ctx->pools.ReleaseContents(); + auto pipeline{CreateComputePipeline(ctx->pools, key, env, false)}; + + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const auto load_graphics{[&](std::ifstream& file, std::vector envs) { + GraphicsPipelineKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + workers.QueueWork( + [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { + boost::container::static_vector env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + ctx->pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; + + 
std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + VideoCommon::LoadPipelines(stop_loading, shader_cache_filename, load_compute, load_graphics); + + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); + + workers.WaitForRequests(); +} + GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (!RefreshStages(graphics_key.unique_hashes)) { return nullptr; @@ -332,7 +417,18 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - return CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; + if (shader_cache_filename.empty()) { + return pipeline; + } + boost::container::static_vector env_ptrs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] != 0) { + env_ptrs.push_back(&environments.envs[index]); + } + } + VideoCommon::SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); + return pipeline; } std::unique_ptr ShaderCache::CreateGraphicsPipeline( @@ -396,7 +492,12 @@ std::unique_ptr ShaderCache::CreateComputePipeline( env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - return CreateComputePipeline(main_pools, key, env, true); + auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; + if (!shader_cache_filename.empty()) { + VideoCommon::SerializePipeline(key, std::array{&env}, + shader_cache_filename); + } + return pipeline; } std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h 
b/src/video_core/renderer_opengl/gl_shader_cache.h index a56559ea9..16175318b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,6 +5,8 @@ #pragma once #include +#include +#include #include #include @@ -23,10 +25,6 @@ namespace Tegra { class MemoryManager; } -namespace Core::Frontend { -class EmuWindow; -} - namespace OpenGL { class Device; @@ -55,6 +53,9 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); [[nodiscard]] ComputePipeline* CurrentComputePipeline(); @@ -88,6 +89,7 @@ private: std::unordered_map> compute_cache; Shader::Profile profile; + std::filesystem::path shader_cache_filename; }; } // namespace OpenGL -- cgit v1.2.3 From adb591a757ccb289634920d51cf519b515ca32b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 18:32:59 -0300 Subject: glasm: Use storage buffers instead of global memory when possible --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 26 +++++++------- src/video_core/renderer_opengl/gl_buffer_cache.h | 6 ++++ .../renderer_opengl/gl_compute_pipeline.cpp | 42 +++++++++++++--------- .../renderer_opengl/gl_compute_pipeline.h | 12 +++++-- src/video_core/renderer_opengl/gl_device.cpp | 18 +--------- src/video_core/renderer_opengl/gl_device.h | 6 +++- .../renderer_opengl/gl_graphics_pipeline.cpp | 19 +++++++--- .../renderer_opengl/gl_graphics_pipeline.h | 12 +++++-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 13 +++---- src/video_core/renderer_opengl/gl_rasterizer.h | 3 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 30 ++++++++++++---- 11 files changed, 120 insertions(+), 67 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp 
b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2d0ef1307..334ed470f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -195,7 +195,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + const GLuint base_binding = graphics_base_storage_bindings[stage]; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), + static_cast(offset), static_cast(size)); + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast(size), @@ -204,17 +209,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast(&ssbo)); - } else { - const GLuint base_binding = graphics_base_storage_bindings[stage]; - const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), - static_cast(offset), static_cast(size)); } } void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + if (size != 0) { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), + static_cast(offset), static_cast(size)); + } else { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); + } + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast(size), @@ -223,11 +230,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf 
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, reinterpret_cast(&ssbo)); - } else if (size == 0) { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); - } else { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), - static_cast(offset), static_cast(size)); } } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 4986c65fd..bc16abafb 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -147,6 +147,10 @@ public: image_handles = image_handles_; } + void SetEnableStorageBuffers(bool use_storage_buffers_) { + use_storage_buffers = use_storage_buffers_; + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -160,6 +164,8 @@ private: bool use_assembly_shaders = false; bool has_unified_vertex_buffers = false; + bool use_storage_buffers = false; + u32 max_attributes = 0; std::array graphics_base_uniform_bindings{}; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 700ebd8b8..5cf5f97a9 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -17,6 +17,15 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 16; +template +u32 AccumulateCount(const Range& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + size_t ComputePipelineKey::Hash() const noexcept { return static_cast( Common::CityHash64(reinterpret_cast(this), sizeof *this)); @@ -26,31 +35,31 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep return std::memcmp(this, &rhs, sizeof *this) == 0; } 
-ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, +ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, OGLProgram source_program_, OGLAssemblyProgram assembly_program_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { - for (const auto& desc : info.texture_buffer_descriptors) { - num_texture_buffers += desc.count; - } - for (const auto& desc : info.image_buffer_descriptors) { - num_image_buffers += desc.count; - } - u32 num_textures = num_texture_buffers; - for (const auto& desc : info.texture_descriptors) { - num_textures += desc.count; - } + + num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); + num_image_buffers = AccumulateCount(info.image_buffer_descriptors); + + const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)}; ASSERT(num_textures <= MAX_TEXTURES); - u32 num_images = num_image_buffers; - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; - } + const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)}; ASSERT(num_images <= MAX_IMAGES); + + const bool is_glasm{assembly_program.handle != 0}; + const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; + use_storage_buffers = + !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory = !use_storage_buffers && + std::ranges::any_of(info.storage_buffers_descriptors, + [](const auto& desc) { return desc.is_written; }); } void ComputePipeline::Configure() 
{ @@ -150,6 +159,7 @@ void ComputePipeline::Configure() { buffer_cache.UpdateComputeBuffers(); + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); buffer_cache.BindHostComputeBuffers(); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index e3b94e2f3..dd6b62ef2 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -28,6 +28,7 @@ struct Info; namespace OpenGL { +class Device; class ProgramManager; struct ComputePipelineKey { @@ -49,14 +50,18 @@ static_assert(std::is_trivially_constructible_v); class ComputePipeline { public: - explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, + explicit ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, OGLProgram source_program_, OGLAssemblyProgram assembly_program_); void Configure(); + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + private: TextureCache& texture_cache; BufferCache& buffer_cache; @@ -70,6 +75,9 @@ private: u32 num_texture_buffers{}; u32 num_image_buffers{}; + + bool use_storage_buffers{}; + bool writes_global_memory{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 18bbc4c1f..01da2bb57 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -135,13 +135,13 @@ Device::Device() { "Beta driver 443.24 is known to have issues. 
There might be performance issues."); disable_fast_buffer_sub_data = true; } - max_uniform_buffers = BuildMaxUniformBuffers(); uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); max_compute_shared_memory_size = GetInteger(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); + max_glasm_storage_buffer_blocks = GetInteger(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; has_shader_ballot = GLAD_GL_ARB_shader_ballot; @@ -236,22 +236,6 @@ std::string Device::GetVendorName() const { return vendor_name; } -Device::Device(std::nullptr_t) { - max_uniform_buffers.fill(std::numeric_limits::max()); - uniform_buffer_alignment = 4; - shader_storage_alignment = 4; - max_vertex_attributes = 16; - max_varyings = 15; - max_compute_shared_memory_size = 0x10000; - has_warp_intrinsics = true; - has_shader_ballot = true; - has_vertex_viewport_layer = true; - has_image_load_formatted = true; - has_texture_shadow_lod = true; - has_variable_aoffi = true; - has_depth_buffer_float = true; -} - bool Device::TestVariableAoffi() { return TestProgram(R"(#version 430 core // This is a unit test, please ignore me on apitrace bug reports. 
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 152a3acd3..d67f5693c 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -13,7 +13,6 @@ namespace OpenGL { class Device { public: explicit Device(); - explicit Device(std::nullptr_t); [[nodiscard]] std::string GetVendorName() const; @@ -41,6 +40,10 @@ public: return max_compute_shared_memory_size; } + u32 GetMaxGLASMStorageBufferBlocks() const { + return max_glasm_storage_buffer_blocks; + } + bool HasWarpIntrinsics() const { return has_warp_intrinsics; } @@ -124,6 +127,7 @@ private: u32 max_vertex_attributes{}; u32 max_varyings{}; u32 max_compute_shared_memory_size{}; + u32 max_glasm_storage_buffer_blocks{}; bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 32df35202..19d85c482 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -25,7 +25,7 @@ constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; template -u32 AccumulateCount(Range&& range) { +u32 AccumulateCount(const Range& range) { u32 num{}; for (const auto& desc : range) { num += desc.count; @@ -70,8 +70,8 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc return std::memcmp(this, &rhs, Size()) == 0; } -GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, +GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, @@ -90,6 +90,7 @@ 
GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu } u32 num_textures{}; u32 num_images{}; + u32 num_storage_buffers{}; for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { const auto& info{stage_infos[stage]}; if (stage < 4) { @@ -109,11 +110,20 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu num_textures += AccumulateCount(info.texture_descriptors); num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); - if (assembly_programs[0].handle != 0 && xfb_state) { + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + + if (assembly_shaders && xfb_state) { GenerateTransformFeedbackState(*xfb_state); } } @@ -137,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 62f700cf5..c1113e180 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -20,6 +20,7 @@ namespace OpenGL { +class Device; class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -60,8 +61,8 @@ 
static_assert(std::is_trivially_constructible_v); class GraphicsPipeline { public: - explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, + explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, @@ -77,6 +78,10 @@ public: } } + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + private: void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); @@ -99,6 +104,9 @@ private: std::array num_texture_buffers{}; std::array num_image_buffers{}; + bool use_storage_buffers{}; + bool writes_global_memory{}; + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; GLsizei num_xfb_strides{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index eec01e8c2..5d4e80364 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -268,19 +268,21 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { EndTransformFeedback(); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); gpu.TickWork(); } void RasterizerOpenGL::DispatchCompute() { - ComputePipeline* const program{shader_cache.CurrentComputePipeline()}; - if (!program) { + ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; + if (!pipeline) { return; } - program->Configure(); + pipeline->Configure(); const auto& qmd{kepler_compute.launch_description}; glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ 
-449,9 +451,8 @@ void RasterizerOpenGL::FlushCommands() { // Make sure memory stored from the previous GL command stream is visible // This is only needed on assembly shaders where we write to GPU memory with raw pointers - // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used - // and prefer using NV_shader_storage_buffer_object when possible - if (Settings::values.use_assembly_shaders.GetValue()) { + if (has_written_global_memory) { + has_written_global_memory = false; glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); } glFlush(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index afd43b2ee..d0397b745 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -225,7 +225,8 @@ private: std::array image_handles{}; /// Number of commands queued to the OpenGL driver. Resetted on flush. - std::size_t num_queued_commands = 0; + size_t num_queued_commands = 0; + bool has_written_global_memory = false; u32 last_clip_distance_mask = 0; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3aa5ac31d..287f497b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -157,7 +157,8 @@ GLenum AssemblyStage(size_t stage_index) { } Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, - const Shader::IR::Program& program) { + const Shader::IR::Program& program, + bool glasm_use_storage_buffers) { Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: @@ -220,6 +221,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, info.input_topology = Shader::InputTopology::TrianglesAdjacency; break; } + info.glasm_use_storage_buffers = glasm_use_storage_buffers; return info; } @@ -435,7 +437,8 @@ std::unique_ptr 
ShaderCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); - size_t env_index{0}; + size_t env_index{}; + u32 total_storage_buffers{}; std::array programs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { @@ -447,7 +450,14 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + + for (const auto& desc : programs[index].info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } } + const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; + const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit}; + std::array infos{}; OGLProgram source_program; @@ -466,7 +476,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)}; if (device.UseAssemblyShaders()) { const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); @@ -479,7 +489,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( LinkProgram(source_program.handle); } return std::make_unique( - texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); } @@ -508,10 +518,18 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + + u32 num_storage_buffers{}; + for (const auto& desc : program.info.storage_buffers_descriptors) { + num_storage_buffers += desc.count; + } + Shader::RuntimeInfo info; + info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + OGLAssemblyProgram asm_program; OGLProgram source_program; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program)}; + const std::string code{EmitGLASM(profile, info, program)}; asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); } else { const std::vector code{EmitSPIRV(profile, program)}; @@ -519,7 +537,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& AddShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } - return std::make_unique(texture_cache, buffer_cache, gpu_memory, + return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, kepler_compute, program_manager, program.info, std::move(source_program), std::move(asm_program)); } -- cgit v1.2.3 From 3b595fe8b28001eed4a936e2a7b465bd67dcc4b7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 16:47:49 -0300 Subject: glasm: Prepare XFB from state instead of global registers --- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 19d85c482..38ec88b13 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -362,13 +362,11 @@ void 
GraphicsPipeline::GenerateTransformFeedbackState( const VideoCommon::TransformFeedbackState& xfb_state) { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // when this is required. - const auto& regs{maxwell3d.regs}; - GLint* cursor{xfb_attribs.data()}; GLint* current_stream{xfb_streams.data()}; for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; + const auto& layout = xfb_state.layouts[feedback]; UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); if (layout.varying_count == 0) { continue; @@ -383,7 +381,7 @@ void GraphicsPipeline::GenerateTransformFeedbackState( } ++current_stream; - const auto& locations = regs.tfb_varying_locs[feedback]; + const auto& locations = xfb_state.varyings[feedback]; std::optional current_index; for (u32 offset = 0; offset < layout.varying_count; ++offset) { const u8 location = locations[offset]; -- cgit v1.2.3 From b7764c3a796e53ac74009bc7d7cd153c64b6d743 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 17:51:00 -0300 Subject: shader: Handle host exceptions --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 43 +++++++++++++--------- src/video_core/renderer_opengl/gl_shader_cache.h | 5 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 35 ++++++++++++------ 4 files changed, 55 insertions(+), 32 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 5d4e80364..54696d97d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -221,7 +221,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; - + if (!pipeline) { + return; + } 
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; pipeline->Configure(is_indexed); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 287f497b5..7d2ec4efa 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -45,6 +45,7 @@ using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; +using VideoCommon::SerializePipeline; template auto MakeSpan(Container& container) { @@ -327,10 +328,11 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, workers.QueueWork( [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { ctx->pools.ReleaseContents(); - auto pipeline{CreateComputePipeline(ctx->pools, key, env, false)}; - + auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); @@ -348,10 +350,11 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, env_ptrs.push_back(&env); } ctx->pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; - + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs))}; std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + graphics_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); @@ -419,8 +422,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); 
main_pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; - if (shader_cache_filename.empty()) { + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span())}; + if (!pipeline || shader_cache_filename.empty()) { return pipeline; } boost::container::static_vector env_ptrs; @@ -429,13 +432,13 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { env_ptrs.push_back(&environments.envs[index]); } } - VideoCommon::SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); + SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); return pipeline; } std::unique_ptr ShaderCache::CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, - bool build_in_parallel) { + ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{}; u32 total_storage_buffers{}; @@ -492,6 +495,10 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); + +} catch (Shader::Exception& exception) { + LOG_ERROR(Render_OpenGL, "{}", exception.what()); + return nullptr; } std::unique_ptr ShaderCache::CreateComputePipeline( @@ -502,18 +509,17 @@ std::unique_ptr ShaderCache::CreateComputePipeline( env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; - if (!shader_cache_filename.empty()) { - VideoCommon::SerializePipeline(key, std::array{&env}, - shader_cache_filename); + auto pipeline{CreateComputePipeline(main_pools, key, env)}; + if (!pipeline || shader_cache_filename.empty()) { + return pipeline; } + SerializePipeline(key, std::array{&env}, shader_cache_filename); return pipeline; } std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, const ComputePipelineKey& key, - Shader::Environment& env, - bool build_in_parallel) { + Shader::Environment& env) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -540,6 +546,9 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, kepler_compute, program_manager, program.info, std::move(source_program), std::move(asm_program)); +} catch (Shader::Exception& exception) { + LOG_ERROR(Render_OpenGL, "{}", exception.what()); + return nullptr; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 16175318b..cf74d34e4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -65,15 +65,14 @@ private: std::unique_ptr CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs, bool build_in_parallel); + std::span envs); std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, const VideoCommon::ShaderInfo* 
shader); std::unique_ptr CreateComputePipeline(ShaderPools& pools, const ComputePipelineKey& key, - Shader::Environment& env, - bool build_in_parallel); + Shader::Environment& env); Core::Frontend::EmuWindow& emu_window; const Device& device; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f86bf9c30..b6998e37c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -303,6 +303,9 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { if (is_new) { pipeline = CreateGraphicsPipeline(); } + if (!pipeline) { + return nullptr; + } if (current_pipeline) { current_pipeline->AddTransition(pipeline.get()); } @@ -362,9 +365,10 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { ShaderPools pools; auto pipeline{CreateComputePipeline(pools, key, env, false)}; - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); @@ -405,7 +409,7 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading std::unique_ptr PipelineCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineCacheKey& key, - std::span envs, bool build_in_parallel) { + std::span envs, bool build_in_parallel) try { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; @@ -458,6 +462,10 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( return std::make_unique( maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); + +} catch 
(const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { @@ -466,7 +474,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; - if (pipeline_cache_filename.empty()) { + if (!pipeline || pipeline_cache_filename.empty()) { return pipeline; } serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { @@ -477,7 +485,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env_ptrs.push_back(&envs[index]); } } - VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename); + SerializePipeline(key, env_ptrs, pipeline_cache_filename); }); return pipeline; } @@ -491,18 +499,19 @@ std::unique_ptr PipelineCache::CreateComputePipeline( main_pools.ReleaseContents(); auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; - if (!pipeline_cache_filename.empty()) { - serialization_thread.QueueWork([this, key, env = std::move(env)] { - VideoCommon::SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); - }); + if (!pipeline || pipeline_cache_filename.empty()) { + return pipeline; } + serialization_thread.QueueWork([this, key, env = std::move(env)] { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); + }); return pipeline; } std::unique_ptr PipelineCache::CreateComputePipeline( ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, - bool build_in_parallel) { + bool build_in_parallel) try { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -517,6 +526,10 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, thread_worker, program.info, std::move(spv_module)); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } } // namespace Vulkan -- cgit v1.2.3 From 56d4a9ebde4afa18329ba6df4995ed9ef2aa1ca1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 17:53:27 -0300 Subject: texture_cache: Reduce invalid image/sampler error severity --- src/video_core/texture_cache/texture_cache.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 255b07cf8..f34c9d9ca 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -548,13 +548,13 @@ void TextureCache

::FillComputeImageViews(std::span indices, template typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { - [[unlikely]] if (index > graphics_sampler_table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + if (index > graphics_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } const auto [descriptor, is_new] = graphics_sampler_table.Read(index); SamplerId& id = graphics_sampler_ids[index]; - [[unlikely]] if (is_new) { + if (is_new) { id = FindSampler(descriptor); } return &slot_samplers[id]; @@ -562,13 +562,13 @@ typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { template typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { - [[unlikely]] if (index > compute_sampler_table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + if (index > compute_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } const auto [descriptor, is_new] = compute_sampler_table.Read(index); SamplerId& id = compute_sampler_ids[index]; - [[unlikely]] if (is_new) { + if (is_new) { id = FindSampler(descriptor); } return &slot_samplers[id]; @@ -669,7 +669,7 @@ ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, std::span cached_image_view_ids, u32 index) { if (index > table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid image view index={}", index); + LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); return NULL_IMAGE_VIEW_ID; } const auto [descriptor, is_new] = table.Read(index); -- cgit v1.2.3 From 99f2c31b64aa7854690368f4637ef59a546b2d15 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 May 2021 02:53:39 -0300 Subject: vulkan_device: Enable float64 and int64 conditionally Add Intel Xe support. --- src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++-- src/video_core/vulkan_common/vulkan_device.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e27a2b51e..aabcb0b10 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -251,8 +251,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderStorageImageArrayDynamicIndexing = false, .shaderClipDistance = true, .shaderCullDistance = true, - .shaderFloat64 = true, - .shaderInt64 = true, + .shaderFloat64 = is_shader_float64_supported, + .shaderInt64 = is_shader_int64_supported, .shaderInt16 = true, .shaderResourceResidency = false, .shaderResourceMinLod = false, @@ -909,6 +909,8 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { void Device::SetupFeatures() { const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; + is_shader_float64_supported = features.shaderFloat64; + is_shader_int64_supported = features.shaderInt64; is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(features); diff --git 
a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index ebe073293..693419505 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -314,6 +314,8 @@ private: bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_shader_float64_supported{}; ///< Support for float64. + bool is_shader_int64_supported{}; ///< Support for int64. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. -- cgit v1.2.3 From a7e9756671be5bb99566277709e5becdea774f34 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 02:57:42 -0300 Subject: buffer_cache: Mark uniform buffers as dirty if any enable bit changes --- src/video_core/buffer_cache/buffer_cache.h | 10 +++++----- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 4 +++- src/video_core/renderer_opengl/gl_graphics_pipeline.h | 1 + src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 6 +++++- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 3 +++ 5 files changed, 17 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6c92e4c30..d6b9eb99f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -142,7 +142,7 @@ public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(size_t stage, u32 enabled); + void SetEnabledUniformBuffers(const std::array& mask); void SetEnabledComputeUniformBuffers(u32 enabled); @@ -670,13 +670,13 @@ void BufferCache

::BindHostComputeBuffers() { } template -void BufferCache

::SetEnabledUniformBuffers(size_t stage, u32 enabled) { +void BufferCache

::SetEnabledUniformBuffers(const std::array& mask) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers[stage] != enabled) { - dirty_uniform_buffers[stage] = ~u32{0}; + if (enabled_uniform_buffers != mask) { + dirty_uniform_buffers.fill(~u32{0}); } } - enabled_uniform_buffers[stage] = enabled; + enabled_uniform_buffers = mask; } template diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 38ec88b13..976897067 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -100,6 +100,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); } + enabled_uniform_buffers[stage] = info.constant_buffer_mask; + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; num_texture_buffers[stage] += num_tex_buffer_bindings; num_textures += num_tex_buffer_bindings; @@ -145,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); + buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); @@ -153,7 +156,6 @@ void GraphicsPipeline::Configure(bool is_indexed) { const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; const auto config_stage{[&](size_t stage) { const Shader::Info& info{stage_infos[stage]}; - buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { size_t 
ssbo_index{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index c1113e180..bf33ce604 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -99,6 +99,7 @@ private: u32 enabled_stages_mask{}; std::array stage_infos{}; + std::array enabled_uniform_buffers{}; std::array base_uniform_bindings{}; std::array base_storage_bindings{}; std::array num_texture_buffers{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e5f54a84f..dfe6e6a80 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -218,6 +218,9 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + std::ranges::transform(infos, enabled_uniform_buffers.begin(), [](const Shader::Info* info) { + return info ? 
info->constant_buffer_mask : 0; + }); auto func{[this, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; @@ -259,11 +262,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); + buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; - buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { size_t ssbo_index{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index e362d13c5..4068a0edc 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -128,7 +128,10 @@ private: std::vector transitions; std::array spv_modules; + std::array stage_infos; + std::array enabled_uniform_buffers{}; + vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; vk::PipelineLayout pipeline_layout; -- cgit v1.2.3 From 916ca7432474e891864524dcbc6c879d5cdbfb72 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 03:40:19 -0300 Subject: opengl: Declare fragment outputs even if they are not used Fixes Ori and the Blind Forest's menu on GLASM. For some reason (probably high level optimizations) it is not sanitized on SPIR-V for OpenGL. Vulkan is unaffected by this change. 
--- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 ++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7d2ec4efa..6ea7c0ee8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -276,7 +276,9 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_int64_atomics = false, .warp_size_potentially_larger_than_guest = true, + .lower_left_origin_mode = true, + .need_declared_frag_colors = true, .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b6998e37c..cec51cc77 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -274,9 +274,16 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), + + .lower_left_origin_mode = false, + .need_declared_frag_colors = false, + .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, + .has_broken_signed_operations = false, + .ignore_nan_fp_comparisons = false, }; } -- cgit v1.2.3 From c44b16124fcfb64b9482d639ae55670005eb6307 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:42:42 -0300 Subject: vk_buffer_cache: Add transform feedback usage to buffers --- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 37 
+++++++++++++--------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 568993c58..2da3de6de 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -60,6 +60,27 @@ std::array MakeQuadIndices(u32 quad, u32 first) { } return indices; } + +vk::Buffer CreateBuffer(const Device& device, u64 size) { + VkBufferUsageFlags flags = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + if (device.IsExtTransformFeedbackSupported()) { + flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; + } + return device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = size, + .usage = flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); +} } // Anonymous namespace Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) @@ -68,21 +89,7 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_), - device{&runtime.device}, - buffer{device->GetLogical().CreateBuffer({ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = SizeBytes(), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | 
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - })}, + device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())}, commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); -- cgit v1.2.3 From 77372443c3d6b20d7f78366bb4aa162f22bd7cde Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:43:47 -0300 Subject: vulkan: Enable depth bounds and use it conditionally Intel devices pre-Xe don't support this. --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 5 ++++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 5 +++++ src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index dfe6e6a80..d381109d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -598,13 +598,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthCompareOp = dynamic.depth_test_enable ? 
MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(), .stencilTestEnable = dynamic.stencil_enable, .front = GetStencilFaceState(dynamic.front), .back = GetStencilFaceState(dynamic.back), .minDepthBounds = 0.0f, .maxDepthBounds = 0.0f, }; + if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + } static_vector cb_attachments; const size_t num_attachments{NumAttachments(key.state)}; for (size_t index = 0; index < num_attachments; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ef14e91e7..9611b480a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -682,6 +682,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re if (!state_tracker.TouchDepthBoundsTestEnable()) { return; } + bool enabled = regs.depth_bounds_enable; + if (enabled && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + enabled = false; + } scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBoundsTestEnableEXT(enable); }); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index aabcb0b10..0a42efb6a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -226,7 +226,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .depthClamp = true, .depthBiasClamp = true, .fillModeNonSolid = true, - .depthBounds = false, + .depthBounds = is_depth_bounds_supported, .wideLines = false, .largePoints = true, .alphaToOne = false, @@ 
-908,6 +908,7 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { void Device::SetupFeatures() { const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + is_depth_bounds_supported = features.depthBounds; is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; is_shader_float64_supported = features.shaderFloat64; is_shader_int64_supported = features.shaderInt64; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 693419505..1ab63ecd7 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -159,6 +159,11 @@ public: return is_formatless_image_load_supported; } + // Returns true if depth bounds is supported. + bool IsDepthBoundsSupported() const { + return is_depth_bounds_supported; + } + /// Returns true when blitting from and to depth stencil images is supported. bool IsBlitDepthStencilSupported() const { return is_blit_depth_stencil_supported; @@ -314,6 +319,7 @@ private: bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_depth_bounds_supported{}; ///< Support for depth bounds. bool is_shader_float64_supported{}; ///< Support for float64. bool is_shader_int64_supported{}; ///< Support for int64. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. -- cgit v1.2.3 From 1148a4eac715869077ace56a9a311a167643aca3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:44:28 -0300 Subject: vulkan: Conditionally use shaderInt16 Add support for Polaris AMD devices. 
--- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index cec51cc77..2a2f166c8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -249,7 +249,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .unified_descriptor_binding = true, .support_descriptor_aliasing = true, .support_int8 = true, - .support_int16 = true, + .support_int16 = device.IsShaderInt16Supported(), .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0a42efb6a..2b715baba 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -253,7 +253,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderCullDistance = true, .shaderFloat64 = is_shader_float64_supported, .shaderInt64 = is_shader_int64_supported, - .shaderInt16 = true, + .shaderInt16 = is_shader_int16_supported, .shaderResourceResidency = false, .shaderResourceMinLod = false, .sparseBinding = false, @@ -912,6 +912,7 @@ void Device::SetupFeatures() { is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; is_shader_float64_supported = features.shaderFloat64; is_shader_int64_supported = features.shaderInt64; + is_shader_int16_supported = features.shaderInt16; is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); 
is_optimal_astc_supported = IsOptimalAstcSupported(features); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 1ab63ecd7..9bc1fb947 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -159,6 +159,11 @@ public: return is_formatless_image_load_supported; } + /// Returns true if shader int16 is supported. + bool IsShaderInt16Supported() const { + return is_shader_int16_supported; + } + // Returns true if depth bounds is supported. bool IsDepthBoundsSupported() const { return is_depth_bounds_supported; @@ -322,6 +327,7 @@ private: bool is_depth_bounds_supported{}; ///< Support for depth bounds. bool is_shader_float64_supported{}; ///< Support for float64. bool is_shader_int64_supported{}; ///< Support for int64. + bool is_shader_int16_supported{}; ///< Support for int16. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. -- cgit v1.2.3 From d26271b0148e4c41d87199b3e42a5702d1a6be53 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 1 Jun 2021 19:59:29 -0300 Subject: vk_swapchain: Avoid recreating the swapchain on each frame Recreate only when requested (or sRGB is changed) instead of tracking the frontend's size. That size is still used as a hint. 
--- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 18 ++++++------------ src/video_core/renderer_vulkan/vk_swapchain.h | 6 +++--- 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d50647ba7..54c41bcaf 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,19 +97,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), - telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), - gpu(gpu_), - library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? 
CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), - memory_allocator(device, false), - state_tracker(gpu), - scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), + state_tracker(gpu), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, @@ -139,17 +134,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); const bool is_srgb = use_accelerated && screen_info.is_srgb; - const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); bool has_been_recreated = false; const auto recreate_swapchain = [&] { if (!has_been_recreated) { has_been_recreated = true; scheduler.WaitWorker(); } + const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); swapchain.Create(layout.width, layout.height, is_srgb); }; - if (swapchain.NeedsRecreate() || - swapchain.HasDifferentLayout(layout.width, layout.height, is_srgb)) { + if (swapchain.NeedsRecreate() || swapchain.HasColorSpaceChanged(is_srgb)) { recreate_swapchain(); } bool needs_recreate; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index b38fd9dc2..df6da3d93 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -33,9 +33,9 @@ public: /// Presents the rendered image to the swapchain. void Present(VkSemaphore render_semaphore); - /// Returns true when the framebuffer layout has changed. 
- bool HasDifferentLayout(u32 width, u32 height, bool is_srgb) const { - return extent.width != width || extent.height != height || current_srgb != is_srgb; + /// Returns true when the color space has changed. + bool HasColorSpaceChanged(bool is_srgb) const { + return current_srgb != is_srgb; } /// Returns true when the image has to be recreated. -- cgit v1.2.3 From 46bd362d0dfab27e8c8d49f11eb4e3f373bf8f23 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 1 Jun 2021 20:37:45 -0300 Subject: fixed_pipeline_state: Use regular for loop instead of ranges for perf MSVC generates better code for it. --- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 3a43c329f..1486d088a 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -84,9 +84,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); depth_enabled.Assign(regs.zeta_enable != 0 ? 
1 : 0); depth_format.Assign(static_cast(regs.zeta.format)); - std::ranges::transform(regs.rt, color_formats.begin(), - [](const auto& rt) { return static_cast(rt.format); }); + for (size_t i = 0; i < regs.rt.size(); ++i) { + color_formats[i] = static_cast(regs.rt[i].format); + } alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); -- cgit v1.2.3 From e57ee3b7fd8dd942b616d389635a4e9f00c596e9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 01:10:04 -0300 Subject: transform_feedback: Read buffer stride from index instead of layout --- src/video_core/transform_feedback.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp index db52fff93..ba26ac3f1 100644 --- a/src/video_core/transform_feedback.cpp +++ b/src/video_core/transform_feedback.cpp @@ -72,8 +72,9 @@ std::vector MakeTransformFeedbackVaryings( const u32 base_offset = offset; const u8 location = locations[offset]; + UNIMPLEMENTED_IF_MSG(layout.stream != 0, "Stream is not zero: {}", layout.stream); Shader::TransformFeedbackVarying varying{ - .buffer = layout.stream, + .buffer = static_cast(buffer), .stride = layout.stride, .offset = offset * 4, .components = 1, -- cgit v1.2.3 From 4a2361a1e2271727f3259e8e4a60869165537253 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 02:15:07 -0300 Subject: buffer_cache: Reduce uniform buffer size from shader usage Increases performance significantly on certain titles. 
--- src/video_core/buffer_cache/buffer_cache.h | 42 +++++++++++++--------- .../renderer_opengl/gl_compute_pipeline.cpp | 4 ++- .../renderer_opengl/gl_compute_pipeline.h | 1 + .../renderer_opengl/gl_graphics_pipeline.cpp | 21 +++++------ .../renderer_opengl/gl_graphics_pipeline.h | 3 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 6 +++- .../renderer_vulkan/vk_compute_pipeline.h | 2 ++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 14 +++++--- .../renderer_vulkan/vk_graphics_pipeline.h | 3 +- 9 files changed, 61 insertions(+), 35 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d6b9eb99f..ec64f2293 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -44,6 +44,7 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); using BufferId = SlotId; using VideoCore::Surface::PixelFormat; +using namespace Common::Literals; constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; @@ -53,7 +54,8 @@ constexpr u32 NUM_STORAGE_BUFFERS = 16; constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; -using namespace Common::Literals; +using UniformBufferSizes = std::array, NUM_STAGES>; +using ComputeUniformBufferSizes = std::array; template class BufferCache { @@ -142,9 +144,10 @@ public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(const std::array& mask); + void SetUniformBuffersState(const std::array& mask, + const UniformBufferSizes* sizes); - void SetEnabledComputeUniformBuffers(u32 enabled); + void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); void UnbindGraphicsStorageBuffers(size_t stage); @@ -384,8 +387,11 @@ private: std::array compute_storage_buffers; std::array compute_texture_buffers; - std::array enabled_uniform_buffers{}; - u32 enabled_compute_uniform_buffers = 0; + std::array enabled_uniform_buffer_masks{}; + u32 
enabled_compute_uniform_buffer_mask = 0; + + const UniformBufferSizes* uniform_buffer_sizes{}; + const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; std::array enabled_storage_buffers{}; std::array written_storage_buffers{}; @@ -670,18 +676,22 @@ void BufferCache

::BindHostComputeBuffers() { } template -void BufferCache

::SetEnabledUniformBuffers(const std::array& mask) { +void BufferCache

::SetUniformBuffersState(const std::array& mask, + const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers != mask) { + if (enabled_uniform_buffer_masks != mask) { dirty_uniform_buffers.fill(~u32{0}); } } - enabled_uniform_buffers = mask; + enabled_uniform_buffer_masks = mask; + uniform_buffer_sizes = sizes; } template -void BufferCache

::SetEnabledComputeUniformBuffers(u32 enabled) { - enabled_compute_uniform_buffers = enabled; +void BufferCache

::SetComputeUniformBufferState(u32 mask, + const ComputeUniformBufferSizes* sizes) { + enabled_compute_uniform_buffer_mask = mask; + compute_uniform_buffer_sizes = sizes; } template @@ -984,7 +994,7 @@ void BufferCache

::BindHostGraphicsUniformBuffers(size_t stage) { dirty = std::exchange(dirty_uniform_buffers[stage], 0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { const bool needs_bind = ((dirty >> index) & 1) != 0; BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { @@ -998,7 +1008,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 bool needs_bind) { const Binding& binding = uniform_buffers[stage][index]; const VAddr cpu_addr = binding.cpu_addr; - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && @@ -1113,11 +1123,11 @@ void BufferCache

::BindHostComputeUniformBuffers() { dirty_uniform_buffers.fill(~u32{0}); } u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); @@ -1261,7 +1271,7 @@ void BufferCache

::UpdateVertexBuffer(u32 index) { template void BufferCache

::UpdateUniformBuffers(size_t stage) { - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { Binding& binding = uniform_buffers[stage][index]; if (binding.buffer_id) { // Already updated @@ -1334,7 +1344,7 @@ void BufferCache

::UpdateTransformFeedbackBuffer(u32 index) { template void BufferCache

::UpdateComputeUniformBuffers() { - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { Binding& binding = compute_uniform_buffers[index]; binding = NULL_BINDING; const auto& launch_desc = kepler_compute.launch_description; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 5cf5f97a9..61b6fe4b7 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -43,6 +43,8 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); num_image_buffers = AccumulateCount(info.image_buffer_descriptors); @@ -63,7 +65,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac } void ComputePipeline::Configure() { - buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index dd6b62ef2..b5dfb65e9 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -72,6 +72,7 @@ private: Shader::Info info; OGLProgram source_program; 
OGLAssemblyProgram assembly_program; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; u32 num_texture_buffers{}; u32 num_image_buffers{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 976897067..a5d65fdca 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -60,6 +60,14 @@ std::pair TransformFeedbackEnum(u8 location) { UNIMPLEMENTED_MSG("index={}", index); return {GL_POSITION, 0}; } + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; } // Anonymous namespace size_t GraphicsPipelineKey::Hash() const noexcept { @@ -100,7 +108,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); } - enabled_uniform_buffers[stage] = info.constant_buffer_mask; + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; num_texture_buffers[stage] += num_tex_buffer_bindings; @@ -130,14 +139,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } } -struct Spec { - static constexpr std::array enabled_stages{true, true, true, true, true}; - static constexpr bool has_storage_buffers = true; - static constexpr bool has_texture_buffers = true; - static constexpr bool has_image_buffers = true; - static constexpr bool has_images = true; -}; - void 
GraphicsPipeline::Configure(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; @@ -147,7 +148,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); - buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index bf33ce604..508fad5bb 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -99,7 +99,8 @@ private: u32 enabled_stages_mask{}; std::array stage_infos{}; - std::array enabled_uniform_buffers{}; + std::array enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; std::array base_uniform_bindings{}; std::array base_storage_bindings{}; std::array num_texture_buffers{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 168ffa7e9..ca59042ff 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include #include #include @@ -27,6 +28,9 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript vk::ShaderModule spv_module_) : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); + auto func{[this, &descriptor_pool] { DescriptorLayoutBuilder builder{device.GetLogical()}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); @@ -75,7 +79,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, BufferCache& buffer_cache, TextureCache& texture_cache) { update_descriptor_queue.Acquire(); - buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index a560e382e..a6043866d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -44,6 +44,8 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; + vk::ShaderModule spv_module; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d381109d6..627ca0158 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -218,10 +218,14 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, 
update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - std::ranges::transform(infos, enabled_uniform_buffers.begin(), [](const Shader::Info* info) { - return info ? info->constant_buffer_mask : 0; - }); - + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + const Shader::Info* const info{infos[stage]}; + if (!info) { + continue; + } + enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; + std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + } auto func{[this, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); @@ -262,7 +266,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); - buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4068a0edc..8c81c28a8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -130,7 +130,8 @@ private: std::array spv_modules; std::array stage_infos; - std::array enabled_uniform_buffers{}; + std::array enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; -- cgit v1.2.3 From 79f2fe1a39120f498e915fa0c740b15dc0f09793 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 03:02:33 -0300 Subject: glasm: Use 
ARB_derivative_control conditionally --- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 3 files changed, 7 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 01da2bb57..3f7929f9e 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -154,6 +154,7 @@ Device::Device() { has_precise_bug = TestPreciseBug(); has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + has_derivative_control = GLAD_GL_ARB_derivative_control; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index d67f5693c..1ffd24883 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -96,6 +96,10 @@ public: return has_nv_viewport_array2; } + bool HasDerivativeControl() const { + return has_derivative_control; + } + bool HasDebuggingToolAttached() const { return has_debugging_tool_attached; } @@ -141,6 +145,7 @@ private: bool has_broken_texture_view_formats{}; bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; + bool has_derivative_control{}; bool has_debugging_tool_attached{}; bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6ea7c0ee8..bdffac4b2 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -274,6 +274,7 @@ 
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_typeless_image_loads = device.HasImageLoadFormatted(), .support_demote_to_helper_invocation = false, .support_int64_atomics = false, + .support_derivative_control = device.HasDerivativeControl(), .warp_size_potentially_larger_than_guest = true, -- cgit v1.2.3 From 8c954fcaee77c70583c5edc73c7c35eefcca39b0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 04:28:30 -0300 Subject: vk_pipeline_cache: Set support_derivative_control to true --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2a2f166c8..a7f3619a5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -274,6 +274,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .support_derivative_control = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), -- cgit v1.2.3 From 8f099af6a8ea691c5f8f1403848ca42077b34bd6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 04:28:59 -0300 Subject: nsight_aftermath_tracker: Fix SPIR-V module writes --- src/video_core/vulkan_common/nsight_aftermath_tracker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 209cb1e0a..fdd1a5081 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ 
-99,7 +99,7 @@ void NsightAftermathTracker::SaveShader(std::span spirv) const { LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); return; } - if (file.Write(spirv) != spirv.size()) { + if (file.WriteSpan(spirv) != spirv.size()) { LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); return; } -- cgit v1.2.3 From b02c78b276f449318bdc787a35d123df01c0bc6d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 18:50:01 -0300 Subject: vk_buffer_cache: Handle null texture buffers Fixes a crash on Age of Calamity cutscenes. --- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 2da3de6de..f4b3ee95c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -97,6 +97,10 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast } VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { + if (!device) { + // Null buffer, return a null descriptor + return VK_NULL_HANDLE; + } const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { return offset == view.offset && size == view.size && format == view.format; })}; -- cgit v1.2.3 From 562af301819227d65a251a2c29c997bf798da7ba Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 4 Jun 2021 00:11:16 +0200 Subject: shader: Fix VertexA Shaders. 
--- src/video_core/renderer_opengl/gl_shader_cache.cpp | 26 +++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index bdffac4b2..0e4904733 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -40,6 +40,7 @@ namespace OpenGL { namespace { using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; @@ -446,6 +447,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( size_t env_index{}; u32 total_storage_buffers{}; std::array programs; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -454,11 +457,22 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( ++env_index; const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; - Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + // Normal path + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); - for (const auto& desc : programs[index].info.storage_buffers_descriptors) { - total_storage_buffers += desc.count; + for (const auto& desc : programs[index].info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } + } else { + // VertexB path when VertexA is present. 
+ Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + for (const auto& desc : program_vb.info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; @@ -472,7 +486,9 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( if (!device.UseAssemblyShaders()) { source_program.handle = glCreateProgram(); } - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; + ++index) { if (key.unique_hashes[index] == 0) { continue; } -- cgit v1.2.3 From f45f7b5c2a869123340591cec6db58c33a5fd3ab Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 3 Jun 2021 17:42:24 -0300 Subject: vk_swapchain: Handle outdated swapchains Fixes pixelated presentation on Intel devices. 
--- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 11 +++++---- src/video_core/renderer_vulkan/vk_swapchain.cpp | 26 ++++++++++++++++------ src/video_core/renderer_vulkan/vk_swapchain.h | 14 ++++++++---- 3 files changed, 34 insertions(+), 17 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 54c41bcaf..6fda06a7e 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -143,18 +143,17 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); swapchain.Create(layout.width, layout.height, is_srgb); }; - if (swapchain.NeedsRecreate() || swapchain.HasColorSpaceChanged(is_srgb)) { + if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) { recreate_swapchain(); } - bool needs_recreate; + bool is_outdated; do { - needs_recreate = false; swapchain.AcquireNextImage(); - if (swapchain.NeedsRecreate()) { + is_outdated = swapchain.IsOutDated(); + if (is_outdated) { recreate_swapchain(); - needs_recreate = true; } - } while (needs_recreate); + } while (is_outdated); if (has_been_recreated) { blit_screen.Recreate(); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index a71b0b01e..d990eefba 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -65,7 +65,8 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul VKSwapchain::~VKSwapchain() = default; void VKSwapchain::Create(u32 width, u32 height, bool srgb) { - needs_recreate = false; + is_outdated = false; + is_suboptimal = false; const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; @@ -85,11 +86,22 
@@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { } void VKSwapchain::AcquireNextImage() { - const VkResult result = - device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits::max(), - *present_semaphores[frame_index], {}, &image_index); - needs_recreate |= result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR; - + const VkResult result = device.GetLogical().AcquireNextImageKHR( + *swapchain, std::numeric_limits::max(), *present_semaphores[frame_index], + VK_NULL_HANDLE, &image_index); + switch (result) { + case VK_SUCCESS: + break; + case VK_SUBOPTIMAL_KHR: + is_suboptimal = true; + break; + case VK_ERROR_OUT_OF_DATE_KHR: + is_outdated = true; + break; + default: + LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); + break; + } scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); } @@ -115,7 +127,7 @@ void VKSwapchain::Present(VkSemaphore render_semaphore) { LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; case VK_ERROR_OUT_OF_DATE_KHR: - needs_recreate = true; + is_outdated = true; break; default: LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index df6da3d93..35c2cdc14 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -38,9 +38,14 @@ public: return current_srgb != is_srgb; } - /// Returns true when the image has to be recreated. - bool NeedsRecreate() const { - return needs_recreate; + /// Returns true when the swapchain is outdated. + bool IsOutDated() const { + return is_outdated; + } + + /// Returns true when the swapchain is suboptimal. 
+ bool IsSubOptimal() const { + return is_suboptimal; } VkExtent2D GetSize() const { @@ -95,7 +100,8 @@ private: VkExtent2D extent{}; bool current_srgb{}; - bool needs_recreate{}; + bool is_outdated{}; + bool is_suboptimal{}; }; } // namespace Vulkan -- cgit v1.2.3 From c736b9ffabc8a869d8ed131d365aff21b049f751 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 4 Jun 2021 00:52:40 +0200 Subject: DMA: Restrict optimised path for BlockToLinear further. --- src/video_core/engines/maxwell_dma.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index c51776466..c7ec1eac9 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -127,7 +127,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { // Optimized path for micro copies. const size_t dst_size = static_cast(regs.pitch_out) * regs.line_count; - if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) { + if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && + regs.src_params.height > GOB_SIZE_Y) { FastCopyBlockLinearToPitch(); return; } -- cgit v1.2.3 From 2a0aeaa3d283f1c7f003c956ab3079f70246b008 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 5 Jun 2021 21:48:38 -0300 Subject: vk_rasterizer: Flush work on clear and dispatches --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9611b480a..e72f8426b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -189,6 +189,7 @@ void RasterizerVulkan::Clear() { if (!maxwell3d.ShouldExecute()) { return; } + FlushWork(); query_cache.UpdateCounters(); @@ -259,6 +260,8 @@ void RasterizerVulkan::Clear() { } void RasterizerVulkan::DispatchCompute() { + 
FlushWork(); + ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; if (!pipeline) { return; -- cgit v1.2.3 From 48aad8dc05f027c21aa0e8a68d827006d9f7a196 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 5 Jun 2021 22:10:02 -0300 Subject: vk_pipeline_cache: Add asynchronous shaders --- .../renderer_vulkan/vk_graphics_pipeline.h | 6 ++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 26 ++++++++++++++++++++-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 ++++ 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 8c81c28a8..3f8895927 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -87,7 +87,7 @@ public: configure_func(this, is_indexed); } - GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { + [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { if (key == current_key) { return this; } @@ -96,6 +96,10 @@ public: : nullptr; } + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); + } + template static auto MakeConfigureSpecFunc() { return [](GraphicsPipeline* pipeline, bool is_indexed) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a7f3619a5..741ed1a98 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -240,6 +240,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, + 
use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; @@ -303,7 +304,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; if (next) { current_pipeline = next; - return current_pipeline; + return BuiltPipeline(current_pipeline); } } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; @@ -318,7 +319,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { current_pipeline->AddTransition(pipeline.get()); } current_pipeline = pipeline.get(); - return current_pipeline; + return BuiltPipeline(current_pipeline); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -415,6 +416,27 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.WaitForRequests(); } +GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; + } + if (!use_asynchronous_shaders) { + return pipeline; + } + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. + if (maxwell3d.regs.zeta_enable) { + return nullptr; + } + // If games are using a small index count, we can assume these are full screen quads. 
+ // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + return pipeline; + } + return nullptr; +} + std::unique_ptr PipelineCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineCacheKey& key, std::span envs, bool build_in_parallel) try { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 4116cc73f..869c63baf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -115,6 +115,8 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; + std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( @@ -140,6 +142,8 @@ private: GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; + bool use_asynchronous_shaders{}; + std::unordered_map> compute_cache; std::unordered_map> graphics_cache; -- cgit v1.2.3 From cffd4716c5ebf9b93505b5bfa96d9b407f349336 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 6 Jun 2021 00:11:36 -0300 Subject: vk_pipeline_cache,shader_notify: Add shader notifications --- .../renderer_vulkan/vk_compute_pipeline.cpp | 12 ++++- .../renderer_vulkan/vk_compute_pipeline.h | 7 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 30 +++++++------ .../renderer_vulkan/vk_graphics_pipeline.h | 22 +++++----- .../renderer_vulkan/vk_pipeline_cache.cpp | 50 ++++++++++++--------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 9 +++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/shader_notify.cpp | 51 ++++++++++------------ src/video_core/shader_notify.h | 28 +++++++----- 9 files changed, 121 insertions(+), 90 deletions(-) (limited to 'src/video_core') diff --git 
a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ca59042ff..cc855a62e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -14,6 +14,7 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -24,14 +25,18 @@ using Tegra::Texture::TexturePair; ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, - Common::ThreadWorker* thread_worker, const Shader::Info& info_, + Common::ThreadWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, vk::ShaderModule spv_module_) : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); - auto func{[this, &descriptor_pool] { + auto func{[this, &descriptor_pool, shader_notify] { DescriptorLayoutBuilder builder{device.GetLogical()}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); @@ -66,6 +71,9 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript std::lock_guard lock{build_mutex}; is_built = true; build_condvar.notify_one(); + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index a6043866d..52fec04d3 100644 --- 
a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -18,6 +18,10 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { class Device; @@ -27,7 +31,8 @@ class ComputePipeline { public: explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, - Common::ThreadWorker* thread_worker, const Shader::Info& info, + Common::ThreadWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, const Shader::Info& info, vk::ShaderModule spv_module); ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 627ca0158..5c916c869 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -17,6 +17,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #if defined(_MSC_VER) && defined(NDEBUG) @@ -203,30 +204,30 @@ ConfigureFuncPtr ConfigureFunc(const std::array& m } } // Anonymous namespace -GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, - BufferCache& buffer_cache_, TextureCache& texture_cache_, - const Device& device_, DescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue_, - Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, - const GraphicsPipelineCacheKey& key_, - std::array stages, - const std::array& infos) +GraphicsPipeline::GraphicsPipeline( + 
Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, + VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, + VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, + RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, + std::array stages, + const std::array& infos) : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { - std::ranges::transform(infos, stage_infos.begin(), - [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } for (size_t stage = 0; stage < NUM_STAGES; ++stage) { const Shader::Info* const info{infos[stage]}; if (!info) { continue; } + stage_infos[stage] = *info; enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); } - auto func{[this, &render_pass_cache, &descriptor_pool] { + auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); @@ -242,6 +243,9 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, std::lock_guard lock{build_mutex}; is_built = true; build_condvar.notify_one(); + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } }}; if (worker_thread) { worker_thread->QueueWork(std::move(func)); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h 
b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3f8895927..40d1edabd 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -20,6 +20,10 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { struct GraphicsPipelineCacheKey { @@ -64,16 +68,14 @@ class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; public: - explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, - BufferCache& buffer_cache, TextureCache& texture_cache, - const Device& device, DescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, - const GraphicsPipelineCacheKey& key, - std::array stages, - const std::array& infos); + explicit GraphicsPipeline( + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, + VideoCore::ShaderNotify* shader_notify, const Device& device, + DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array stages, + const std::array& infos); GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 741ed1a98..e61d76490 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -235,11 +235,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& 
rasterizer_, Tegra::Engines::Maxw VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, - TextureCache& texture_cache_) + TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { @@ -307,19 +307,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { return BuiltPipeline(current_pipeline); } } - const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; - auto& pipeline{pair->second}; - if (is_new) { - pipeline = CreateGraphicsPipeline(); - } - if (!pipeline) { - return nullptr; - } - if (current_pipeline) { - current_pipeline->AddTransition(pipeline.get()); - } - current_pipeline = pipeline.get(); - return BuiltPipeline(current_pipeline); + return CurrentGraphicsPipelineSlowPath(); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -416,6 +404,22 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.WaitForRequests(); } +GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() { + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (is_new) { + pipeline = CreateGraphicsPipeline(); + } + if (!pipeline) { + return nullptr; + } + if (current_pipeline) { + 
current_pipeline->AddTransition(pipeline.get()); + } + current_pipeline = pipeline.get(); + return BuiltPipeline(current_pipeline); +} + GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { if (pipeline->IsBuilt()) { return pipeline; @@ -484,14 +488,16 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { - const std::string name{fmt::format("{:016x}", key.unique_hashes[index])}; + const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - return std::make_unique( - maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, - update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); + VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; + return std::make_unique(maxwell3d, gpu_memory, scheduler, buffer_cache, + texture_cache, notify, device, descriptor_pool, + update_descriptor_queue, thread_worker, + render_pass_cache, key, std::move(modules), infos); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); @@ -550,12 +556,14 @@ std::unique_ptr PipelineCache::CreateComputePipeline( device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { - const auto name{fmt::format("{:016x}", key.unique_hash)}; + const auto name{fmt::format("Shader {:016x}", key.unique_hash)}; spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; + VideoCore::ShaderNotify* const notify{build_in_parallel ? 
&shader_notify : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, - thread_worker, program.info, std::move(spv_module)); + thread_worker, notify, program.info, + std::move(spv_module)); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 869c63baf..167a2ee2e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -38,6 +38,10 @@ namespace Shader::IR { struct Program; } +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -104,7 +108,7 @@ public: VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, - TextureCache& texture_cache); + TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); ~PipelineCache(); [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); @@ -115,6 +119,8 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; std::unique_ptr CreateGraphicsPipeline(); @@ -138,6 +144,7 @@ private: RenderPassCache& render_pass_cache; BufferCache& buffer_cache; TextureCache& texture_cache; + VideoCore::ShaderNotify& shader_notify; GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e72f8426b..d284b3653 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -140,7 +140,7 @@ 
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, - texture_cache), + texture_cache, gpu.ShaderNotify()), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp index 693e47158..dc6995b46 100644 --- a/src/video_core/shader_notify.cpp +++ b/src/video_core/shader_notify.cpp @@ -2,42 +2,35 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include +#include +#include + #include "video_core/shader_notify.h" using namespace std::chrono_literals; namespace VideoCore { -namespace { -constexpr auto UPDATE_TICK = 32ms; -} - -ShaderNotify::ShaderNotify() = default; -ShaderNotify::~ShaderNotify() = default; -std::size_t ShaderNotify::GetShadersBuilding() { - const auto now = std::chrono::high_resolution_clock::now(); - const auto diff = now - last_update; - if (diff > UPDATE_TICK) { - std::shared_lock lock(mutex); - last_updated_count = accurate_count; +const auto TIME_TO_STOP_REPORTING = 2s; + +int ShaderNotify::ShadersBuilding() noexcept { + const int now_complete = num_complete.load(std::memory_order::relaxed); + const int now_building = num_building.load(std::memory_order::relaxed); + if (now_complete == now_building) { + const auto now = std::chrono::high_resolution_clock::now(); + if (completed && num_complete == num_when_completed) { + if (now - complete_time > TIME_TO_STOP_REPORTING) { + report_base = now_complete; + completed = false; + } + } else { + completed = true; + num_when_completed 
= num_complete; + complete_time = now; + } } - return last_updated_count; -} - -std::size_t ShaderNotify::GetShadersBuildingAccurate() { - std::shared_lock lock{mutex}; - return accurate_count; -} - -void ShaderNotify::MarkShaderComplete() { - std::unique_lock lock{mutex}; - accurate_count--; -} - -void ShaderNotify::MarkSharderBuilding() { - std::unique_lock lock{mutex}; - accurate_count++; + return now_building - report_base; } } // namespace VideoCore diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h index a9c92d179..ad363bfb5 100644 --- a/src/video_core/shader_notify.h +++ b/src/video_core/shader_notify.h @@ -4,26 +4,30 @@ #pragma once +#include #include -#include -#include "common/common_types.h" +#include namespace VideoCore { class ShaderNotify { public: - ShaderNotify(); - ~ShaderNotify(); + [[nodiscard]] int ShadersBuilding() noexcept; - std::size_t GetShadersBuilding(); - std::size_t GetShadersBuildingAccurate(); + void MarkShaderComplete() noexcept { + ++num_complete; + } - void MarkShaderComplete(); - void MarkSharderBuilding(); + void MarkShaderBuilding() noexcept { + ++num_building; + } private: - std::size_t last_updated_count{}; - std::size_t accurate_count{}; - std::shared_mutex mutex; - std::chrono::high_resolution_clock::time_point last_update{}; + std::atomic_int num_building{}; + std::atomic_int num_complete{}; + int report_base{}; + + bool completed{}; + int num_when_completed{}; + std::chrono::high_resolution_clock::time_point complete_time; }; } // namespace VideoCore -- cgit v1.2.3 From 12fe7210d2b546bd9c5825b6517b80efc818a7fe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 6 Jun 2021 00:35:57 -0300 Subject: gl_shader_cache: Store workers in shader cache object --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 129 ++++++++++++--------- src/video_core/renderer_opengl/gl_shader_cache.h | 7 ++ 2 files changed, 78 insertions(+), 58 deletions(-) (limited to 'src/video_core') diff --git 
a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0e4904733..9d6cef6e8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -239,6 +239,15 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs } } // Anonymous namespace +struct ShaderCache::Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; +}; + ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, @@ -247,46 +256,49 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo ProgramManager& program_manager_, StateTracker& state_tracker_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, - buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ - state_tracker_} { - profile = Shader::Profile{ - .supported_spirv = 0x00010000, - - .unified_descriptor_binding = false, - .support_descriptor_aliasing = false, - .support_int8 = false, - .support_int16 = false, - .support_vertex_instance_id = true, - .support_float_controls = false, - .support_separate_denorm_behavior = false, - .support_separate_rounding_mode = false, - .support_fp16_denorm_preserve = false, - .support_fp32_denorm_preserve = false, - .support_fp16_denorm_flush = false, - .support_fp32_denorm_flush = false, - .support_fp16_signed_zero_nan_preserve = false, - .support_fp32_signed_zero_nan_preserve = false, - .support_fp64_signed_zero_nan_preserve = false, - .support_explicit_workgroup_layout = false, - .support_vote = true, - 
.support_viewport_index_layer_non_geometry = - device.HasNvViewportArray2() || device.HasVertexViewportLayer(), - .support_viewport_mask = device.HasNvViewportArray2(), - .support_typeless_image_loads = device.HasImageLoadFormatted(), - .support_demote_to_helper_invocation = false, - .support_int64_atomics = false, - .support_derivative_control = device.HasDerivativeControl(), - - .warp_size_potentially_larger_than_guest = true, - - .lower_left_origin_mode = true, - .need_declared_frag_colors = true, - - .has_broken_spirv_clamp = true, - .has_broken_unsigned_image_offsets = true, - .has_broken_signed_operations = true, - .ignore_nan_fp_comparisons = true, - }; + buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, + use_asynchronous_shaders{device.UseAsynchronousShaders()}, + profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = + device.HasNvViewportArray2() || device.HasVertexViewportLayer(), + .support_viewport_mask = device.HasNvViewportArray2(), + .support_typeless_image_loads = device.HasImageLoadFormatted(), + .support_demote_to_helper_invocation = false, + .support_int64_atomics = false, + .support_derivative_control = device.HasDerivativeControl(), + + .warp_size_potentially_larger_than_guest = true, + + 
.lower_left_origin_mode = true, + .need_declared_frag_colors = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + } { + if (use_asynchronous_shaders) { + workers = CreateWorkers(); + } } ShaderCache::~ShaderCache() = default; @@ -307,29 +319,20 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, } shader_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); - struct Context { - explicit Context(Core::Frontend::EmuWindow& emu_window) - : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} - - std::unique_ptr gl_context; - Core::Frontend::GraphicsContext::Scoped scoped; - ShaderPools pools; - }; - Common::StatefulThreadWorker workers( - std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", - [this] { return Context{emu_window}; }); - + if (!workers) { + workers = CreateWorkers(); + } struct { std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; + size_t total{}; + size_t built{}; + bool has_loaded{}; } state; const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { ComputePipelineKey key; file.read(reinterpret_cast(&key), sizeof(key)); - workers.QueueWork( + workers->QueueWork( [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { ctx->pools.ReleaseContents(); auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; @@ -347,7 +350,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, const auto load_graphics{[&](std::ifstream& file, std::vector envs) { GraphicsPipelineKey key; file.read(reinterpret_cast(&key), sizeof(key)); - workers.QueueWork( + workers->QueueWork( [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { boost::container::static_vector env_ptrs; for (auto& env : envs) { @@ -373,7 +376,10 @@ void 
ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, state.has_loaded = true; lock.unlock(); - workers.WaitForRequests(); + workers->WaitForRequests(); + if (!use_asynchronous_shaders) { + workers.reset(); + } } GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { @@ -570,4 +576,11 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& return nullptr; } +std::unique_ptr> ShaderCache::CreateWorkers() + const { + return std::make_unique>( + std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index cf74d34e4..e0c5a06d8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -12,6 +12,7 @@ #include #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -44,6 +45,8 @@ struct ShaderPools { }; class ShaderCache : public VideoCommon::ShaderCache { + struct Context; + public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -74,6 +77,8 @@ private: const ComputePipelineKey& key, Shader::Environment& env); + std::unique_ptr> CreateWorkers() const; + Core::Frontend::EmuWindow& emu_window; const Device& device; TextureCache& texture_cache; @@ -82,6 +87,7 @@ private: StateTracker& state_tracker; GraphicsPipelineKey graphics_key{}; + const bool use_asynchronous_shaders; ShaderPools main_pools; std::unordered_map> graphics_cache; @@ -89,6 +95,7 @@ private: Shader::Profile profile; std::filesystem::path shader_cache_filename; + std::unique_ptr> workers; }; } // namespace OpenGL -- cgit v1.2.3 From 
b1ed64ac18fe7b5fc89abe06442527d8c440ddc7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 6 Jun 2021 01:28:22 -0300 Subject: gl_shader_util: Move shader utility code to a separate file --- .../renderer_opengl/gl_resource_manager.cpp | 27 ----- .../renderer_opengl/gl_resource_manager.h | 14 --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 81 +------------- src/video_core/renderer_opengl/gl_shader_util.cpp | 117 ++++++++++++++------- src/video_core/renderer_opengl/gl_shader_util.h | 89 ++-------------- src/video_core/renderer_opengl/renderer_opengl.cpp | 12 +-- src/video_core/renderer_opengl/util_shaders.cpp | 11 +- 7 files changed, 106 insertions(+), 245 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 3428e5e21..8695c29e3 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -83,18 +83,6 @@ void OGLSampler::Release() { handle = 0; } -void OGLShader::Create(std::string_view source, GLenum type) { - if (handle != 0) { - return; - } - if (source.empty()) { - return; - } - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - handle = GLShader::LoadShader(source, type); -} - void OGLShader::Release() { if (handle == 0) return; @@ -104,21 +92,6 @@ void OGLShader::Release() { handle = 0; } -void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader, - const char* frag_shader, bool separable_program, - bool hint_retrievable) { - OGLShader vert, geo, frag; - if (vert_shader) - vert.Create(vert_shader, GL_VERTEX_SHADER); - if (geo_shader) - geo.Create(geo_shader, GL_GEOMETRY_SHADER); - if (frag_shader) - frag.Create(frag_shader, GL_FRAGMENT_SHADER); - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle); -} - void OGLProgram::Release() { if (handle == 0) return; diff 
--git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 552d79db4..b2d5bfd3b 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -8,7 +8,6 @@ #include #include #include "common/common_types.h" -#include "video_core/renderer_opengl/gl_shader_util.h" namespace OpenGL { @@ -128,8 +127,6 @@ public: return *this; } - void Create(std::string_view source, GLenum type); - void Release(); GLuint handle = 0; @@ -151,17 +148,6 @@ public: return *this; } - template - void Create(bool separable_program, bool hint_retrievable, T... shaders) { - if (handle != 0) - return; - handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...); - } - - /// Creates a new internal OpenGL resource and stores the handle - void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, - bool separable_program = false, bool hint_retrievable = false); - /// Deletes the internal OpenGL resource void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9d6cef6e8..da0b36368 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -31,6 +31,7 @@ #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_cache.h" #include "video_core/shader_environment.h" @@ -53,77 +54,6 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -void AddShader(GLenum stage, GLuint program, std::span code) { - OGLShader shader; - shader.handle = glCreateShader(stage); - - glShaderBinary(1, 
&shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), - static_cast(code.size_bytes())); - glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); - glAttachShader(program, shader.handle); - if (!Settings::values.renderer_debug) { - return; - } - GLint shader_status{}; - glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status); - if (shader_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "Failed to build shader"); - } - GLint log_length{}; - glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length); - if (log_length == 0) { - return; - } - std::string log(log_length, 0); - glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data()); - if (shader_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "{}", log); - } else { - LOG_WARNING(Render_OpenGL, "{}", log); - } -} - -void LinkProgram(GLuint program) { - glLinkProgram(program); - if (!Settings::values.renderer_debug) { - return; - } - GLint link_status{}; - glGetProgramiv(program, GL_LINK_STATUS, &link_status); - - GLint log_length{}; - glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); - if (log_length == 0) { - return; - } - std::string log(log_length, 0); - glGetProgramInfoLog(program, log_length, nullptr, log.data()); - if (link_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "{}", log); - } else { - LOG_WARNING(Render_OpenGL, "{}", log); - } -} - -OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { - OGLAssemblyProgram program; - glGenProgramsARB(1, &program.handle); - glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, - static_cast(code.size()), code.data()); - if (Settings::values.renderer_debug) { - const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - if (std::strstr(err, "error")) { - LOG_CRITICAL(Render_OpenGL, "\n{}", err); - LOG_INFO(Render_OpenGL, "\n{}", code); - } else { - LOG_WARNING(Render_OpenGL, "\n{}", err); - } - } - } - return program; -} - GLenum 
Stage(size_t stage_index) { switch (stage_index) { case 0: @@ -492,9 +422,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( if (!device.UseAssemblyShaders()) { source_program.handle = glCreateProgram(); } - - for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; - ++index) { + const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; + for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } @@ -510,7 +439,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; - AddShader(Stage(stage_index), source_program.handle, code); + AttachShader(Stage(stage_index), source_program.handle, code); } } if (!device.UseAssemblyShaders()) { @@ -565,7 +494,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& } else { const std::vector code{EmitSPIRV(profile, program)}; source_program.handle = glCreateProgram(); - AddShader(GL_COMPUTE_SHADER, source_program.handle, code); + AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 4bf0d6090..99cb81819 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -5,57 +5,100 @@ #include #include #include + #include "common/assert.h" #include "common/logging/log.h" +#include "common/settings.h" #include "video_core/renderer_opengl/gl_shader_util.h" -namespace OpenGL::GLShader { - -namespace { +namespace OpenGL { -std::string_view StageDebugName(GLenum type) { - switch (type) { - case GL_VERTEX_SHADER: - return "vertex"; - case 
GL_GEOMETRY_SHADER: - return "geometry"; - case GL_FRAGMENT_SHADER: - return "fragment"; - case GL_COMPUTE_SHADER: - return "compute"; +static void LogShader(GLuint shader) { + GLint shader_status{}; + glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "Failed to build shader"); + } + GLint log_length{}; + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetShaderInfoLog(shader, log_length, nullptr, log.data()); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); } - UNIMPLEMENTED(); - return "unknown"; } -} // Anonymous namespace +void AttachShader(GLenum stage, GLuint program, std::string_view code) { + OGLShader shader; + shader.handle = glCreateShader(stage); -GLuint LoadShader(std::string_view source, GLenum type) { - const std::string_view debug_type = StageDebugName(type); - const GLuint shader_id = glCreateShader(type); + const GLint length = static_cast(code.size()); + const GLchar* const code_ptr = code.data(); + glShaderSource(shader.handle, 1, &code_ptr, &length); + glCompileShader(shader.handle); + glAttachShader(program, shader.handle); + if (Settings::values.renderer_debug) { + LogShader(shader.handle); + } +} - const GLchar* source_string = source.data(); - const GLint source_length = static_cast(source.size()); +void AttachShader(GLenum stage, GLuint program, std::span code) { + OGLShader shader; + shader.handle = glCreateShader(stage); - glShaderSource(shader_id, 1, &source_string, &source_length); - LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); - glCompileShader(shader_id); + glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), + static_cast(code.size_bytes())); + glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); + glAttachShader(program, shader.handle); + if 
(Settings::values.renderer_debug) { + LogShader(shader.handle); + } +} + +void LinkProgram(GLuint program) { + glLinkProgram(program); + if (!Settings::values.renderer_debug) { + return; + } + GLint link_status{}; + glGetProgramiv(program, GL_LINK_STATUS, &link_status); - GLint result = GL_FALSE; - GLint info_log_length; - glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); + GLint log_length{}; + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} - if (info_log_length > 1) { - std::string shader_error(info_log_length, ' '); - glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "{}", shader_error); - } else { - LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { + OGLAssemblyProgram program; + glGenProgramsARB(1, &program.handle); + glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, + static_cast(code.size()), code.data()); + if (Settings::values.renderer_debug) { + const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + if (std::strstr(err, "error")) { + LOG_CRITICAL(Render_OpenGL, "\n{}", err); + LOG_INFO(Render_OpenGL, "\n{}", code); + } else { + LOG_WARNING(Render_OpenGL, "\n{}", err); + } } } - return shader_id; + return program; } -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 1b770532e..ff5aa024f 100644 --- 
a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -4,92 +4,25 @@ #pragma once +#include #include +#include #include + #include + #include "common/assert.h" #include "common/logging/log.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" -namespace OpenGL::GLShader { - -/** - * Utility function to log the source code of a list of shaders. - * @param shaders The OpenGL shaders whose source we will print. - */ -template -void LogShaderSource(T... shaders) { - auto shader_list = {shaders...}; - - for (const auto& shader : shader_list) { - if (shader == 0) - continue; - - GLint source_length; - glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length); - - std::string source(source_length, ' '); - glGetShaderSource(shader, source_length, nullptr, &source[0]); - LOG_INFO(Render_OpenGL, "Shader source {}", source); - } -} - -/** - * Utility function to create and compile an OpenGL GLSL shader - * @param source String of the GLSL shader program - * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) - */ -GLuint LoadShader(std::string_view source, GLenum type); - -/** - * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) - * @param separable_program whether to create a separable program - * @param shaders ID of shaders to attach to the program - * @returns Handle of the newly created OpenGL program object - */ -template -GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) { - // Link the program - LOG_DEBUG(Render_OpenGL, "Linking program..."); - - GLuint program_id = glCreateProgram(); - - ((shaders == 0 ? 
(void)0 : glAttachShader(program_id, shaders)), ...); - - if (separable_program) { - glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); - } - if (hint_retrievable) { - glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); - } - - glLinkProgram(program_id); - - // Check the program - GLint result = GL_FALSE; - GLint info_log_length; - glGetProgramiv(program_id, GL_LINK_STATUS, &result); - glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::string program_error(info_log_length, ' '); - glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "{}", program_error); - } else { - LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); - } - } +namespace OpenGL { - if (result == GL_FALSE) { - // There was a problem linking the shader, print the source for debugging purposes. - LogShaderSource(shaders...); - } +void AttachShader(GLenum stage, GLuint program, std::string_view code); - ASSERT_MSG(result == GL_TRUE, "Shader not linked"); +void AttachShader(GLenum stage, GLuint program, std::span code); - ((shaders == 0 ? 
(void)0 : glDetachShader(program_id, shaders)), ...); +void LinkProgram(GLuint program); - return program_id; -} +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target); -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index a4805f3da..b8777643b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -24,6 +24,7 @@ #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/textures/decoders.h" @@ -230,13 +231,10 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs - OGLShader vertex_shader; - vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); - - OGLShader fragment_shader; - fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - - present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle); + present_program.handle = glCreateProgram(); + AttachShader(GL_VERTEX_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_VERT); + AttachShader(GL_FRAGMENT_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_FRAG); + LinkProgram(present_program.handle); // Generate presentation sampler present_sampler.Create(); diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 51e72b705..8aa0683c8 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -17,6 +17,7 @@ #include 
"video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/accelerated_swizzle.h" @@ -40,13 +41,12 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; using VideoCore::Surface::BytesPerBlock; namespace { - OGLProgram MakeProgram(std::string_view source) { - OGLShader shader; - shader.Create(source, GL_COMPUTE_SHADER); - OGLProgram program; - program.Create(true, false, shader.handle); + OGLShader shader; + program.handle = glCreateProgram(); + AttachShader(GL_COMPUTE_SHADER, program.handle, source); + LinkProgram(program.handle); return program; } @@ -54,7 +54,6 @@ size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { return static_cast(copy.extent.width * copy.extent.height * copy.src_subresource.num_layers); } - } // Anonymous namespace UtilShaders::UtilShaders(ProgramManager& program_manager_) -- cgit v1.2.3 From 7eaa74ad235b669608debaf3583af94bd675b6c6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 7 Jun 2021 20:43:00 -0300 Subject: gl_texture_cache: Create image storage views Fixes SULD.D tests. 
--- .../renderer_opengl/gl_compute_pipeline.cpp | 5 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 5 +- .../renderer_opengl/gl_texture_cache.cpp | 132 +++++++++++++++------ src/video_core/renderer_opengl/gl_texture_cache.h | 22 +++- 4 files changed, 126 insertions(+), 38 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 61b6fe4b7..a40106c87 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -178,7 +178,10 @@ void ComputePipeline::Configure() { for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + images[image_binding++] = image_view.StorageView(desc.type, desc.format); } } if (texture_binding != 0) { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index a5d65fdca..a2ea35d5a 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -332,7 +332,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + images[image_binding++] = image_view.StorageView(desc.type, desc.format); } } }}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 7053be161..c373c9cb4 100644 --- 
a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -328,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } } +[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) { + switch (format) { + case Shader::ImageFormat::Typeless: + break; + case Shader::ImageFormat::R8_SINT: + return GL_R8I; + case Shader::ImageFormat::R8_UINT: + return GL_R8UI; + case Shader::ImageFormat::R16_UINT: + return GL_R16UI; + case Shader::ImageFormat::R16_SINT: + return GL_R16I; + case Shader::ImageFormat::R32_UINT: + return GL_R32UI; + case Shader::ImageFormat::R32G32_UINT: + return GL_RG32UI; + case Shader::ImageFormat::R32G32B32A32_UINT: + return GL_RGBA32UI; + } + UNREACHABLE_MSG("Invalid image format={}", format); + return GL_R32UI; +} } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { @@ -837,21 +859,28 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI } else { internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; } - VideoCommon::SubresourceRange flatten_range = info.range; - std::array handles; - stored_views.reserve(2); - + full_range = info.range; + flat_range = info.range; + set_object_label = device.HasDebuggingToolAttached(); + is_render_target = info.IsRenderTarget(); + original_texture = image.texture.handle; + num_samples = image.info.num_samples; + if (!is_render_target) { + swizzle[0] = info.x_source; + swizzle[1] = info.y_source; + swizzle[2] = info.z_source; + swizzle[3] = info.w_source; + } switch (info.type) { case ImageViewType::e1DArray: - flatten_range.extent.layers = 1; + flat_range.extent.layers = 1; [[fallthrough]]; case ImageViewType::e1D: - glGenTextures(2, handles.data()); - SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range); - SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range); + 
SetupView(Shader::TextureType::Color1D); + SetupView(Shader::TextureType::ColorArray1D); break; case ImageViewType::e2DArray: - flatten_range.extent.layers = 1; + flat_range.extent.layers = 1; [[fallthrough]]; case ImageViewType::e2D: if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { @@ -861,26 +890,23 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI .base = {.level = info.range.base.level, .layer = 0}, .extent = {.levels = 1, .layers = 1}, }; - glGenTextures(1, handles.data()); - SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range); + full_range = slice_range; + + SetupView(Shader::TextureType::Color3D); } else { - glGenTextures(2, handles.data()); - SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range); - SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info, - info.range); + SetupView(Shader::TextureType::Color2D); + SetupView(Shader::TextureType::ColorArray2D); } break; case ImageViewType::e3D: - glGenTextures(1, handles.data()); - SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range); + SetupView(Shader::TextureType::Color3D); break; case ImageViewType::CubeArray: - flatten_range.extent.layers = 6; + flat_range.extent.layers = 6; [[fallthrough]]; case ImageViewType::Cube: - glGenTextures(2, handles.data()); - SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range); - SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range); + SetupView(Shader::TextureType::ColorCube); + SetupView(Shader::TextureType::ColorArrayCube); break; case ImageViewType::Rect: UNIMPLEMENTED(); @@ -928,22 +954,62 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} 
-void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type, - GLuint handle, const VideoCommon::ImageViewInfo& info, - VideoCommon::SubresourceRange view_range) { - const GLuint parent = image.texture.handle; - const GLenum target = ImageTarget(view_type, image.info.num_samples); - glTextureView(handle, target, parent, internal_format, view_range.base.level, +GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { + if (image_format == Shader::ImageFormat::Typeless) { + return Handle(texture_type); + } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || + image_format == Shader::ImageFormat::R16_SINT}; + if (!storage_views) { + storage_views = std::make_unique(); + } + auto& type_views{is_signed ? storage_views->signeds : storage_views->unsigneds}; + GLuint& view{type_views[static_cast(texture_type)]}; + if (view == 0) { + view = MakeView(texture_type, ShaderFormat(image_format)); + } + return view; +} + +void ImageView::SetupView(Shader::TextureType view_type) { + views[static_cast(view_type)] = MakeView(view_type, internal_format); +} + +GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) { + VideoCommon::SubresourceRange view_range; + switch (view_type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Color2D: + case Shader::TextureType::ColorCube: + view_range = flat_range; + break; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::Color3D: + case Shader::TextureType::ColorArrayCube: + view_range = full_range; + break; + default: + UNREACHABLE(); + } + OGLTextureView& view = stored_views.emplace_back(); + view.Create(); + + const GLenum target = ImageTarget(view_type, num_samples); + glTextureView(view.handle, target, original_texture, view_format, view_range.base.level, view_range.extent.levels, view_range.base.layer, view_range.extent.layers); - if 
(!info.IsRenderTarget()) { - ApplySwizzle(handle, format, info.Swizzle()); + if (!is_render_target) { + std::array casted_swizzle; + std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) { + return static_cast(component_swizzle); + }); + ApplySwizzle(view.handle, format, casted_swizzle); } - if (device.HasDebuggingToolAttached()) { + if (set_object_label) { const std::string name = VideoCommon::Name(*this); - glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); + glObjectLabel(GL_TEXTURE, view.handle, static_cast(name.size()), name.data()); } - stored_views.emplace_back().handle = handle; - views[static_cast(view_type)] = handle; + return view.handle; } Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 2e3e02b79..921072ebe 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -185,6 +185,9 @@ public: const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format); + [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { return views[static_cast(handle_type)]; } @@ -206,16 +209,29 @@ public: } private: - void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, - const VideoCommon::ImageViewInfo& info, - VideoCommon::SubresourceRange view_range); + struct StorageViews { + std::array signeds{}; + std::array unsigneds{}; + }; + + void SetupView(Shader::TextureType view_type); + + GLuint MakeView(Shader::TextureType view_type, GLenum view_format); std::array views{}; std::vector stored_views; + std::unique_ptr storage_views; GLenum internal_format = GL_NONE; GLuint default_handle = 0; 
GPUVAddr gpu_addr = 0; u32 buffer_size = 0; + GLuint original_texture = 0; + int num_samples = 0; + VideoCommon::SubresourceRange flat_range; + VideoCommon::SubresourceRange full_range; + std::array swizzle{}; + bool set_object_label = false; + bool is_render_target = false; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; -- cgit v1.2.3 From 15bdd27cac4a0b1e6cd168272dc337cd685ef144 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 9 Jun 2021 23:33:48 -0400 Subject: shader_environment: Add shader_local_memory_crs_size to local memory size Fixes DOOM 2016 missing local memory --- src/video_core/shader_environment.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index c93174519..a7a57a36f 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -69,7 +69,7 @@ u32 GenericEnvironment::TextureBoundBuffer() const { } u32 GenericEnvironment::LocalMemorySize() const { - return local_memory_size; + return local_memory_size + sph.common3.shader_local_memory_crs_size; } u32 GenericEnvironment::SharedMemorySize() const { -- cgit v1.2.3 From 60a96c49e59e600685b9a79d80b2685318b4fb64 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 10 Jun 2021 02:24:12 -0300 Subject: buffer_cache: Fix copy based uniform bindings tracking --- src/video_core/buffer_cache/buffer_cache.h | 19 +++++++++++++++---- src/video_core/renderer_opengl/gl_buffer_cache.h | 12 +++++++----- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index ec64f2293..47cb0a47d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -680,6 +680,9 @@ void BufferCache

::SetUniformBuffersState(const std::array& m const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { if (enabled_uniform_buffer_masks != mask) { + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers.fill(0); + } dirty_uniform_buffers.fill(~u32{0}); } } @@ -1020,6 +1023,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // Fast path for Nvidia if (!HasFastUniformBufferBound(stage, binding_index)) { // We only have to bind when the currently bound buffer is not the fast version + fast_bound_uniform_buffers[stage] |= 1U << binding_index; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -1027,8 +1031,9 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 return; } } - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers[stage] |= 1U << binding_index; + } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span span = runtime.BindMappedUniformBuffer(stage, binding_index, size); cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); @@ -1046,9 +1051,15 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // This exists to avoid instances where the fast buffer is bound and a GPU write happens return; } - fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); - const u32 offset = buffer.Offset(cpu_addr); + if constexpr (IS_OPENGL) { + // Fast buffer will be unbound + fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); + + // Mark the index as dirty if offset doesn't match + const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); + dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; + } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index bc16abafb..060d36427 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -92,16 +92,14 @@ public: VideoCore::Surface::PixelFormat format); void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { + const GLuint handle = fast_uniforms[stage][binding_index].handle; + const GLsizeiptr gl_size = static_cast(size); if (use_assembly_shaders) { - const GLuint handle = fast_uniforms[stage][binding_index].handle; - const GLsizeiptr gl_size = static_cast(size); glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); } else { const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_UNIFORM_BUFFER, binding, - fast_uniforms[stage][binding_index].handle, 0, - static_cast(size)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size); } } @@ -134,6 +132,10 @@ public: return has_fast_buffer_sub_data; } + [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept { + return !use_assembly_shaders; + } + void SetBaseUniformBindings(const std::array& 
bindings) { graphics_base_uniform_bindings = bindings; } -- cgit v1.2.3 From 5befc0bf872058315c4f81bf58dcd173db2589fd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 10 Jun 2021 02:27:00 -0300 Subject: shader_environment: Fix local memory size calculations --- src/video_core/shader_environment.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index a7a57a36f..6243cd176 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -69,7 +69,7 @@ u32 GenericEnvironment::TextureBoundBuffer() const { } u32 GenericEnvironment::LocalMemorySize() const { - return local_memory_size + sph.common3.shader_local_memory_crs_size; + return local_memory_size; } u32 GenericEnvironment::SharedMemorySize() const { @@ -233,7 +233,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, } const u64 local_size{sph.LocalMemorySize()}; ASSERT(local_size <= std::numeric_limits::max()); - local_memory_size = static_cast(local_size); + local_memory_size = static_cast(local_size) + sph.common3.shader_local_memory_crs_size; texture_bound = maxwell3d->regs.tex_cb_index; } @@ -261,7 +261,7 @@ ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_com &kepler_compute_} { const auto& qmd{kepler_compute->launch_description}; stage = Shader::Stage::Compute; - local_memory_size = qmd.local_pos_alloc; + local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc; texture_bound = kepler_compute->regs.tex_cb_index; shared_memory_size = qmd.shared_alloc; workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; -- cgit v1.2.3 From cd8427367ed372e355fa76a78d41b3bc64f997ca Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 10 Jun 2021 01:55:27 -0400 Subject: gl_buffer_cache: Use unorm internal formats for snorm texture buffer 
views Fixes black textures in UE4 games --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 25 +++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 334ed470f..0703614de 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -25,6 +25,25 @@ constexpr std::array PROGRAM_LUT{ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; + +[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) { + switch (gl_format) { + case GL_RGBA8_SNORM: + return GL_RGBA8; + case GL_R8_SNORM: + return GL_R8; + case GL_RGBA16_SNORM: + return GL_RGBA16; + case GL_R16_SNORM: + return GL_R16; + case GL_RG16_SNORM: + return GL_RG16; + case GL_RG8_SNORM: + return GL_RG8; + default: + return gl_format; + } +} } // Anonymous namespace Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) @@ -76,7 +95,11 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { OGLTexture texture; texture.Create(GL_TEXTURE_BUFFER); const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; - glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size); + const GLenum texture_format{GetTextureBufferFormat(gl_format)}; + if (texture_format != gl_format) { + LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM."); + } + glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size); views.push_back({ .offset = offset, .size = size, -- cgit v1.2.3 From d554778311c32e0a19ecdc13d7525b264d8443b5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 11 Jun 2021 21:52:04 -0300 Subject: vulkan: Use VK_EXT_provoking_vertex when available --- src/video_core/engines/maxwell_3d.h | 7 +++++- 
.../renderer_vulkan/fixed_pipeline_state.cpp | 4 ++-- .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 10 +++++++- src/video_core/vulkan_common/vulkan_device.cpp | 28 ++++++++++++++++++++++ src/video_core/vulkan_common/vulkan_device.h | 6 +++++ 6 files changed, 52 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cbf94412b..04d5790f6 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1151,7 +1151,11 @@ public: u32 index; } primitive_restart; - INSERT_PADDING_WORDS_NOINIT(0x5F); + INSERT_PADDING_WORDS_NOINIT(0xE); + + u32 provoking_vertex_last; + + INSERT_PADDING_WORDS_NOINIT(0x50); struct { u32 start_addr_high; @@ -1672,6 +1676,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(primitive_restart, 0x591); +ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 1486d088a..f121fbf0e 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -84,6 +84,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); depth_format.Assign(static_cast(regs.zeta.format)); + y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); + provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 
1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); @@ -91,8 +93,6 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); - y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 0f1eff9cd..60adae316 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -192,6 +192,7 @@ struct FixedPipelineState { BitField<4, 1, u32> depth_enabled; BitField<5, 5, u32> depth_format; BitField<10, 1, u32> y_negate; + BitField<11, 1, u32> provoking_vertex_last; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5c916c869..06a80c2ba 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -567,9 +567,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { viewport_ci.pNext = &swizzle_ci; } + const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? 
VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; const VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, + .pNext = device.IsExtProvokingVertexSupported() ? &provoking_vertex : nullptr, .flags = 0, .depthClampEnable = static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), @@ -586,6 +593,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2b715baba..618535aae 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -412,6 +412,19 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + if (ext_provoking_vertex) { + provoking_vertex = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, + .pNext = nullptr, + .provokingVertexLast = VK_TRUE, + .transformFeedbackPreservesProvokingVertex = VK_TRUE, + }; + SetNext(next, provoking_vertex); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); + } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; if (ext_shader_atomic_int64) { atomic_int64 = { @@ -718,6 +731,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; bool has_ext_shader_atomic_int64{}; + bool has_ext_provoking_vertex{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { 
const auto test = [&](std::optional> status, const char* name, bool push) { @@ -748,6 +762,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); @@ -799,6 +814,19 @@ std::vector Device::LoadExtensions(bool requires_surface) { } else { is_warp_potentially_bigger = true; } + if (has_ext_provoking_vertex) { + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT; + provoking_vertex.pNext = nullptr; + features.pNext = &provoking_vertex; + physical.GetFeatures2KHR(features); + + if (provoking_vertex.provokingVertexLast && + provoking_vertex.transformFeedbackPreservesProvokingVertex) { + extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + ext_provoking_vertex = true; + } + } if (has_ext_shader_atomic_int64) { VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 9bc1fb947..37f589612 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -244,6 +244,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_EXT_provoking_vertex. 
+ bool IsExtProvokingVertexSupported() const { + return ext_provoking_vertex; + } + /// Returns true if the device supports VK_KHR_shader_atomic_int64. bool IsExtShaderAtomicInt64Supported() const { return ext_shader_atomic_int64; @@ -346,6 +351,7 @@ private: bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached -- cgit v1.2.3 From 3025b2f605df74a129f0f47aadd4247055ecd6bd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 11 Jun 2021 21:53:38 -0300 Subject: vk_rasterizer: Implement first index --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d284b3653..e339e9739 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -54,6 +54,7 @@ struct DrawParams { u32 num_instances; u32 base_vertex; u32 num_vertices; + u32 first_index; bool is_indexed; }; @@ -103,6 +104,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan .num_instances = is_instanced ? num_instances : 1, .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, + .first_index = is_indexed ? 
regs.index_array.first : 0, .is_indexed = is_indexed, }; if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { @@ -173,8 +175,9 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, - draw_params.base_vertex, draw_params.base_instance); + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, + draw_params.first_index, draw_params.base_vertex, + draw_params.base_instance); } else { cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, draw_params.base_vertex, draw_params.base_instance); -- cgit v1.2.3 From cb78a1b494be2f6bc0927ed5b7a878236a3dc1c0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 01:46:30 -0300 Subject: shader: Reorder shader cache directories --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 13 +++++-------- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 17 +++++++---------- 2 files changed, 12 insertions(+), 18 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index da0b36368..9391a4cd9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -238,16 +238,13 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, if (title_id == 0) { return; } - auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; - auto base_dir{shader_dir / "new_opengl"}; - auto transferable_dir{base_dir / "transferable"}; - auto precompiled_dir{base_dir / "precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || 
!Common::FS::CreateDir(precompiled_dir)) { - LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create shader cache directories"); return; } - shader_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + shader_cache_filename = base_dir / "opengl.bin"; if (!workers) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e61d76490..6df4088a7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -337,22 +337,19 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading if (title_id == 0) { return; } - auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; - auto base_dir{shader_dir / "vulkan"}; - auto transferable_dir{base_dir / "transferable"}; - auto precompiled_dir{base_dir / "precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); return; } - pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + pipeline_cache_filename = base_dir / "vulkan.bin"; struct { std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; + size_t total{}; + size_t built{}; 
+ bool has_loaded{}; } state; const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { -- cgit v1.2.3 From ea038d66538975319858f792052af1d0fa997fe3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 05:07:52 -0300 Subject: vulkan: Add VK_EXT_vertex_input_dynamic_state support Reduces the number of total pipelines generated on Vulkan. Tested on Super Smash Bros. Ultimate. --- .../renderer_vulkan/fixed_pipeline_state.cpp | 67 ++++++++----- .../renderer_vulkan/fixed_pipeline_state.h | 73 ++++++++------ .../renderer_vulkan/vk_graphics_pipeline.cpp | 107 +++++++++++++-------- .../renderer_vulkan/vk_pipeline_cache.cpp | 29 +++++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 56 +++++++++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 2 + .../renderer_vulkan/vk_state_tracker.cpp | 50 ++++++---- src/video_core/renderer_vulkan/vk_state_tracker.h | 8 +- src/video_core/vulkan_common/vulkan_device.h | 6 ++ src/video_core/vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 8 ++ 11 files changed, 291 insertions(+), 116 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index f121fbf0e..16cef8711 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -50,7 +50,7 @@ void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& } // Anonymous namespace void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, - bool has_extended_dynamic_state) { + bool has_extended_dynamic_state, bool has_dynamic_vertex_input) { const Maxwell& regs = maxwell3d.regs; const std::array enabled_lut{ regs.polygon_offset_point_enable, @@ -60,7 +60,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const u32 topology_index = static_cast(regs.draw.topology.Value()); raw1 = 0; - 
no_extended_dynamic_state.Assign(has_extended_dynamic_state ? 0 : 1); + extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0); + dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0); xfb_enabled.Assign(regs.tfb_enabled != 0); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); @@ -73,11 +74,11 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.operation)); - rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); topology.Assign(regs.draw.topology); msaa_mode.Assign(regs.multisample_mode); raw2 = 0; + rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); const auto test_func = regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); @@ -93,24 +94,44 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { - maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); - binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; - } - } - if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { - maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& input = regs.vertex_attrib_format[index]; - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 
0 : 1); - attribute.buffer.Assign(input.buffer); - attribute.offset.Assign(input.offset); - attribute.type.Assign(static_cast(input.type.Value())); - attribute.size.Assign(static_cast(input.size.Value())); + if (maxwell3d.dirty.flags[Dirty::VertexInput]) { + if (has_dynamic_vertex_input) { + // Dirty flag will be reset by the command buffer update + static constexpr std::array LUT{ + 0u, // Invalid + 1u, // SignedNorm + 1u, // UnsignedNorm + 2u, // SignedInt + 3u, // UnsignedInt + 1u, // UnsignedScaled + 1u, // SignedScaled + 1u, // Float + }; + const auto& attrs = regs.vertex_attrib_format; + attribute_types = 0; + for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { + const u32 mask = attrs[i].constant != 0 ? 0 : 3; + const u32 type = LUT[static_cast(attrs[i].type.Value())]; + attribute_types |= static_cast(type & mask) << (i * 2); + } + } else { + maxwell3d.dirty.flags[Dirty::VertexInput] = false; + enabled_divisors = 0; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); + binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; + enabled_divisors |= (is_enabled ? u64{1} : 0) << index; + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.IsConstant() ? 
0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast(input.type.Value())); + attribute.size.Assign(static_cast(input.size.Value())); + } } } if (maxwell3d.dirty.flags[Dirty::Blending]) { @@ -126,10 +147,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, return static_cast(viewport.swizzle.raw); }); } - if (no_extended_dynamic_state != 0) { + if (!extended_dynamic_state) { dynamic_state.Refresh(regs); } - if (xfb_enabled != 0) { + if (xfb_enabled) { RefreshXfbState(xfb_state, regs); } } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 60adae316..04f34eb97 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -168,44 +168,51 @@ struct FixedPipelineState { union { u32 raw1; - BitField<0, 1, u32> no_extended_dynamic_state; - BitField<1, 1, u32> xfb_enabled; - BitField<2, 1, u32> primitive_restart_enable; - BitField<3, 1, u32> depth_bias_enable; - BitField<4, 1, u32> depth_clamp_disabled; - BitField<5, 1, u32> ndc_minus_one_to_one; - BitField<6, 2, u32> polygon_mode; - BitField<8, 5, u32> patch_control_points_minus_one; - BitField<13, 2, u32> tessellation_primitive; - BitField<15, 2, u32> tessellation_spacing; - BitField<17, 1, u32> tessellation_clockwise; - BitField<18, 1, u32> logic_op_enable; - BitField<19, 4, u32> logic_op; - BitField<23, 1, u32> rasterize_enable; + BitField<0, 1, u32> extended_dynamic_state; + BitField<1, 1, u32> dynamic_vertex_input; + BitField<2, 1, u32> xfb_enabled; + BitField<3, 1, u32> primitive_restart_enable; + BitField<4, 1, u32> depth_bias_enable; + BitField<5, 1, u32> depth_clamp_disabled; + BitField<6, 1, u32> ndc_minus_one_to_one; + BitField<7, 2, u32> polygon_mode; + BitField<9, 5, u32> patch_control_points_minus_one; + BitField<14, 2, u32> tessellation_primitive; + BitField<16, 2, 
u32> tessellation_spacing; + BitField<18, 1, u32> tessellation_clockwise; + BitField<19, 1, u32> logic_op_enable; + BitField<20, 4, u32> logic_op; BitField<24, 4, Maxwell::PrimitiveTopology> topology; BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; }; union { u32 raw2; - BitField<0, 3, u32> alpha_test_func; - BitField<3, 1, u32> early_z; - BitField<4, 1, u32> depth_enabled; - BitField<5, 5, u32> depth_format; - BitField<10, 1, u32> y_negate; - BitField<11, 1, u32> provoking_vertex_last; + BitField<0, 1, u32> rasterize_enable; + BitField<1, 3, u32> alpha_test_func; + BitField<4, 1, u32> early_z; + BitField<5, 1, u32> depth_enabled; + BitField<6, 5, u32> depth_format; + BitField<11, 1, u32> y_negate; + BitField<12, 1, u32> provoking_vertex_last; }; std::array color_formats; u32 alpha_test_ref; u32 point_size; - std::array binding_divisors; - std::array attributes; std::array attachments; std::array viewport_swizzles; + union { + u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state + u64 enabled_divisors; + }; + std::array attributes; + std::array binding_divisors; + DynamicState dynamic_state; VideoCommon::TransformFeedbackState xfb_state; - void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); + void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state, + bool has_dynamic_vertex_input); size_t Hash() const noexcept; @@ -216,16 +223,24 @@ struct FixedPipelineState { } size_t Size() const noexcept { - if (xfb_enabled != 0) { + if (xfb_enabled) { // When transform feedback is enabled, use the whole struct return sizeof(*this); - } else if (no_extended_dynamic_state != 0) { - // Dynamic state is enabled, we can enable more - return offsetof(FixedPipelineState, xfb_state); - } else { - // No XFB, extended dynamic state enabled + } + if (dynamic_vertex_input) { + // Exclude dynamic state and attributes + return offsetof(FixedPipelineState, attributes); + } + if (extended_dynamic_state) { + // 
Exclude dynamic state return offsetof(FixedPipelineState, dynamic_state); } + // Default + return offsetof(FixedPipelineState, xfb_state); + } + + u32 DynamicAttributeType(size_t index) const noexcept { + return (attribute_types >> (index * 2)) & 0b11; } }; static_assert(std::has_unique_object_representations_v); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 06a80c2ba..ccef71f4c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -472,39 +472,65 @@ void GraphicsPipeline::ConfigureDraw() { void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; - if (!device.IsExtExtendedDynamicStateSupported()) { + if (key.state.extended_dynamic_state) { dynamic = key.state.dynamic_state; } static_vector vertex_bindings; static_vector vertex_binding_divisors; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = key.state.binding_divisors[index] != 0; - const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, - }); - if (instanced) { - vertex_binding_divisors.push_back({ + static_vector vertex_attributes; + if (key.state.dynamic_vertex_input) { + const auto& input_attributes = stage_infos[0].input_generics; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const u32 type = key.state.DynamicAttributeType(index); + if (!input_attributes[index].used || type == 0) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = 0, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? 
VK_FORMAT_R32_SINT + : VK_FORMAT_R32_UINT, + .offset = 0, + }); + } + if (!vertex_attributes.empty()) { + vertex_bindings.push_back({ + .binding = 0, + .stride = 4, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }); + } + } else { + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = key.state.binding_divisors[index] != 0; + const auto rate = + instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ .binding = static_cast(index), - .divisor = key.state.binding_divisors[index], + .stride = dynamic.vertex_strides[index], + .inputRate = rate, }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast(index), + .divisor = key.state.binding_divisors[index], + }); + } } - } - static_vector vertex_attributes; - const auto& input_attributes = stage_infos[0].input_generics; - for (size_t index = 0; index < key.state.attributes.size(); ++index) { - const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { - continue; + const auto& input_attributes = stage_infos[0].input_generics; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; + if (!attribute.enabled || !input_attributes[index].used) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); } - vertex_attributes.push_back({ - .location = static_cast(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); } VkPipelineVertexInputStateCreateInfo vertex_input_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -545,27 +571,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .flags = 0, 
.patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, - }; + std::array swizzles; std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, .flags = 0, .viewportCount = Maxwell::NumViewports, .pViewportSwizzles = swizzles.data(), }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } + const VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = device.IsNvViewportSwizzleSupported() ? 
&swizzle_ci : nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, @@ -660,13 +684,13 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - static_vector dynamic_states{ + static_vector dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, }; - if (device.IsExtExtendedDynamicStateSupported()) { + if (key.state.extended_dynamic_state) { static constexpr std::array extended{ VK_DYNAMIC_STATE_CULL_MODE_EXT, VK_DYNAMIC_STATE_FRONT_FACE_EXT, @@ -678,6 +702,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_STENCIL_OP_EXT, }; + if (key.state.dynamic_vertex_input) { + dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT); + } dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); } const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6df4088a7..db7da5555 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -109,6 +109,20 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } +Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) { + switch (state.DynamicAttributeType(index)) { + case 0: + return 
Shader::AttributeType::Disabled; + case 1: + return Shader::AttributeType::Float; + case 2: + return Shader::AttributeType::SignedInt; + case 3: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Disabled; +} + Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::RuntimeInfo info; @@ -123,13 +137,19 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, if (key.state.topology == Maxwell::PrimitiveTopology::Points) { info.fixed_state_point_size = point_size; } - if (key.state.xfb_enabled != 0) { + if (key.state.xfb_enabled) { info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); } info.convert_depth_mode = gl_ndc; } - std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), - &CastAttributeType); + if (key.state.dynamic_vertex_input) { + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + info.generic_input_types[index] = AttributeType(key.state, index); + } + } else { + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); + } break; case Shader::Stage::TessellationEval: // We have to flip tessellation clockwise for some reason... 
@@ -298,7 +318,8 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { current_pipeline = nullptr; return nullptr; } - graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(), + device.IsExtVertexInputDynamicStateSupported()); if (current_pipeline) { GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e339e9739..855c17769 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -551,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateFrontFace(regs); UpdateStencilOp(regs); UpdateStencilTestEnable(regs); + if (device.IsExtVertexInputDynamicStateSupported()) { + UpdateVertexInput(regs); + } } } @@ -780,4 +783,57 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& }); } +void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[Dirty::VertexInput]) { + return; + } + dirty[Dirty::VertexInput] = false; + + boost::container::static_vector bindings; + boost::container::static_vector attributes; + + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (!dirty[Dirty::VertexAttribute0 + index]) { + continue; + } + const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; + const u32 binding{attribute.buffer}; + dirty[Dirty::VertexAttribute0 + index] = false; + dirty[Dirty::VertexBinding0 + static_cast(binding)] = true; + + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast(index), + .binding = binding, + .format = attribute.IsConstant() + ? 
VK_FORMAT_A8B8G8R8_UNORM_PACK32 + : MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (!dirty[Dirty::VertexBinding0 + index]) { + continue; + } + dirty[Dirty::VertexBinding0 + index] = false; + + const u32 binding{static_cast(index)}; + const auto& input_binding{regs.vertex_array[binding]}; + const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)}; + bindings.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT, + .pNext = nullptr, + .binding = binding, + .stride = input_binding.stride, + .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX, + .divisor = is_instanced ? input_binding.divisor : 1, + }); + } + scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) { + cmdbuf.SetVertexInputEXT(bindings, attributes); + }); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 1302bed02..c954fa7f8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -135,6 +135,8 @@ private: void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); + Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 956f86845..0ebe0473f 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, 
DepthBounds, - StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, - DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, + Viewports, Scissors, DepthBias, BlendConstants, + DepthBounds, StencilProperties, CullMode, DepthBoundsEnable, + DepthTestEnable, DepthWriteEnable, DepthCompareOp, FrontFace, + StencilOp, StencilTestEnable, VertexBuffers, VertexInput, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() { for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { flags[index] = true; } + for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) { + flags[index] = true; + } + for (int index = VertexBinding0; index <= VertexBinding31; ++index) { + flags[index] = true; + } return flags; } @@ -134,31 +141,38 @@ void SetupDirtyBlending(Tables& tables) { FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); } -void SetupDirtyInstanceDivisors(Tables& tables) { - static constexpr size_t divisor_offset = 3; - for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { - tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; - tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = - InstanceDivisors; +void SetupDirtyViewportSwizzles(Tables& tables) { + static constexpr size_t swizzle_offset = 6; + for (size_t index = 0; index < Regs::NumViewports; ++index) { + tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = + ViewportSwizzles; } } void SetupDirtyVertexAttributes(Tables& tables) { - FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); + for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) { + const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); + FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i); + } + FillBlock(tables[1], 
OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput); } -void SetupDirtyViewportSwizzles(Tables& tables) { - static constexpr size_t swizzle_offset = 6; - for (size_t index = 0; index < Regs::NumViewports; ++index) { - tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = - ViewportSwizzles; +void SetupDirtyVertexBindings(Tables& tables) { + // Do NOT include stride here, it's implicit in VertexBuffer + static constexpr size_t divisor_offset = 3; + for (size_t i = 0; i < Regs::NumVertexArrays; ++i) { + const u8 flag = static_cast(VertexBinding0 + i); + tables[0][OFF(instanced_arrays) + i] = VertexInput; + tables[1][OFF(instanced_arrays) + i] = flag; + tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput; + tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag; } } } // Anonymous namespace StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { - auto& tables = gpu.Maxwell3D().dirty.tables; + auto& tables{gpu.Maxwell3D().dirty.tables}; SetupDirtyFlags(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables); @@ -175,9 +189,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); SetupDirtyBlending(tables); - SetupDirtyInstanceDivisors(tables); - SetupDirtyVertexAttributes(tables); SetupDirtyViewportSwizzles(tables); + SetupDirtyVertexAttributes(tables); + SetupDirtyVertexBindings(tables); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 84e918a71..1976b7e9b 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -19,6 +19,12 @@ namespace Dirty { enum : u8 { First = VideoCommon::Dirty::LastCommonEntry, + VertexInput, + VertexAttribute0, + VertexAttribute31 = 
VertexAttribute0 + 31, + VertexBinding0, + VertexBinding31 = VertexBinding0 + 31, + Viewports, Scissors, DepthBias, @@ -36,8 +42,6 @@ enum : u8 { StencilTestEnable, Blending, - InstanceDivisors, - VertexAttributes, ViewportSwizzles, Last diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 37f589612..4fda472b0 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -239,6 +239,11 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. + bool IsExtVertexInputDynamicStateSupported() const { + return ext_vertex_input_dynamic_state; + } + /// Returns true if the device supports VK_EXT_shader_stencil_export. bool IsExtShaderStencilExportSupported() const { return ext_shader_stencil_export; @@ -349,6 +354,7 @@ private: bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. 
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 33fb74bfb..7e13ae8af 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -123,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); + X(vkCmdSetVertexInputEXT); X(vkCmdResolveImage); X(vkCreateBuffer); X(vkCreateBufferView); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 3e36d356a..6e5be1186 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -238,6 +238,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; + PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; PFN_vkCmdResolveImage vkCmdResolveImage{}; PFN_vkCreateBuffer vkCreateBuffer{}; PFN_vkCreateBufferView vkCreateBufferView{}; @@ -1203,6 +1204,13 @@ public: dld->vkCmdSetStencilTestEnableEXT(handle, enable ? 
VK_TRUE : VK_FALSE); } + void SetVertexInputEXT( + vk::Span bindings, + vk::Span attributes) const noexcept { + dld->vkCmdSetVertexInputEXT(handle, bindings.size(), bindings.data(), attributes.size(), + attributes.data()); + } + void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, const VkDeviceSize* offsets, const VkDeviceSize* sizes) const noexcept { -- cgit v1.2.3 From 41cca8b8ad6f7ea33e74210aee4e3867ffa0622e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 23:24:49 -0300 Subject: vk_pipeline_cache: Skip cached pipelines with different dynamic state --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index db7da5555..b17f34cdd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -391,10 +391,16 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading }); ++state.total; }}; + const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(); + const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(); const auto load_graphics{[&](std::ifstream& file, std::vector envs) { GraphicsPipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); + if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state || + (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) { + return; + } workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; boost::container::static_vector env_ptrs; -- cgit v1.2.3 From ba3bdf1d4156fa6fd257305406a3b88f0c288006 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 23:25:40 -0300 Subject: vulkan_device: Enable VK_EXT_vertex_input_dynamic_state --- 
src/video_core/vulkan_common/vulkan_device.cpp | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 618535aae..8eb37a77a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -425,6 +425,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); } + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic; + if (ext_vertex_input_dynamic_state) { + vertex_input_dynamic = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT, + .pNext = nullptr, + .vertexInputDynamicState = VK_TRUE, + }; + SetNext(next, vertex_input_dynamic); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state"); + } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; if (ext_shader_atomic_int64) { atomic_int64 = { @@ -732,6 +744,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_extended_dynamic_state{}; bool has_ext_shader_atomic_int64{}; bool has_ext_provoking_vertex{}; + bool has_ext_vertex_input_dynamic_state{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -763,6 +776,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); + test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME, + false); test(has_ext_shader_atomic_int64, 
VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); @@ -827,6 +842,19 @@ std::vector Device::LoadExtensions(bool requires_surface) { ext_provoking_vertex = true; } } + if (has_ext_vertex_input_dynamic_state) { + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input; + vertex_input.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT; + vertex_input.pNext = nullptr; + features.pNext = &vertex_input; + physical.GetFeatures2KHR(features); + + if (vertex_input.vertexInputDynamicState) { + extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + ext_vertex_input_dynamic_state = true; + } + } if (has_ext_shader_atomic_int64) { VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; -- cgit v1.2.3 From 373f75d944473731408d7a72c967d5c4b37af5bb Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:22:31 -0400 Subject: shader: Add shader loop safety check settings Also add a setting for enable Nsight Aftermath. 
--- src/video_core/vulkan_common/vulkan_device.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 8eb37a77a..bf063c047 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -467,7 +467,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; - if (nv_device_diagnostics_config) { + if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { nsight_aftermath_tracker = std::make_unique(); diagnostics_nv = { @@ -781,7 +781,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); - if (Settings::values.renderer_debug) { + if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); } -- cgit v1.2.3 From 94e751f415d70fe255eada77c4385ec966c07a95 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 14 Jun 2021 04:32:45 -0300 Subject: buffer_cache: Invalidate fast buffers on compute --- src/video_core/buffer_cache/buffer_cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 47cb0a47d..d004199ba 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1132,6 +1132,7 @@ void BufferCache

::BindHostComputeUniformBuffers() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { // Mark all uniform buffers as dirty dirty_uniform_buffers.fill(~u32{0}); + fast_bound_uniform_buffers.fill(0); } u32 binding_index = 0; ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { -- cgit v1.2.3 From dbf7cb9f90a99faa6d6ab07558d65dd113728ff1 Mon Sep 17 00:00:00 2001 From: Rodrigo Locatti Date: Mon, 14 Jun 2021 22:02:42 -0300 Subject: vk_graphics_pipeline: Fix path with no VK_EXT_extended_dynamic_state --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index ccef71f4c..e02b1b7ab 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -472,7 +472,7 @@ void GraphicsPipeline::ConfigureDraw() { void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; - if (key.state.extended_dynamic_state) { + if (!key.state.extended_dynamic_state) { dynamic = key.state.dynamic_state; } static_vector vertex_bindings; -- cgit v1.2.3 From 8fb204893430de2d5c30e008e98db313f890f447 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 02:43:01 -0300 Subject: vk_rasterizer: Exit render passes on fragment barriers --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 855c17769..c57e16c50 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -440,6 +440,7 @@ void RasterizerVulkan::WaitForIdle() { void RasterizerVulkan::FragmentBarrier() { // We already put barriers when a 
render pass finishes + scheduler.RequestOutsideRenderPassOperationContext(); } void RasterizerVulkan::TiledCacheBarrier() { -- cgit v1.2.3 From eaff1030de07f3739794207403ea833ee91c0034 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 19 May 2021 21:58:32 -0400 Subject: glsl: Initial backend --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9391a4cd9..4387532ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -19,6 +19,7 @@ #include "core/core.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -40,6 +41,7 @@ namespace OpenGL { namespace { using Shader::Backend::GLASM::EmitGLASM; +using Shader::Backend::GLSL::EmitGLSL; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; @@ -435,7 +437,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; + const auto code{EmitGLSL(profile, program, binding)}; + OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } } @@ -489,7 +492,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& const std::string code{EmitGLASM(profile, info, program)}; asm_program = 
CompileProgram(code, GL_COMPUTE_PROGRAM_NV); } else { - const std::vector code{EmitSPIRV(profile, program)}; + const auto code{EmitGLSL(profile, program)}; source_program.handle = glCreateProgram(); AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); -- cgit v1.2.3 From 53667ddd4ebdaa98f9c40ef3aee8efbdb15a0a6f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 10 Jul 2021 17:57:35 -0300 Subject: glsl: Fixup build issues --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4387532ab..602cf025b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -437,7 +437,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const auto code{EmitGLSL(profile, program, binding)}; + const auto code{EmitGLSL(profile, runtime_info, program, binding)}; OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } -- cgit v1.2.3 From bd24fa97138ff1e33a7f8d3c30a4f4482a6482a8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 19:55:40 -0400 Subject: glsl: Query GL Device for FP16 extension support --- src/video_core/renderer_opengl/gl_device.cpp | 2 ++ src/video_core/renderer_opengl/gl_device.h | 10 ++++++++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 ++ 3 files changed, 14 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3f7929f9e..071133781 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ 
b/src/video_core/renderer_opengl/gl_device.cpp @@ -158,6 +158,8 @@ Device::Device() { has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); + has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; + has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 1ffd24883..9b9402c29 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -120,6 +120,14 @@ public: return has_depth_buffer_float; } + bool HasNvGpuShader5() const { + return has_nv_gpu_shader_5; + } + + bool HasAmdShaderHalfFloat() const { + return has_amd_shader_half_float; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -151,6 +159,8 @@ private: bool use_asynchronous_shaders{}; bool use_driver_cache{}; bool has_depth_buffer_float{}; + bool has_nv_gpu_shader_5{}; + bool has_amd_shader_half_float{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 602cf025b..e00d01e34 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -217,6 +217,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), + .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), + .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .warp_size_potentially_larger_than_guest = true, -- cgit v1.2.3 From 
3d086e6130a2c5f0546ccef3b234c65ef2f0c99b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 26 May 2021 00:16:20 -0400 Subject: glsl: Implement some attribute getters and setters --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e00d01e34..8a052851b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -440,7 +440,6 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { const auto code{EmitGLSL(profile, runtime_info, program, binding)}; - OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } } -- cgit v1.2.3 From e35ffbbeb0f85f676416fcb8f0bb0207671f379d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 00:53:26 -0400 Subject: glsl: Implement VOTE for subgroup size potentially larger --- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 071133781..20ea42cff 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -160,6 +160,7 @@ Device::Device() { has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; + warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; // At the moment of writing this, only 
Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 9b9402c29..ff0ff2b08 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -128,6 +128,10 @@ public: return has_amd_shader_half_float; } + bool IsWarpSizePotentiallyLargerThanGuest() const { + return warp_size_potentially_larger_than_guest; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -161,6 +165,7 @@ private: bool has_depth_buffer_float{}; bool has_nv_gpu_shader_5{}; bool has_amd_shader_half_float{}; + bool warp_size_potentially_larger_than_guest{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 8a052851b..cd11ff653 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -220,7 +220,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), - .warp_size_potentially_larger_than_guest = true, + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), .lower_left_origin_mode = true, .need_declared_frag_colors = true, -- cgit v1.2.3 From f4799e8fa15b92d8d5607dc5dfca4974901ee06c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 2 Jun 2021 00:33:03 -0400 Subject: glsl: Implement transform feedback --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cd11ff653..0a1ba363b 
100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -92,9 +92,15 @@ GLenum AssemblyStage(size_t stage_index) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, - bool glasm_use_storage_buffers) { + bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; switch (program.stage) { + case Shader::Stage::VertexB: + case Shader::Stage::Geometry: + if (!use_assembly_shaders && key.xfb_enabled != 0) { + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); + } + break; case Shader::Stage::TessellationEval: info.tess_clockwise = key.tessellation_clockwise != 0; info.tess_primitive = [&key] { @@ -420,7 +426,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array assembly_programs; Shader::Backend::Bindings binding; - if (!device.UseAssemblyShaders()) { + const bool use_glasm{device.UseAssemblyShaders()}; + if (!use_glasm) { source_program.handle = glCreateProgram(); } const size_t first_index = uses_vertex_a && uses_vertex_b ? 
1 : 0; @@ -434,8 +441,9 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)}; - if (device.UseAssemblyShaders()) { + const auto runtime_info{ + MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; + if (use_glasm) { const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { @@ -443,7 +451,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( AttachShader(Stage(stage_index), source_program.handle, code); } } - if (!device.UseAssemblyShaders()) { + if (!use_glasm) { LinkProgram(source_program.handle); } return std::make_unique( -- cgit v1.2.3 From 6577a63d368afa57d5f29df40e524af30eaabffa Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 2 Jun 2021 00:48:49 -0400 Subject: glsl: skip gl_ViewportIndex write if device does not support it --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0a1ba363b..77681594a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -225,6 +225,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_derivative_control = device.HasDerivativeControl(), .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), + .support_gl_vertex_viewport_layer = device.HasVertexViewportLayer(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From 747b8556a4611791c1b0afbb500c77de57adfc54 Mon Sep 17 00:00:00 2001 
From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 4 Jun 2021 00:46:46 -0400 Subject: glsl: Use textureGrad fallback when EXT_texture_shadow_lod is unsupported --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 77681594a..b4c634d29 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -226,6 +226,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_vertex_viewport_layer = device.HasVertexViewportLayer(), + .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From 970fc39d986c5eefa1c4b61ac89ef7e8c2bf23bf Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 7 Jun 2021 19:05:11 -0400 Subject: glsl: Rebase fixes --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - src/video_core/renderer_opengl/gl_shader_util.cpp | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b4c634d29..3d229a78c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -225,7 +225,6 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_derivative_control = device.HasDerivativeControl(), .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), - 
.support_gl_vertex_viewport_layer = device.HasVertexViewportLayer(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 99cb81819..ac6f33e34 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -13,7 +13,7 @@ namespace OpenGL { -static void LogShader(GLuint shader) { +static void LogShader(GLuint shader, std::optional code = {}) { GLint shader_status{}; glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); if (shader_status == GL_FALSE) { @@ -28,6 +28,9 @@ static void LogShader(GLuint shader) { glGetShaderInfoLog(shader, log_length, nullptr, log.data()); if (shader_status == GL_FALSE) { LOG_ERROR(Render_OpenGL, "{}", log); + if (code.has_value()) { + LOG_INFO(Render_OpenGL, "\n{}", *code); + } } else { LOG_WARNING(Render_OpenGL, "{}", log); } @@ -43,7 +46,7 @@ void AttachShader(GLenum stage, GLuint program, std::string_view code) { glCompileShader(shader.handle); glAttachShader(program, shader.handle); if (Settings::values.renderer_debug) { - LogShader(shader.handle); + LogShader(shader.handle, code); } } -- cgit v1.2.3 From 8bb8bbf4ae2ef259857efe49436dfd71758ea092 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 8 Jun 2021 01:55:12 -0400 Subject: glsl: Implement fswzadd and wip nv thread shuffle impl --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3d229a78c..4fcf4e458 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -226,6 +226,7 @@ 
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), + .support_gl_warp_intrinsics = false, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From 6650c4799d42044f087a1ac5cb5e4b1a9e899000 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 8 Jun 2021 23:52:28 -0400 Subject: gl_rasterizer: Add texture fetch barrier for fragments Fixes flicker seen in XC2 --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 54696d97d..7513bd071 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -437,7 +437,7 @@ void RasterizerOpenGL::WaitForIdle() { } void RasterizerOpenGL::FragmentBarrier() { - glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); + glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT); } void RasterizerOpenGL::TiledCacheBarrier() { -- cgit v1.2.3 From e81c73a8748ccfcde56acfee5630116c3950e479 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 11 Jun 2021 02:50:30 -0400 Subject: glsl: Address more feedback. 
Implement indexed texture reads --- src/video_core/renderer_opengl/gl_shader_util.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index ac6f33e34..5109985f1 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -13,7 +13,7 @@ namespace OpenGL { -static void LogShader(GLuint shader, std::optional code = {}) { +static void LogShader(GLuint shader, std::string_view code = {}) { GLint shader_status{}; glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); if (shader_status == GL_FALSE) { @@ -28,8 +28,8 @@ static void LogShader(GLuint shader, std::optional code = {}) glGetShaderInfoLog(shader, log_length, nullptr, log.data()); if (shader_status == GL_FALSE) { LOG_ERROR(Render_OpenGL, "{}", log); - if (code.has_value()) { - LOG_INFO(Render_OpenGL, "\n{}", *code); + if (!code.empty()) { + LOG_INFO(Render_OpenGL, "\n{}", code); } } else { LOG_WARNING(Render_OpenGL, "{}", log); -- cgit v1.2.3 From 413eb6983f07bb4139cd07c5dca22bdb30e6af2d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 02:06:11 -0400 Subject: gl_shader_cache: Move OGL shader compilation to the respective Pipeline constructor --- .../renderer_opengl/gl_compute_pipeline.cpp | 13 +++- .../renderer_opengl/gl_compute_pipeline.h | 2 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 67 ++++++++++++++++++--- .../renderer_opengl/gl_graphics_pipeline.h | 4 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 69 +++------------------- 5 files changed, 79 insertions(+), 76 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index a40106c87..f984b635c 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ 
b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -7,6 +7,7 @@ #include "common/cityhash.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" namespace OpenGL { @@ -39,10 +40,16 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_) + const std::string code) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, - source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { + if (device.UseAssemblyShaders()) { + assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + } else { + source_program.handle = glCreateProgram(); + AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); + LinkProgram(source_program.handle); + } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index b5dfb65e9..a93166eb6 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -54,7 +54,7 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_); + const std::string code); void Configure(); diff --git 
a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index a2ea35d5a..4d62d7062 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -9,6 +9,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/texture_cache/texture_cache.h" @@ -33,6 +34,40 @@ u32 AccumulateCount(const Range& range) { return num; } +GLenum Stage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_SHADER; + case 1: + return GL_TESS_CONTROL_SHADER; + case 2: + return GL_TESS_EVALUATION_SHADER; + case 3: + return GL_GEOMETRY_SHADER; + case 4: + return GL_FRAGMENT_SHADER; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} + +GLenum AssemblyStage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_PROGRAM_NV; + case 1: + return GL_TESS_CONTROL_PROGRAM_NV; + case 2: + return GL_TESS_EVALUATION_PROGRAM_NV; + case 3: + return GL_GEOMETRY_PROGRAM_NV; + case 4: + return GL_FRAGMENT_PROGRAM_NV; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} + /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -82,19 +117,33 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, + const std::array assembly_sources, + const std::array glsl_sources, const std::array& infos, const 
VideoCommon::TransformFeedbackState* xfb_state) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, - gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, - state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( - assembly_programs_)} { + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - - for (size_t stage = 0; stage < 5; ++stage) { - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + if (device.UseAssemblyShaders()) { + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{assembly_sources[stage]}; + if (code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 
1 : 0) << stage; + } + } else { + program.handle = glCreateProgram(); + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{glsl_sources[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } + LinkProgram(program.handle); } u32 num_textures{}; u32 num_images{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 508fad5bb..984bf994f 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -65,8 +65,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, + const std::array assembly_sources, + const std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4fcf4e458..884739aec 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -56,40 +56,6 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -GLenum Stage(size_t stage_index) { - switch (stage_index) { - case 0: - return GL_VERTEX_SHADER; - case 1: - return GL_TESS_CONTROL_SHADER; - case 2: - return GL_TESS_EVALUATION_SHADER; - case 3: - return GL_GEOMETRY_SHADER; - case 4: - return GL_FRAGMENT_SHADER; - } - UNREACHABLE_MSG("{}", stage_index); - return GL_NONE; -} - -GLenum AssemblyStage(size_t stage_index) { - switch (stage_index) { - case 0: - return GL_VERTEX_PROGRAM_NV; - case 1: - return GL_TESS_CONTROL_PROGRAM_NV; - case 2: - return GL_TESS_EVALUATION_PROGRAM_NV; - case 3: - return GL_GEOMETRY_PROGRAM_NV; - case 4: - return GL_FRAGMENT_PROGRAM_NV; - 
} - UNREACHABLE_MSG("{}", stage_index); - return GL_NONE; -} - Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, bool glasm_use_storage_buffers, bool use_assembly_shaders) { @@ -426,12 +392,10 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( std::array infos{}; OGLProgram source_program; - std::array assembly_programs; + std::array assembly_sources; + std::array glsl_sources; Shader::Backend::Bindings binding; const bool use_glasm{device.UseAssemblyShaders()}; - if (!use_glasm) { - source_program.handle = glCreateProgram(); - } const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { @@ -446,20 +410,14 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { - const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; - assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); + assembly_sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { - const auto code{EmitGLSL(profile, runtime_info, program, binding)}; - AttachShader(Stage(stage_index), source_program.handle, code); + glsl_sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } } - if (!use_glasm) { - LinkProgram(source_program.handle); - } return std::make_unique( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - std::move(source_program), std::move(assembly_programs), infos, - key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + assembly_sources, glsl_sources, infos, key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -496,21 +454,10 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& } Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - - OGLAssemblyProgram asm_program; - OGLProgram source_program; - if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, info, program)}; - asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); - } else { - const auto code{EmitGLSL(profile, program)}; - source_program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); - LinkProgram(source_program.handle); - } + const std::string code{device.UseAssemblyShaders() ? EmitGLASM(profile, info, program) + : EmitGLSL(profile, program)}; return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, - kepler_compute, program_manager, program.info, - std::move(source_program), std::move(asm_program)); + kepler_compute, program_manager, program.info, code); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); return nullptr; -- cgit v1.2.3 From ff3de0fb6bb46bcb59421cef203ca8e8daaec85c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 02:11:13 -0400 Subject: gl_shader_cache: Remove const from pipeline source arguments --- src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 2 +- src/video_core/renderer_opengl/gl_compute_pipeline.h | 2 +- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 4 ++-- src/video_core/renderer_opengl/gl_graphics_pipeline.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index f984b635c..2d6442d74 100644 --- 
a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -40,7 +40,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - const std::string code) + std::string code) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { if (device.UseAssemblyShaders()) { diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index a93166eb6..b5fc45f26 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -54,7 +54,7 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - const std::string code); + std::string code); void Configure(); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 4d62d7062..d64723d6b 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -117,8 +117,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - const std::array assembly_sources, - const std::array glsl_sources, + std::array assembly_sources, + std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, 
gpu_memory{gpu_memory_}, diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 984bf994f..dc791be53 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -65,8 +65,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - const std::array assembly_sources, - const std::array glsl_sources, + std::array assembly_sources, + std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); -- cgit v1.2.3 From 5e7b2b9661bf685c3950d7c4065d0d35b488f95c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 13 Jun 2021 00:05:19 -0400 Subject: glsl: Add stubs for sparse queries and variable aoffi when not supported --- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 ++ 3 files changed, 8 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 20ea42cff..bf08a6d93 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -160,6 +160,7 @@ Device::Device() { has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; + has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 
ff0ff2b08..0b59c9df0 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -128,6 +128,10 @@ public: return has_amd_shader_half_float; } + bool HasSparseTexture2() const { + return has_sparse_texture_2; + } + bool IsWarpSizePotentiallyLargerThanGuest() const { return warp_size_potentially_larger_than_guest; } @@ -165,6 +169,7 @@ private: bool has_depth_buffer_float{}; bool has_nv_gpu_shader_5{}; bool has_amd_shader_half_float{}; + bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 884739aec..3d59d34d7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -193,6 +193,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), .support_gl_warp_intrinsics = false, + .support_gl_variable_aoffi = device.HasVariableAoffi(), + .support_gl_sparse_textures = device.HasSparseTexture2(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From 74f683787eeba7b6e8f5868134f445240733f8fd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 21:06:29 -0400 Subject: gl_shader_cache: Implement async shaders --- src/video_core/CMakeLists.txt | 1 + .../renderer_opengl/gl_graphics_pipeline.cpp | 123 ++++++++++++--------- .../renderer_opengl/gl_graphics_pipeline.h | 14 ++- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 54 +++++---- src/video_core/renderer_opengl/gl_shader_cache.h | 34 ++---- src/video_core/renderer_opengl/gl_shader_context.h | 33 ++++++ 7 files changed, 154 insertions(+), 107 
deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_shader_context.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1ef3a6189..007ecc13e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -83,6 +83,7 @@ add_library(video_core STATIC renderer_opengl/gl_shader_cache.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h + renderer_opengl/gl_shader_context.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state_tracker.cpp diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index d64723d6b..d27a3cf46 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -6,11 +6,13 @@ #include #include "common/cityhash.h" +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/shader_notify.h" #include "video_core/texture_cache/texture_cache.h" namespace OpenGL { @@ -117,74 +119,91 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - std::array assembly_sources, - std::array glsl_sources, + ShaderWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, + std::array sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, 
state_tracker{state_tracker_} { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - if (device.UseAssemblyShaders()) { - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{assembly_sources[stage]}; - if (code.empty()) { - continue; + auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { + if (device.UseAssemblyShaders()) { + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; } - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; - } - } else { - program.handle = glCreateProgram(); - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{glsl_sources[stage]}; - if (code.empty()) { - continue; + } else { + program.handle = glCreateProgram(); + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); } - AttachShader(Stage(stage), program.handle, code); + LinkProgram(program.handle); } - LinkProgram(program.handle); - } - u32 num_textures{}; - u32 num_images{}; - u32 num_storage_buffers{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + if 
(shader_notify) { + shader_notify->MarkShaderComplete(); } - enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; - std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + u32 num_textures{}; + u32 num_images{}; + u32 num_storage_buffers{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + const auto& info{stage_infos[stage]}; + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += + AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += + AccumulateCount(info.storage_buffers_descriptors); + } + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += 
AccumulateCount(info.storage_buffers_descriptors); - writes_global_memory |= std::ranges::any_of( - info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); - const bool assembly_shaders{assembly_programs[0].handle != 0}; - use_storage_buffers = - !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - writes_global_memory &= !use_storage_buffers; + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); + if (assembly_shaders && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } + is_built.store(true, std::memory_order_relaxed); + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); + } else { + func(nullptr); } } diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index dc791be53..58deafd3c 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -20,10 +20,15 @@ namespace OpenGL { +namespace ShaderContext { +struct Context; +} + class Device; class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using ShaderWorker = Common::StatefulThreadWorker; struct GraphicsPipelineKey { std::array unique_hashes; @@ -65,8 +70,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& 
state_tracker_, - std::array assembly_sources, - std::array glsl_sources, + ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, + std::array sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); @@ -82,6 +87,10 @@ public: return writes_global_memory; } + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); + } + private: void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); @@ -108,6 +117,7 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; + std::atomic_bool is_built{false}; static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7513bd071..e3d336f86 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -70,7 +70,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, - buffer_cache, program_manager, state_tracker), + buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3d59d34d7..d082b9f73 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -17,7 +17,6 @@ #include "common/scope_exit.h" #include "common/thread_worker.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include 
"shader_recompiler/backend/glasm/emit_glasm.h" #include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -50,6 +49,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; using VideoCommon::SerializePipeline; +using Context = ShaderContext::Context; template auto MakeSpan(Container& container) { @@ -143,25 +143,17 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs } } // Anonymous namespace -struct ShaderCache::Context { - explicit Context(Core::Frontend::EmuWindow& emu_window) - : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} - - std::unique_ptr gl_context; - Core::Frontend::GraphicsContext::Scoped scoped; - ShaderPools pools; -}; - ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, TextureCache& texture_cache_, BufferCache& buffer_cache_, - ProgramManager& program_manager_, StateTracker& state_tracker_) + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, - use_asynchronous_shaders{device.UseAsynchronousShaders()}, + shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()}, profile{ .supported_spirv = 0x00010000, @@ -264,7 +256,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, env_ptrs.push_back(&env); } ctx->pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs))}; + auto 
pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; std::lock_guard lock{state.mutex}; if (pipeline) { graphics_cache.emplace(key, std::move(pipeline)); @@ -311,6 +303,9 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (is_new) { program = CreateGraphicsPipeline(); } + if (!program || !program->IsBuilt()) { + return nullptr; + } return program.get(); } @@ -339,7 +334,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span())}; + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), + use_asynchronous_shaders)}; if (!pipeline || shader_cache_filename.empty()) { return pipeline; } @@ -354,8 +350,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { } std::unique_ptr ShaderCache::CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs) try { + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs, bool build_in_parallel) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{}; u32 total_storage_buffers{}; @@ -394,8 +390,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( std::array infos{}; OGLProgram source_program; - std::array assembly_sources; - std::array glsl_sources; + std::array sources; Shader::Backend::Bindings binding; const bool use_glasm{device.UseAssemblyShaders()}; const size_t first_index = uses_vertex_a && uses_vertex_b ? 
1 : 0; @@ -412,14 +407,16 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { - assembly_sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); + sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { - glsl_sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); + sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } } + auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; + VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; return std::make_unique( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - assembly_sources, glsl_sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + thread_worker, notify, sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -442,9 +439,9 @@ std::unique_ptr ShaderCache::CreateComputePipeline( return pipeline; } -std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, - const ComputePipelineKey& key, - Shader::Environment& env) try { +std::unique_ptr ShaderCache::CreateComputePipeline( + ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, + Shader::Environment& env) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -465,11 +462,10 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& return nullptr; } -std::unique_ptr> ShaderCache::CreateWorkers() - const { - return std::make_unique>( - std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", - [this] { return Context{emu_window}; }); +std::unique_ptr ShaderCache::CreateWorkers() const { + return 
std::make_unique(std::max(std::thread::hardware_concurrency(), 2U) - 1, + "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index e0c5a06d8..d24b54d90 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -13,13 +13,12 @@ #include "common/common_types.h" #include "common/thread_worker.h" -#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_context.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -31,29 +30,17 @@ namespace OpenGL { class Device; class ProgramManager; class RasterizerOpenGL; - -struct ShaderPools { - void ReleaseContents() { - flow_block.ReleaseContents(); - block.ReleaseContents(); - inst.ReleaseContents(); - } - - Shader::ObjectPool inst; - Shader::ObjectPool block; - Shader::ObjectPool flow_block; -}; +using ShaderWorker = Common::StatefulThreadWorker; class ShaderCache : public VideoCommon::ShaderCache { - struct Context; - public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, TextureCache& texture_cache_, BufferCache& buffer_cache_, - ProgramManager& program_manager_, StateTracker& state_tracker_); + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_); ~ShaderCache(); void LoadDiskResources(u64 title_id, 
std::stop_token stop_loading, @@ -67,17 +54,17 @@ private: std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs); + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs, bool build_in_parallel); std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader); - std::unique_ptr CreateComputePipeline(ShaderPools& pools, + std::unique_ptr CreateComputePipeline(ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env); - std::unique_ptr> CreateWorkers() const; + std::unique_ptr CreateWorkers() const; Core::Frontend::EmuWindow& emu_window; const Device& device; @@ -85,17 +72,18 @@ private: BufferCache& buffer_cache; ProgramManager& program_manager; StateTracker& state_tracker; + VideoCore::ShaderNotify& shader_notify; GraphicsPipelineKey graphics_key{}; const bool use_asynchronous_shaders; - ShaderPools main_pools; + ShaderContext::ShaderPools main_pools; std::unordered_map> graphics_cache; std::unordered_map> compute_cache; Shader::Profile profile; std::filesystem::path shader_cache_filename; - std::unique_ptr> workers; + std::unique_ptr workers; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h new file mode 100644 index 000000000..6ff34e5d6 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_context.h @@ -0,0 +1,33 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "core/frontend/emu_window.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" + +namespace OpenGL::ShaderContext { +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); + } + + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; +}; + +struct Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; +}; + +} // namespace OpenGL::ShaderContext -- cgit v1.2.3 From 6eea88d6149f7122777b325c7fc8549e2a974e64 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 23:02:07 -0400 Subject: glsl: Cleanup/Address feedback --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d082b9f73..5ffe28d45 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -196,6 +196,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, .has_broken_signed_operations = true, + .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), + .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, } { if (use_asynchronous_shaders) { -- cgit v1.2.3 From 3b339fbbf65a50ec2ec8baacd175ca7577c3b8bd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 23:33:26 -0400 Subject: glsl: Conditionally use fine/coarse derivatives based on 
device support --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5ffe28d45..fedbce2f0 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -187,6 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_warp_intrinsics = false, .support_gl_variable_aoffi = device.HasVariableAoffi(), .support_gl_sparse_textures = device.HasSparseTexture2(), + .support_gl_derivative_control = device.HasDerivativeControl(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From d36f667bc0adaa9f50d53efb4c908aadc38921a6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 15 Jun 2021 17:23:57 -0400 Subject: glsl: Address rest of feedback --- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 ++++ .../renderer_opengl/gl_graphics_pipeline.cpp | 32 ++++++++++------------ src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 4 files changed, 22 insertions(+), 17 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index bf08a6d93..5838fc02f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -162,6 +162,7 @@ Device::Device() { has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; + need_fastmath_off = is_nvidia; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git 
a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 0b59c9df0..0c9d6fe31 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -136,6 +136,10 @@ public: return warp_size_potentially_larger_than_guest; } + bool NeedsFastmathOff() const { + return need_fastmath_off; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -171,6 +175,7 @@ private: bool has_amd_shader_half_float{}; bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; + bool need_fastmath_off{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index d27a3cf46..8d11fbc55 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -132,28 +132,23 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { - if (device.UseAssemblyShaders()) { - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } + if (!device.UseAssemblyShaders()) { + program.handle = glCreateProgram(); + } + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + if (device.UseAssemblyShaders()) { assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 
1 : 0) << stage; - } - } else { - program.handle = glCreateProgram(); - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } + } else { AttachShader(Stage(stage), program.handle, code); } - LinkProgram(program.handle); } - if (shader_notify) { - shader_notify->MarkShaderComplete(); + if (!device.UseAssemblyShaders()) { + LinkProgram(program.handle); } u32 num_textures{}; u32 num_images{}; @@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (assembly_shaders && xfb_state) { GenerateTransformFeedbackState(*xfb_state); } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } is_built.store(true, std::memory_order_relaxed); }}; if (thread_worker) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index fedbce2f0..620666622 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .lower_left_origin_mode = true, .need_declared_frag_colors = true, + .need_fastmath_off = device.NeedsFastmathOff(), .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, -- cgit v1.2.3 From 69f9b97e7ed1e873657105cff27ed9095ee277ed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 22:48:55 -0300 Subject: vulkan_device: Blacklist VK_EXT_vertex_input_dynamic_state on Intel --- src/video_core/vulkan_common/vulkan_device.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index bf063c047..9754abcf8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -491,6 +491,10 @@ Device::Device(VkInstance instance_, 
vk::PhysicalDevice physical_, VkSurfaceKHR "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); ext_extended_dynamic_state = false; } + if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); + ext_vertex_input_dynamic_state = false; + } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); -- cgit v1.2.3 From 376aa94819b7da976adb120136d83980a757d044 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 01:49:19 -0300 Subject: shader: Rename maxwell/program.h to translate_program.h --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 620666622..c05cd5d28 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -22,7 +22,7 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" #include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b17f34cdd..0b6fe8e2e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ 
b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -20,7 +20,7 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" #include "shader_recompiler/program_header.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" -- cgit v1.2.3 From cbbca26d182991abf68d9b2e1b1e5935bf4eb476 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 03:03:08 -0300 Subject: shader: Add support for native 16-bit floats --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 12 ++++++++---- src/video_core/renderer_opengl/gl_shader_cache.h | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 ++++++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 3 +++ src/video_core/vulkan_common/vulkan_device.cpp | 4 ++-- 5 files changed, 24 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c05cd5d28..b459397f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -201,6 +201,10 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, + }, + host_info{ + .support_float16 = false, + .support_int64 = true, } { if (use_asynchronous_shaders) { workers = CreateWorkers(); @@ -373,15 +377,15 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, 
env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); for (const auto& desc : programs[index].info.storage_buffers_descriptors) { total_storage_buffers += desc.count; } } else { // VertexB path when VertexA is present. - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; for (const auto& desc : program_vb.info.storage_buffers_descriptors) { total_storage_buffers += desc.count; } @@ -449,7 +453,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline( LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; u32 num_storage_buffers{}; for (const auto& desc : program.info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index d24b54d90..6952a1f2c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" @@ -82,6 +83,8 @@ private: std::unordered_map> compute_cache; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path shader_cache_filename; std::unique_ptr workers; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 
0b6fe8e2e..72e6f4207 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -307,6 +307,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .has_broken_signed_operations = false, .ignore_nan_fp_comparisons = false, }; + host_info = Shader::HostTranslateInfo{ + .support_float16 = device.IsFloat16Supported(), + .support_int64 = true, + }; } PipelineCache::~PipelineCache() = default; @@ -484,11 +488,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); } else { // VertexB path when VertexA is present. - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } @@ -575,7 +579,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 167a2ee2e..42da2960b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ 
-19,6 +19,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" @@ -157,6 +158,8 @@ private: ShaderPools main_pools; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9754abcf8..0d8c6cd08 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -497,8 +497,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. 
- // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - // is_float16_supported = false; + LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); + is_float16_supported = false; } graphics_queue = logical.GetQueue(graphics_family); -- cgit v1.2.3 From 0ffea97e2ea2c8f58928e13dc2488d620ea98ea8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 03:22:56 -0300 Subject: shader: Split profile and runtime info headers --- src/video_core/renderer_opengl/gl_shader_cache.h | 1 + src/video_core/transform_feedback.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6952a1f2c..ff5707119 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -16,6 +16,7 @@ #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h index 6832c6db1..8f6946d65 100644 --- a/src/video_core/transform_feedback.h +++ b/src/video_core/transform_feedback.h @@ -8,7 +8,7 @@ #include #include "common/common_types.h" -#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/engines/maxwell_3d.h" namespace VideoCommon { -- cgit v1.2.3 From 374eeda1a35f6a1dc81cf22122c701be68e89c0f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 04:59:30 -0300 Subject: shader: Properly manage attributes not written from previous stages --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 11 ++++++++++- 
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 16 ++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b459397f5..b8b24dd3d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -58,8 +58,15 @@ auto MakeSpan(Container& container) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, + const Shader::IR::Program* previous_program, bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; + if (previous_program) { + info.previous_stage_stores_generic = previous_program->info.stores_generics; + } else { + // Mark all stores as available + info.previous_stage_stores_generic.flip(); + } switch (program.stage) { case Shader::Stage::VertexB: case Shader::Stage::Geometry: @@ -400,6 +407,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array sources; Shader::Backend::Bindings binding; + Shader::IR::Program* previous_program{}; const bool use_glasm{device.UseAssemblyShaders()}; const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { @@ -413,12 +421,13 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( infos[stage_index] = &program.info; const auto runtime_info{ - MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; + MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } + previous_program = &program; } auto* const thread_worker{build_in_parallel ? 
workers.get() : nullptr}; VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 72e6f4207..dc028306a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -90,7 +90,7 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso return {}; } -static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { +Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { if (attr.enabled == 0) { return Shader::AttributeType::Disabled; } @@ -124,9 +124,15 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde } Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program) { + const Shader::IR::Program& program, + const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; - + if (previous_program) { + info.previous_stage_stores_generic = previous_program->info.stores_generics; + } else { + // Mark all stores as available + info.previous_stage_stores_generic.flip(); + } const Shader::Stage stage{program.stage}; const bool has_geometry{key.unique_hashes[4] != 0}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; @@ -499,6 +505,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; + const Shader::IR::Program* previous_stage{}; Shader::Backend::Bindings binding; for (size_t index = uses_vertex_a && uses_vertex_b ? 
1 : 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -511,7 +518,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); @@ -519,6 +526,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } + previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; -- cgit v1.2.3 From ca67077ca87772b4b4ac61d08f5b2c60616348e0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 21:14:57 -0300 Subject: vk_graphics_pipeline: Use VK_KHR_push_descriptor when available ~51% faster on Nvidia compared to previous method. 
--- src/video_core/renderer_vulkan/pipeline_helper.h | 32 ++++++++++++++++------ .../renderer_vulkan/vk_compute_pipeline.cpp | 8 ++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 28 ++++++++++++------- .../renderer_vulkan/vk_graphics_pipeline.h | 1 + src/video_core/vulkan_common/vulkan_device.cpp | 11 ++++++++ src/video_core/vulkan_common/vulkan_device.h | 12 ++++++++ src/video_core/vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 31 +++++++++++++-------- 8 files changed, 88 insertions(+), 36 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index c6e5e059b..4847db6b6 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -16,38 +16,50 @@ #include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { class DescriptorLayoutBuilder { public: - DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} + DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} - vk::DescriptorSetLayout CreateDescriptorSetLayout() const { + bool CanUsePushDescriptor() const noexcept { + return device->IsKhrPushDescriptorSupported() && + num_descriptors <= device->MaxPushDescriptors(); + } + + vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const { if (bindings.empty()) { return nullptr; } - return device->CreateDescriptorSetLayout({ + const VkDescriptorSetLayoutCreateFlags flags = + use_push_descriptor ? 
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; + return device->GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, - .flags = 0, + .flags = flags, .bindingCount = static_cast(bindings.size()), .pBindings = bindings.data(), }); } vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, - VkPipelineLayout pipeline_layout) const { + VkPipelineLayout pipeline_layout, + bool use_push_descriptor) const { if (entries.empty()) { return nullptr; } - return device->CreateDescriptorUpdateTemplateKHR({ + const VkDescriptorUpdateTemplateType type = + use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR + : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + return device->GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, .pNext = nullptr, .flags = 0, .descriptorUpdateEntryCount = static_cast(entries.size()), .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .templateType = type, .descriptorSetLayout = descriptor_set_layout, .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .pipelineLayout = pipeline_layout, @@ -56,7 +68,7 @@ public: } vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { - return device->CreatePipelineLayout({ + return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -97,14 +109,16 @@ private: .stride = sizeof(DescriptorUpdateEntry), }); ++binding; + num_descriptors += descriptors[i].count; offset += sizeof(DescriptorUpdateEntry); } } - const vk::Device* device{}; + const Device* device{}; boost::container::small_vector bindings; boost::container::small_vector entries; u32 binding{}; + u32 num_descriptors{}; size_t offset{}; }; diff --git 
a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index cc855a62e..70b84c7a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -37,15 +37,14 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript uniform_buffer_sizes.begin()); auto func{[this, &descriptor_pool, shader_notify] { - DescriptorLayoutBuilder builder{device.GetLogical()}; + DescriptorLayoutBuilder builder{device}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - descriptor_set_layout = builder.CreateDescriptorSetLayout(); + descriptor_set_layout = builder.CreateDescriptorSetLayout(false); pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); descriptor_update_template = - builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); + builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, @@ -186,7 +185,6 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - if (!descriptor_set_layout) { return; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e02b1b7ab..2b59a9d88 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -40,7 +40,7 @@ constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; constexpr size_t MAX_IMAGE_ELEMENTS = 64; 
DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { - DescriptorLayoutBuilder builder{device.GetLogical()}; + DescriptorLayoutBuilder builder{device}; for (size_t index = 0; index < infos.size(); ++index) { static constexpr std::array stages{ VK_SHADER_STAGE_VERTEX_BIT, @@ -229,12 +229,15 @@ GraphicsPipeline::GraphicsPipeline( } auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); - + uses_push_descriptor = builder.CanUsePushDescriptor(); + descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); + if (!uses_push_descriptor) { + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); + } const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; pipeline_layout = builder.CreatePipelineLayout(set_layout); - descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); + descriptor_update_template = + builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor); const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; Validate(); @@ -462,11 +465,16 @@ void GraphicsPipeline::ConfigureDraw() { if (!descriptor_set_layout) { return; } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - const vk::Device& dev{device.GetLogical()}; - dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, - descriptor_set, nullptr); + if (uses_push_descriptor) { + cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout, + 0, descriptor_data); + } else { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& 
dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, nullptr); + } }); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 40d1edabd..622267147 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -148,6 +148,7 @@ private: std::condition_variable build_condvar; std::mutex build_mutex; std::atomic_bool is_built{false}; + bool uses_push_descriptor{false}; }; } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0d8c6cd08..9d918de8d 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -767,6 +767,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); + test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); @@ -932,6 +933,16 @@ std::vector Device::LoadExtensions(bool requires_surface) { khr_workgroup_memory_explicit_layout = true; } } + if (khr_push_descriptor) { + VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; + push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; + push_descriptor.pNext = nullptr; + + physical_properties.pNext = &push_descriptor; + physical.GetProperties2KHR(physical_properties); + + max_push_descriptors = 
push_descriptor.maxPushDescriptors; + } return extensions; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4fda472b0..49605752d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -154,6 +154,11 @@ public: return guest_warp_stages & stage; } + /// Returns the maximum number of push descriptors. + u32 MaxPushDescriptors() const { + return max_push_descriptors; + } + /// Returns true if formatless image load is supported. bool IsFormatlessImageLoadSupported() const { return is_formatless_image_load_supported; @@ -194,6 +199,11 @@ public: return khr_spirv_1_4; } + /// Returns true if the device supports VK_KHR_push_descriptor. + bool IsKhrPushDescriptorSupported() const { + return khr_push_descriptor; + } + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { return khr_workgroup_memory_explicit_layout; @@ -330,6 +340,7 @@ private: VkDriverIdKHR driver_id{}; ///< Driver ID. VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. u64 device_access_memory{}; ///< Total size of device local memory in bytes. + u32 max_push_descriptors{}; ///< Maximum number of push descriptors bool is_optimal_astc_supported{}; ///< Support for native ASTC. bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. @@ -345,6 +356,7 @@ private: bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. + bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 7e13ae8af..d7e9fac22 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdFillBuffer); X(vkCmdPipelineBarrier); X(vkCmdPushConstants); + X(vkCmdPushDescriptorSetWithTemplateKHR); X(vkCmdSetBlendConstants); X(vkCmdSetDepthBias); X(vkCmdSetDepthBounds); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 6e5be1186..d43b606f1 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; PFN_vkBindBufferMemory vkBindBufferMemory{}; PFN_vkBindImageMemory vkBindImageMemory{}; + PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBeginQuery vkCmdBeginQuery{}; PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{}; - PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{}; PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{}; PFN_vkCmdBindPipeline vkCmdBindPipeline{}; PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{}; PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{}; + PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; PFN_vkCmdBlitImage vkCmdBlitImage{}; PFN_vkCmdClearAttachments vkCmdClearAttachments{}; PFN_vkCmdCopyBuffer vkCmdCopyBuffer{}; @@ -211,35 +212,35 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdDispatch vkCmdDispatch{}; 
PFN_vkCmdDraw vkCmdDraw{}; PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; + PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdEndQuery vkCmdEndQuery{}; PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{}; - PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdFillBuffer vkCmdFillBuffer{}; PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{}; PFN_vkCmdPushConstants vkCmdPushConstants{}; + PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{}; + PFN_vkCmdResolveImage vkCmdResolveImage{}; PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{}; + PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; PFN_vkCmdSetDepthBias vkCmdSetDepthBias{}; PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{}; - PFN_vkCmdSetEvent vkCmdSetEvent{}; - PFN_vkCmdSetScissor vkCmdSetScissor{}; - PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; - PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; - PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; - PFN_vkCmdSetViewport vkCmdSetViewport{}; - PFN_vkCmdWaitEvents vkCmdWaitEvents{}; - PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; - PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{}; PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{}; PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{}; PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; + PFN_vkCmdSetEvent vkCmdSetEvent{}; PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; + PFN_vkCmdSetScissor vkCmdSetScissor{}; + PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; + PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; + PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; 
PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; - PFN_vkCmdResolveImage vkCmdResolveImage{}; + PFN_vkCmdSetViewport vkCmdSetViewport{}; + PFN_vkCmdWaitEvents vkCmdWaitEvents{}; PFN_vkCreateBuffer vkCreateBuffer{}; PFN_vkCreateBufferView vkCreateBufferView{}; PFN_vkCreateCommandPool vkCreateCommandPool{}; @@ -990,6 +991,12 @@ public: dynamic_offsets.size(), dynamic_offsets.data()); } + void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template, + VkPipelineLayout layout, u32 set, + const void* data) const noexcept { + dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data); + } + void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { dld->vkCmdBindPipeline(handle, bind_point, pipeline); } -- cgit v1.2.3 From fcff19e0fa3d21130bc7b6cd50a10db102b5d4d7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 17 Jun 2021 23:12:41 -0400 Subject: shaders: Allow shader notify when async shaders is disabled --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 8 ++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 +++++------- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b8b24dd3d..8aaadccc4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -430,10 +430,10 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( previous_program = &program; } auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; - return std::make_unique( - device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - thread_worker, notify, sources, infos, key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); + return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, + maxwell3d, program_manager, state_tracker, + thread_worker, &shader_notify, sources, infos, + key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index dc028306a..e83628c13 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -529,11 +529,10 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; - return std::make_unique(maxwell3d, gpu_memory, scheduler, buffer_cache, - texture_cache, notify, device, descriptor_pool, - update_descriptor_queue, thread_worker, - render_pass_cache, key, std::move(modules), infos); + return std::make_unique( + maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, + descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, + std::move(modules), infos); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); @@ -596,9 +595,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? 
&shader_notify : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, - thread_worker, notify, program.info, + thread_worker, &shader_notify, program.info, std::move(spv_module)); } catch (const Shader::Exception& exception) { -- cgit v1.2.3 From 838d7e4ca59b79dc9a8dd727a12dfba00e73242c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 18 Jun 2021 03:22:00 -0300 Subject: buffer_cache: Fix size reductions not having in mind bind sizes A buffer binding can change between shaders without changing the shaders. This led to outdated bindings on OpenGL. --- src/video_core/buffer_cache/buffer_cache.h | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d004199ba..a75de4384 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -405,8 +405,6 @@ private: u32 written_compute_texture_buffers = 0; u32 image_compute_texture_buffers = 0; - std::array fast_bound_uniform_buffers{}; - std::array uniform_cache_hits{}; std::array uniform_cache_shots{}; @@ -416,6 +414,10 @@ private: std::conditional_t, Empty> dirty_uniform_buffers{}; + std::conditional_t, Empty> fast_bound_uniform_buffers{}; + std::conditional_t, NUM_STAGES>, Empty> + uniform_buffer_binding_sizes{}; std::vector cached_write_buffer_ids; @@ -684,6 +686,7 @@ void BufferCache

::SetUniformBuffersState(const std::array& m fast_bound_uniform_buffers.fill(0); } dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } } enabled_uniform_buffer_masks = mask; @@ -1016,14 +1019,18 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && - !buffer.IsRegionGpuModified(cpu_addr, size); + !buffer.IsRegionGpuModified(cpu_addr, size) && false; if (use_fast_buffer) { if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia - if (!HasFastUniformBufferBound(stage, binding_index)) { + const bool should_fast_bind = + !HasFastUniformBufferBound(stage, binding_index) || + uniform_buffer_binding_sizes[stage][binding_index] != size; + if (should_fast_bind) { // We only have to bind when the currently bound buffer is not the fast version fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -1033,6 +1040,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } if constexpr (IS_OPENGL) { fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span span = runtime.BindMappedUniformBuffer(stage, binding_index, size); @@ -1046,9 +1054,13 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } ++uniform_cache_shots[0]; - if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { - // Skip binding if it's not needed and if the bound buffer is not the fast version - // This exists to avoid instances where the fast buffer is bound and a GPU write happens + // Skip binding if it's not needed and if the bound buffer is not the fast version + // This exists to avoid instances where the fast buffer is bound and a GPU write happens + needs_bind |= HasFastUniformBufferBound(stage, binding_index); + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size; + } + if (!needs_bind) { return; } const u32 offset = buffer.Offset(cpu_addr); @@ -1060,6 +1072,9 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; } + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + uniform_buffer_binding_sizes[stage][binding_index] = size; + } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { @@ -1725,6 +1740,7 @@ template void BufferCache

::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } auto& flags = maxwell3d.dirty.flags; flags[Dirty::IndexBuffer] = true; -- cgit v1.2.3 From df9b7e18f5c5bab84cc8c38214a0e1e9e9506bd4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 19 Jun 2021 17:28:46 -0300 Subject: buffer_cache: Fix debugging leftover --- src/video_core/buffer_cache/buffer_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index a75de4384..cb9c69baf 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1019,7 +1019,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && - !buffer.IsRegionGpuModified(cpu_addr, size) && false; + !buffer.IsRegionGpuModified(cpu_addr, size); if (use_fast_buffer) { if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { -- cgit v1.2.3 From 218dedca1f8572bc0e43f8e7ea577f4ece28c4c2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:00:38 -0300 Subject: gl_graphics_pipeline: Port optimizations from Vulkan pipelines --- .../renderer_opengl/gl_graphics_pipeline.cpp | 180 ++++++++++++++------- .../renderer_opengl/gl_graphics_pipeline.h | 18 ++- 2 files changed, 141 insertions(+), 57 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 8d11fbc55..6b62fa1da 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -15,6 +15,12 @@ #include "video_core/shader_notify.h" #include "video_core/texture_cache/texture_cache.h" +#if defined(_MSC_VER) && defined(NDEBUG) +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif + namespace OpenGL { namespace { using Shader::ImageBufferDescriptor; @@ -98,13 +104,76 @@ std::pair TransformFeedbackEnum(u8 location) { return {GL_POSITION, 0}; } -struct Spec { +template +bool Passes(const std::array& stage_infos, u32 enabled_mask) { + for (size_t stage = 0; stage < stage_infos.size(); ++stage) { + if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if 
(!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if (!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; +} + +using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); + +template +ConfigureFuncPtr FindSpec(const std::array& stage_infos, u32 enabled_mask) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes(stage_infos, enabled_mask)) { + return FindSpec(stage_infos, enabled_mask); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc(); +} + +struct SimpleVertexFragmentSpec { + static constexpr std::array enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { static constexpr std::array enabled_stages{true, true, true, true, true}; static constexpr bool has_storage_buffers = true; static constexpr bool has_texture_buffers = true; static constexpr bool has_image_buffers = true; static constexpr bool has_images = true; }; + +ConfigureFuncPtr ConfigureFunc(const std::array& infos, u32 enabled_mask) { + return FindSpec(infos, enabled_mask); +} } // Anonymous namespace size_t GraphicsPipelineKey::Hash() const noexcept { @@ -129,8 +198,52 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (shader_notify) { shader_notify->MarkShaderBuilding(); } - std::ranges::transform(infos, stage_infos.begin(), - 
[](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + u32 num_textures{}; + u32 num_images{}; + u32 num_storage_buffers{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + auto& info{stage_infos[stage]}; + if (infos[stage]) { + info = *infos[stage]; + enabled_stages_mask |= 1u << stage; + } + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + } + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); + + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); + + if (assembly_shaders && xfb_state) { + 
GenerateTransformFeedbackState(*xfb_state); + } auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { if (!device.UseAssemblyShaders()) { program.handle = glCreateProgram(); @@ -142,7 +255,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } if (device.UseAssemblyShaders()) { assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; } else { AttachShader(Stage(stage), program.handle, code); } @@ -150,49 +262,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (!device.UseAssemblyShaders()) { LinkProgram(program.handle); } - u32 num_textures{}; - u32 num_images{}; - u32 num_storage_buffers{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += - AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += - AccumulateCount(info.storage_buffers_descriptors); - } - enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; - std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; - - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; - - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); - - 
writes_global_memory |= std::ranges::any_of( - info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); - - const bool assembly_shaders{assembly_programs[0].handle != 0}; - use_storage_buffers = - !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - writes_global_memory &= !use_storage_buffers; - - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); - } if (shader_notify) { shader_notify->MarkShaderComplete(); } @@ -205,7 +274,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } } -void GraphicsPipeline::Configure(bool is_indexed) { +template +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; std::array samplers; @@ -221,7 +291,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - const auto config_stage{[&](size_t stage) { + const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { @@ -311,7 +381,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); ImageId* texture_buffer_index{image_view_ids.data()}; - const auto bind_stage_info{[&](size_t stage) { + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { constexpr bool is_image = std::is_same_v; @@ -430,6 +500,11 @@ void GraphicsPipeline::Configure(bool is_indexed) { } } +void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + 
xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + void GraphicsPipeline::GenerateTransformFeedbackState( const VideoCommon::TransformFeedbackState& xfb_state) { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal @@ -475,9 +550,4 @@ void GraphicsPipeline::GenerateTransformFeedbackState( num_xfb_strides = static_cast(current_stream - xfb_streams.data()); } -void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { - glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, - xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 58deafd3c..a3546daa8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -75,7 +75,9 @@ public: const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); - void Configure(bool is_indexed); + void Configure(bool is_indexed) { + configure_func(this, is_indexed); + } void ConfigureTransformFeedback() const { if (num_xfb_attribs != 0) { @@ -91,11 +93,21 @@ public: return is_built.load(std::memory_order::relaxed); } + template + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pipeline, bool is_indexed) { + pipeline->ConfigureImpl(is_indexed); + }; + } + private: - void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + template + void ConfigureImpl(bool is_indexed); void ConfigureTransformFeedbackImpl() const; + void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -103,6 +115,8 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; + void (*configure_func)(GraphicsPipeline*, bool){}; + OGLProgram program; std::array 
assembly_programs; u32 enabled_stages_mask{}; -- cgit v1.2.3 From f5db8c74405c93b52efbdef318790bd9ec4661c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:23:50 -0300 Subject: gl_shader_cache: Check previous pipeline before checking hash map Port optimization from Vulkan. --- .../renderer_opengl/gl_graphics_pipeline.cpp | 33 ++++++++++------------ .../renderer_opengl/gl_graphics_pipeline.h | 9 ++++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 20 +++++++++---- src/video_core/renderer_opengl/gl_shader_cache.h | 5 +++- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 3 +- 5 files changed, 41 insertions(+), 29 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 6b62fa1da..92974ba08 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -184,17 +184,15 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc return std::memcmp(this, &rhs, Size()) == 0; } -GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_, - BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::Maxwell3D& maxwell3d_, - ProgramManager& program_manager_, StateTracker& state_tracker_, - ShaderWorker* thread_worker, - VideoCore::ShaderNotify* shader_notify, - std::array sources, - const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} { +GraphicsPipeline::GraphicsPipeline( + const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& 
state_tracker_, ShaderWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, std::array sources, + const std::array& infos, const GraphicsPipelineKey& key_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, key{key_} { if (shader_notify) { shader_notify->MarkShaderBuilding(); } @@ -241,10 +239,10 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c writes_global_memory &= !use_storage_buffers; configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); + if (assembly_shaders && key.xfb_enabled) { + GenerateTransformFeedbackState(); } - auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { + auto func{[this, device, sources, shader_notify](ShaderContext::Context*) mutable { if (!device.UseAssemblyShaders()) { program.handle = glCreateProgram(); } @@ -505,15 +503,14 @@ void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); } -void GraphicsPipeline::GenerateTransformFeedbackState( - const VideoCommon::TransformFeedbackState& xfb_state) { +void GraphicsPipeline::GenerateTransformFeedbackState() { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // when this is required. 
GLint* cursor{xfb_attribs.data()}; GLint* current_stream{xfb_streams.data()}; for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = xfb_state.layouts[feedback]; + const auto& layout = key.xfb_state.layouts[feedback]; UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); if (layout.varying_count == 0) { continue; @@ -528,7 +525,7 @@ void GraphicsPipeline::GenerateTransformFeedbackState( } ++current_stream; - const auto& locations = xfb_state.varyings[feedback]; + const auto& locations = key.xfb_state.varyings[feedback]; std::optional current_index; for (u32 offset = 0; offset < layout.varying_count; ++offset) { const u8 location = locations[offset]; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index a3546daa8..a033d4a95 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -73,7 +73,7 @@ public: ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, std::array sources, const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state); + const GraphicsPipelineKey& key_); void Configure(bool is_indexed) { configure_func(this, is_indexed); @@ -85,6 +85,10 @@ public: } } + [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept { + return key; + } + [[nodiscard]] bool WritesGlobalMemory() const noexcept { return writes_global_memory; } @@ -106,7 +110,7 @@ private: void ConfigureTransformFeedbackImpl() const; - void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + void GenerateTransformFeedbackState(); TextureCache& texture_cache; BufferCache& buffer_cache; @@ -114,6 +118,7 @@ private: Tegra::Engines::Maxwell3D& maxwell3d; ProgramManager& program_manager; StateTracker& state_tracker; + const GraphicsPipelineKey key; void (*configure_func)(GraphicsPipeline*, 
bool){}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 8aaadccc4..c36b0d8cf 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -298,6 +298,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (!RefreshStages(graphics_key.unique_hashes)) { + current_pipeline = nullptr; return nullptr; } const auto& regs{maxwell3d.regs}; @@ -313,15 +314,23 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (graphics_key.xfb_enabled) { SetXfbState(graphics_key.xfb_state, regs); } + if (current_pipeline && graphics_key == current_pipeline->Key()) { + return current_pipeline->IsBuilt() ? current_pipeline : nullptr; + } + return CurrentGraphicsPipelineSlowPath(); +} + +GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() { const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; - auto& program{pair->second}; + auto& pipeline{pair->second}; if (is_new) { - program = CreateGraphicsPipeline(); + pipeline = CreateGraphicsPipeline(); } - if (!program || !program->IsBuilt()) { + current_pipeline = pipeline.get(); + if (!pipeline || !pipeline->IsBuilt()) { return nullptr; } - return program.get(); + return pipeline.get(); } ComputePipeline* ShaderCache::CurrentComputePipeline() { @@ -432,8 +441,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - thread_worker, &shader_notify, sources, infos, - key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); + thread_worker, &shader_notify, sources, infos, key); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index ff5707119..16873fcec 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -53,6 +53,8 @@ public: [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: + GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( @@ -75,9 +77,10 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; VideoCore::ShaderNotify& shader_notify; + const bool use_asynchronous_shaders; GraphicsPipelineKey graphics_key{}; - const bool use_asynchronous_shaders; + GraphicsPipeline* current_pipeline{}; ShaderContext::ShaderPools main_pools; std::unordered_map> graphics_cache; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 42da2960b..efe5a7ed8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -146,12 +146,11 @@ private: BufferCache& buffer_cache; TextureCache& texture_cache; VideoCore::ShaderNotify& shader_notify; + bool use_asynchronous_shaders{}; GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; - bool use_asynchronous_shaders{}; - std::unordered_map> compute_cache; std::unordered_map> graphics_cache; -- cgit v1.2.3 From 9bd05313849f76fc64406d5ebf3aadf39fa3bfde Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:35:30 -0300 Subject: gl_graphics_pipeline: Inline hash and operator== key functions --- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 10 ---------- src/video_core/renderer_opengl/gl_graphics_pipeline.h | 10 ++++++++-- 2 
files changed, 8 insertions(+), 12 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 92974ba08..ad61a17a5 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -3,9 +3,7 @@ // Refer to the license.txt file included. #include -#include -#include "common/cityhash.h" #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" @@ -176,14 +174,6 @@ ConfigureFuncPtr ConfigureFunc(const std::array& infos, u32 ena } } // Anonymous namespace -size_t GraphicsPipelineKey::Hash() const noexcept { - return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); -} - -bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexcept { - return std::memcmp(this, &rhs, Size()) == 0; -} - GraphicsPipeline::GraphicsPipeline( const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index a033d4a95..f82d712f8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -5,10 +5,12 @@ #pragma once #include +#include #include #include #include "common/bit_field.h" +#include "common/cityhash.h" #include "common/common_types.h" #include "shader_recompiler/shader_info.h" #include "video_core/engines/maxwell_3d.h" @@ -44,9 +46,13 @@ struct GraphicsPipelineKey { std::array padding; VideoCommon::TransformFeedbackState xfb_state; - size_t Hash() const noexcept; + size_t Hash() const noexcept { + return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); + } - bool operator==(const 
GraphicsPipelineKey&) const noexcept; + bool operator==(const GraphicsPipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; + } bool operator!=(const GraphicsPipelineKey& rhs) const noexcept { return !operator==(rhs); -- cgit v1.2.3 From 3877918e9657bcde160080aecc1821cf8cb50ea4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 17:09:50 -0300 Subject: gl_graphics_pipeline: Fix assembly shaders check for transform feedbacks --- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index ad61a17a5..a93b03cf7 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -229,7 +229,7 @@ GraphicsPipeline::GraphicsPipeline( writes_global_memory &= !use_storage_buffers; configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); - if (assembly_shaders && key.xfb_enabled) { + if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } auto func{[this, device, sources, shader_notify](ShaderContext::Context*) mutable { -- cgit v1.2.3 From fb166b5ff4b42279b2c63c69f5b5a35feafa259e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:39:21 -0300 Subject: shader: Emulate 64-bit integers when not supported Useful for mobile and Intel Xe devices. 
--- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 5 +++++ 5 files changed, 13 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 5838fc02f..b1b5ba1ab 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -159,6 +159,7 @@ Device::Device() { has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; + has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 0c9d6fe31..0bd277d38 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -124,6 +124,10 @@ public: return has_nv_gpu_shader_5; } + bool HasShaderInt64() const { + return has_shader_int64; + } + bool HasAmdShaderHalfFloat() const { return has_amd_shader_half_float; } @@ -172,6 +176,7 @@ private: bool use_driver_cache{}; bool has_depth_buffer_float{}; bool has_nv_gpu_shader_5{}; + bool has_shader_int64{}; bool has_amd_shader_half_float{}; bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c36b0d8cf..f2f18b18a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -211,7 +211,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo }, host_info{ .support_float16 = false, - .support_int64 = true, + .support_int64 = device.HasShaderInt64(), } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e83628c13..ec06b124f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,7 +315,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw }; host_info = Shader::HostTranslateInfo{ .support_float16 = device.IsFloat16Supported(), - .support_int64 = true, + .support_int64 = device.IsShaderInt64Supported(), }; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 49605752d..40d00a52f 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -164,6 +164,11 @@ public: return is_formatless_image_load_supported; } + /// Returns true if shader int64 is supported. + bool IsShaderInt64Supported() const { + return is_shader_int64_supported; + } + /// Returns true if shader int16 is supported. 
bool IsShaderInt16Supported() const { return is_shader_int16_supported; -- cgit v1.2.3 From 395bed3a0af90a53be44e81eadd06f4931c8e933 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 02:41:00 -0300 Subject: shader: Unify shader stage types --- src/video_core/engines/kepler_compute.cpp | 1 - src/video_core/engines/maxwell_3d.cpp | 1 - src/video_core/engines/maxwell_3d.h | 1 - src/video_core/engines/shader_type.h | 21 --------------------- src/video_core/renderer_opengl/gl_device.cpp | 18 ++++++++++-------- src/video_core/renderer_opengl/gl_device.h | 11 ++++++----- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 -- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - src/video_core/renderer_opengl/gl_shader_cache.h | 1 - src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 15 ++++++++------- src/video_core/renderer_vulkan/maxwell_to_vk.h | 3 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 -- src/video_core/shader_environment.cpp | 2 +- 14 files changed, 28 insertions(+), 53 deletions(-) delete mode 100644 src/video_core/engines/shader_type.h (limited to 'src/video_core') diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index cae93c470..492b4c5a3 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 103a51fd0..b18b8a02a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "core/core_timing.h" #include 
"video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 04d5790f6..fc2c36c6b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -20,7 +20,6 @@ #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/macro/macro.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h deleted file mode 100644 index 49ce5cde5..000000000 --- a/src/video_core/engines/shader_type.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/common_types.h" - -namespace Tegra::Engines { - -enum class ShaderType : u32 { - Vertex = 0, - TesselationControl = 1, - TesselationEval = 2, - Geometry = 3, - Fragment = 4, - Compute = 5, -}; -static constexpr std::size_t MaxShaderTypes = 6; - -} // namespace Tegra::Engines diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b1b5ba1ab..27be347e6 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -17,6 +17,7 @@ #include "common/logging/log.h" #include "common/scope_exit.h" #include "common/settings.h" +#include "shader_recompiler/stage.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -59,16 +60,18 @@ bool HasExtension(std::span extensions, std::string_view return std::ranges::find(extensions, extension) != extensions.end(); } -std::array BuildMaxUniformBuffers() noexcept { - std::array max; - std::ranges::transform(LIMIT_UBOS, max.begin(), - [](GLenum pname) { return GetInteger(pname); }); +std::array BuildMaxUniformBuffers() noexcept { + std::array max; + std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger); return max; } bool IsASTCSupported() { - static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; - static constexpr std::array formats = { + static constexpr std::array targets{ + GL_TEXTURE_2D, + GL_TEXTURE_2D_ARRAY, + }; + static constexpr std::array formats{ GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, @@ -84,11 +87,10 @@ bool IsASTCSupported() { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, }; - static constexpr std::array 
required_support = { + static constexpr std::array required_support{ GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, }; - for (const GLenum target : targets) { for (const GLenum format : formats) { for (const GLenum support : required_support) { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 0bd277d38..ad7b01b06 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -6,7 +6,7 @@ #include #include "common/common_types.h" -#include "video_core/engines/shader_type.h" +#include "shader_recompiler/stage.h" namespace OpenGL { @@ -16,8 +16,8 @@ public: [[nodiscard]] std::string GetVendorName() const; - u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { - return max_uniform_buffers[static_cast(shader_type)]; + u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept { + return max_uniform_buffers[static_cast(stage)]; } size_t GetUniformBufferAlignment() const { @@ -148,8 +148,7 @@ private: static bool TestVariableAoffi(); static bool TestPreciseBug(); - std::string vendor_name; - std::array max_uniform_buffers{}; + std::array max_uniform_buffers{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; @@ -181,6 +180,8 @@ private: bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; + + std::string vendor_name; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e3d336f86..0f0d780b5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -23,7 +23,6 @@ #include "core/memory.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include 
"video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_query_cache.h" @@ -40,7 +39,6 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; using GLvec4 = std::array; -using Tegra::Engines::ShaderType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f2f18b18a..5af9b7745 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -26,7 +26,6 @@ #include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 16873fcec..9d5306293 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -17,7 +17,6 @@ #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" -#include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_context.h" diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 8f0b0b8ec..8f9b9a11a 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const 
Device& device, FormatType format_type, bool with return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { +VkShaderStageFlagBits ShaderStage(Shader::Stage stage) { switch (stage) { - case Tegra::Engines::ShaderType::Vertex: + case Shader::Stage::VertexA: + case Shader::Stage::VertexB: return VK_SHADER_STAGE_VERTEX_BIT; - case Tegra::Engines::ShaderType::TesselationControl: + case Shader::Stage::TessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - case Tegra::Engines::ShaderType::TesselationEval: + case Shader::Stage::TessellationEval: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; - case Tegra::Engines::ShaderType::Geometry: + case Shader::Stage::Geometry: return VK_SHADER_STAGE_GEOMETRY_BIT; - case Tegra::Engines::ShaderType::Fragment: + case Shader::Stage::Fragment: return VK_SHADER_STAGE_FRAGMENT_BIT; - case Tegra::Engines::ShaderType::Compute: + case Shader::Stage::Compute: return VK_SHADER_STAGE_COMPUTE_BIT; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 50a599c11..8a9616039 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/stage.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" #include "video_core/textures/texture.h" @@ -45,7 +46,7 @@ struct FormatInfo { [[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, PixelFormat pixel_format); -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); +VkShaderStageFlagBits ShaderStage(Shader::Stage stage); VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); diff --git 
a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2b59a9d88..9eb353a88 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -737,7 +737,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)), .module = *spv_modules[stage], .pName = "main", .pSpecializationInfo = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c57e16c50..f04c3394c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -58,8 +58,6 @@ struct DrawParams { bool is_indexed; }; -constexpr auto COMPUTE_SHADER_INDEX = static_cast(Tegra::Engines::ShaderType::Compute); - VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 6243cd176..d463e2b56 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,7 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 3; +constexpr u32 CACHE_VERSION = 4; constexpr size_t INST_SIZE = sizeof(u64); -- cgit v1.2.3 From 4f052a1f393d45843eabc237e21757be15f20062 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 03:32:41 -0300 Subject: vk_graphics_pipeline: Implement conservative rendering --- src/video_core/engines/maxwell_3d.h | 7 ++++- .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + 
.../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 33 ++++++++++++++++------ src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++ src/video_core/vulkan_common/vulkan_device.h | 6 ++++ 6 files changed, 44 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index fc2c36c6b..da2ded671 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -910,7 +910,11 @@ public: u32 fill_rectangle; - INSERT_PADDING_WORDS_NOINIT(0x8); + INSERT_PADDING_WORDS_NOINIT(0x2); + + u32 conservative_raster_enable; + + INSERT_PADDING_WORDS_NOINIT(0x5); std::array vertex_attrib_format; @@ -1615,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(render_area, 0x3FD); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); +ASSERT_REG_POSITION(conservative_raster_enable, 0x452); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); ASSERT_REG_POSITION(multisample_sample_locations, 0x478); ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 16cef8711..7563dc462 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -87,6 +87,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, depth_format.Assign(static_cast(regs.zeta.format)); y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); + conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 
1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 04f34eb97..66b57b636 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -194,6 +194,7 @@ struct FixedPipelineState { BitField<6, 5, u32> depth_format; BitField<11, 1, u32> y_negate; BitField<12, 1, u32> provoking_vertex_last; + BitField<13, 1, u32> conservative_raster_enable; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9eb353a88..70e183e65 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -599,16 +599,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pScissors = nullptr, }; - const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, - .pNext = nullptr, - .provokingVertexMode = key.state.provoking_vertex_last != 0 - ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT - : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, - }; - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = device.IsExtProvokingVertexSupported() ? &provoking_vertex : nullptr, + .pNext = nullptr, .flags = 0, .depthClampEnable = static_cast(key.state.depth_clamp_disabled == 0 ? 
VK_TRUE : VK_FALSE), @@ -625,6 +618,28 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .conservativeRasterizationMode = key.state.conservative_raster_enable != 0 + ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT + : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, + .extraPrimitiveOverestimationSize = 0.0f, + }; + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; + if (device.IsExtConservativeRasterizationSupported()) { + conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); + } + if (device.IsExtProvokingVertexSupported()) { + provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex); + } const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9d918de8d..7b184d2f8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -412,6 +412,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + if (!ext_conservative_rasterization) { + LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); + } + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; if (ext_provoking_vertex) { 
provoking_vertex = { @@ -776,6 +780,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { true); test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); + test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, + true); test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 40d00a52f..a9c0a0e4d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -264,6 +264,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_EXT_conservative_rasterization. + bool IsExtConservativeRasterizationSupported() const { + return ext_conservative_rasterization; + } + /// Returns true if the device supports VK_EXT_provoking_vertex. bool IsExtProvokingVertexSupported() const { return ext_provoking_vertex; @@ -374,6 +379,7 @@ private: bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 
bool has_renderdoc{}; ///< Has RenderDoc attached -- cgit v1.2.3 From 7dafa96ab59892b7f1fbffdb61e4326e6443955f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 02:41:09 -0300 Subject: shader: Rework varyings and implement passthrough geometry shaders Put all varyings into a single std::bitset with helpers to access it. Implement passthrough geometry shaders using host's. --- src/video_core/engines/maxwell_3d.h | 7 ++++++- src/video_core/renderer_opengl/gl_shader_cache.cpp | 7 ++++--- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 6 ++---- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 16 ++++++++++------ src/video_core/shader_environment.cpp | 10 +++++++++- src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++++ src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 7 files changed, 43 insertions(+), 15 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index da2ded671..471d5686a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -961,7 +961,11 @@ public: SamplerIndex sampler_index; - INSERT_PADDING_WORDS_NOINIT(0x25); + INSERT_PADDING_WORDS_NOINIT(0x2); + + std::array gp_passthrough_mask; + + INSERT_PADDING_WORDS_NOINIT(0x1B); u32 depth_test_enable; @@ -1628,6 +1632,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); ASSERT_REG_POSITION(zeta_depth, 0x48c); ASSERT_REG_POSITION(sampler_index, 0x48D); +ASSERT_REG_POSITION(gp_passthrough_mask, 0x490); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5af9b7745..06e39a503 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -61,10 +61,10 
@@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + // Mark all stores as available for vertex shaders + info.previous_stage_stores.mask.set(); } switch (program.stage) { case Shader::Stage::VertexB: @@ -187,6 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), + .support_geometry_shader_passthrough = false, // TODO .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 70e183e65..6d664ed6b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -487,10 +487,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector vertex_binding_divisors; static_vector vertex_attributes; if (key.state.dynamic_vertex_input) { - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const u32 type = key.state.DynamicAttributeType(index); - if (!input_attributes[index].used || type == 0) { + if (!stage_infos[0].loads.Generic(index) || type == 0) { continue; } vertex_attributes.push_back({ @@ -526,10 +525,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }); } } - const auto& input_attributes = 
stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { + if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { continue; } vertex_attributes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ec06b124f..7aaa40ef2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -123,18 +123,21 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde return Shader::AttributeType::Disabled; } -Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, +Shader::RuntimeInfo MakeRuntimeInfo(std::span programs, + const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program, const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; + if (previous_program->is_geometry_passthrough) { + info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; + } } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + info.previous_stage_stores.mask.set(); } const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; + const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast(key.state.point_size)}; switch (stage) { @@ -302,6 +305,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), 
.support_derivative_control = true, + .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), @@ -518,7 +522,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; + const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index d463e2b56..429cab30d 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,7 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 4; +constexpr u32 CACHE_VERSION = 5; constexpr size_t INST_SIZE = sizeof(u64); @@ -155,6 +155,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.write(reinterpret_cast(&gp_passthrough_mask), + sizeof(gp_passthrough_mask)); + } } } @@ -202,6 +206,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; @@ -319,6 +324,9 @@ void FileEnvironment::Deserialize(std::ifstream& file) { 
.read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.read(reinterpret_cast(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.read(reinterpret_cast(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); + } } } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7b184d2f8..da4721e6b 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -350,6 +350,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); } + if (!nv_geometry_shader_passthrough) { + LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -768,6 +772,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); + test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, + true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index a9c0a0e4d..d0adc0127 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -194,6 +194,11 @@ public: return nv_viewport_array2; } + /// Returns true if the device supports VK_NV_geometry_shader_passthrough. + bool IsNvGeometryShaderPassthroughSupported() const { + return nv_geometry_shader_passthrough; + } + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. 
bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -363,6 +368,7 @@ private: bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. + bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. -- cgit v1.2.3 From 8a3427a4c857aa08e365d1776d1f0d9f32639c9c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 17:40:24 -0300 Subject: glasm: Add passthrough geometry shader support --- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 27be347e6..6818951f2 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -160,6 +160,7 @@ Device::Device() { has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); + has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 
ad7b01b06..45ddf5e01 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -120,6 +120,10 @@ public: return has_depth_buffer_float; } + bool HasGeometryShaderPassthrough() const { + return has_geometry_shader_passthrough; + } + bool HasNvGpuShader5() const { return has_nv_gpu_shader_5; } @@ -174,6 +178,7 @@ private: bool use_asynchronous_shaders{}; bool use_driver_cache{}; bool has_depth_buffer_float{}; + bool has_geometry_shader_passthrough{}; bool has_nv_gpu_shader_5{}; bool has_shader_int64{}; bool has_amd_shader_half_float{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 06e39a503..af8e9f44d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -187,7 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), - .support_geometry_shader_passthrough = false, // TODO + .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), -- cgit v1.2.3 From 1152d66ddd4e7b29b53e01990fef77e4cff20e24 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:28:48 -0400 Subject: general: Add setting shader_backend GLASM is getting good enough that we can move it out of advanced graphics settings. This removes the setting `use_assembly_shaders`, opting for an enum class `shader_backend`. This comes with the benefits that it is extensible for additional shader backends besides GLSL and GLASM, and this will work better with a QComboBox.
Qt removes the related assembly shader setting from the Advanced Graphics section and places it as a new QComboBox in the API Settings group. This will replace the Vulkan device selector when OpenGL is selected. Additionally, mark all of the custom anisotropic filtering settings as "WILL BREAK THINGS", as that is the case with a select few games. --- src/video_core/renderer_opengl/gl_device.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 6818951f2..c4eeed53b 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -172,9 +172,10 @@ Device::Device() { // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() && - GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && - GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + use_assembly_shaders = + Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && + GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && + GLAD_GL_NV_transform_feedback2; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && @@ -187,7 +188,8 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { + if (Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && + !use_assembly_shaders) { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); } -- cgit v1.2.3 From fb9b1787f86d069db27fe0af44ded042c6d8de39 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Tue, 22 Jun 2021 01:12:11 -0400 Subject: video_core: Enable GL SPIR-V shaders --- .../renderer_opengl/gl_compute_pipeline.cpp | 17 ++++-- .../renderer_opengl/gl_compute_pipeline.h | 2 +- src/video_core/renderer_opengl/gl_device.cpp | 8 ++- src/video_core/renderer_opengl/gl_device.h | 11 ++++ .../renderer_opengl/gl_graphics_pipeline.cpp | 64 ++++++++++++++-------- .../renderer_opengl/gl_graphics_pipeline.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 40 +++++++++++--- 7 files changed, 105 insertions(+), 38 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 2d6442d74..c63e87a56 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -5,6 +5,7 @@ #include #include "common/cityhash.h" +#include "common/settings.h" // for enum class Settings::ShaderBackend #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -40,15 +41,23 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, 
ProgramManager& program_manager_, const Shader::Info& info_, - std::string code) + std::string code, std::vector code_v) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { - if (device.UseAssemblyShaders()) { - assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); - } else { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: source_program.handle = glCreateProgram(); AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); + break; + case Settings::ShaderBackend::GLASM: + assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + break; + case Settings::ShaderBackend::SPIRV: + source_program.handle = glCreateProgram(); + AttachShader(GL_COMPUTE_SHADER, source_program.handle, code_v); + LinkProgram(source_program.handle); + break; } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index b5fc45f26..50c676365 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -54,7 +54,7 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - std::string code); + std::string code, std::vector code_v); void Configure(); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c4eeed53b..99f8769fc 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -177,6 +177,11 @@ Device::Device() { GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && 
GLAD_GL_NV_transform_feedback2; + shader_backend = (Settings::values.shader_backend.GetValue() == + Settings::ShaderBackend::GLASM) == use_assembly_shaders + ? Settings::values.shader_backend.GetValue() + : Settings::ShaderBackend::GLSL; + // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && !(is_amd || (is_intel && !is_linux)); @@ -188,8 +193,7 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - if (Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && - !use_assembly_shaders) { + if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 45ddf5e01..ee992aed4 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -8,6 +8,10 @@ #include "common/common_types.h" #include "shader_recompiler/stage.h" +namespace Settings { +enum class ShaderBackend : u32; +}; + namespace OpenGL { class Device { @@ -148,6 +152,10 @@ public: return need_fastmath_off; } + Settings::ShaderBackend GetShaderBackend() const { + return shader_backend; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -159,6 +167,9 @@ private: u32 max_varyings{}; u32 max_compute_shared_memory_size{}; u32 max_glasm_storage_buffer_blocks{}; + + Settings::ShaderBackend shader_backend{}; + bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index a93b03cf7..1f19b5825 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ 
b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -3,7 +3,11 @@ // Refer to the license.txt file included. #include +#include +#include +#include +#include "common/settings.h" // for enum class Settings::ShaderBackend #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" @@ -179,7 +183,8 @@ GraphicsPipeline::GraphicsPipeline( Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, std::array sources, - const std::array& infos, const GraphicsPipelineKey& key_) + std::array, 5> sources_spirv, const std::array& infos, + const GraphicsPipelineKey& key_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_}, key{key_} { @@ -232,29 +237,44 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{[this, device, sources, shader_notify](ShaderContext::Context*) mutable { - if (!device.UseAssemblyShaders()) { - program.handle = glCreateProgram(); - } - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{sources[stage]}; - if (code.empty()) { - continue; + auto func{ + [this, device, sources, sources_spirv, shader_notify](ShaderContext::Context*) mutable { + if (!device.UseAssemblyShaders()) { + program.handle = glCreateProgram(); } - if (device.UseAssemblyShaders()) { - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - } else { - AttachShader(Stage(stage), program.handle, code); + for (size_t stage = 0; stage < 5; ++stage) { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + 
AttachShader(Stage(stage), program.handle, code); + } break; + case Settings::ShaderBackend::GLASM: { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + } break; + case Settings::ShaderBackend::SPIRV: { + const auto code{sources_spirv[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } break; + } } - } - if (!device.UseAssemblyShaders()) { - LinkProgram(program.handle); - } - if (shader_notify) { - shader_notify->MarkShaderComplete(); - } - is_built.store(true, std::memory_order_relaxed); - }}; + if (!device.UseAssemblyShaders()) { + LinkProgram(program.handle); + } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + is_built.store(true, std::memory_order_relaxed); + }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); } else { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index f82d712f8..5f5d57385 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -78,6 +78,7 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, std::array sources, + std::array, 5> sources_spirv, const std::array& infos, const GraphicsPipelineKey& key_); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index af8e9f44d..cde0f54c9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -15,6 +15,7 @@ #include "common/fs/path_util.h" #include "common/logging/log.h" #include "common/scope_exit.h" +#include "common/settings.h" #include "common/thread_worker.h" #include "core/core.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" @@ -415,6 
+416,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array sources; + std::array, 5> sources_spirv; Shader::Backend::Bindings binding; Shader::IR::Program* previous_program{}; const bool use_glasm{device.UseAssemblyShaders()}; @@ -431,17 +433,23 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; - if (use_glasm) { - sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); - } else { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); + break; + case Settings::ShaderBackend::GLASM: + sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); + break; + case Settings::ShaderBackend::SPIRV: + sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding); + break; } previous_program = &program; } auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; - return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, - maxwell3d, program_manager, state_tracker, - thread_worker, &shader_notify, sources, infos, key); + return std::make_unique( + device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + thread_worker, &shader_notify, sources, sources_spirv, infos, key); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -478,10 +486,24 @@ std::unique_ptr ShaderCache::CreateComputePipeline( } Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - const std::string code{device.UseAssemblyShaders() ? 
EmitGLASM(profile, info, program) - : EmitGLSL(profile, program)}; + + std::string code{}; + std::vector code_spirv; + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + code = EmitGLSL(profile, program); + break; + case Settings::ShaderBackend::GLASM: + code = EmitGLASM(profile, info, program); + break; + case Settings::ShaderBackend::SPIRV: + code_spirv = EmitSPIRV(profile, program); + break; + } + return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, - kepler_compute, program_manager, program.info, code); + kepler_compute, program_manager, program.info, code, + code_spirv); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); return nullptr; -- cgit v1.2.3 From 57a8921e01a90ff5993079dd638a6c48e5781756 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Jun 2021 05:21:51 -0300 Subject: vk_graphics_pipeline: Implement line width --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 9 +++++++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 1 + src/video_core/renderer_vulkan/vk_state_tracker.cpp | 14 ++++++++++---- src/video_core/renderer_vulkan/vk_state_tracker.h | 7 ++++++- src/video_core/vulkan_common/vulkan_device.cpp | 4 ++-- src/video_core/vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 5 +++++ 8 files changed, 36 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6d664ed6b..3363a6877 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -705,11 +705,12 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - static_vector dynamic_states{ + static_vector dynamic_states{ 
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + VK_DYNAMIC_STATE_LINE_WIDTH, }; if (key.state.extended_dynamic_state) { static constexpr std::array extended{ diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f04c3394c..bb7301c53 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -541,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateBlendConstants(regs); UpdateDepthBounds(regs); UpdateStencilFaces(regs); + UpdateLineWidth(regs); if (device.IsExtExtendedDynamicStateSupported()) { UpdateCullMode(regs); UpdateDepthBoundsTestEnable(regs); @@ -676,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) } } +void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchLineWidth()) { + return; + } + const float width = regs.line_smooth_enable ? 
regs.line_width_smooth : regs.line_width_aliased; + scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); }); +} + void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchCullMode()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c954fa7f8..866827247 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -125,6 +125,7 @@ private: void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 0ebe0473f..e3b7dd61c 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -29,10 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, - DepthBounds, StencilProperties, CullMode, DepthBoundsEnable, - DepthTestEnable, DepthWriteEnable, DepthCompareOp, FrontFace, - StencilOp, StencilTestEnable, VertexBuffers, VertexInput, + Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, + StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable, + DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, + VertexBuffers, VertexInput, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -86,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) { table[OFF(stencil_back_func_mask)] = 
StencilProperties; } +void SetupDirtyLineWidth(Tables& tables) { + tables[0][OFF(line_width_smooth)] = LineWidth; + tables[0][OFF(line_width_aliased)] = LineWidth; +} + void SetupDirtyCullMode(Tables& tables) { auto& table = tables[0]; table[OFF(cull_face)] = CullMode; @@ -180,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyBlendConstants(tables); SetupDirtyDepthBounds(tables); SetupDirtyStencilProperties(tables); + SetupDirtyLineWidth(tables); SetupDirtyCullMode(tables); SetupDirtyDepthBoundsEnable(tables); SetupDirtyDepthTestEnable(tables); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 1976b7e9b..5f78f6950 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -31,6 +31,7 @@ enum : u8 { BlendConstants, DepthBounds, StencilProperties, + LineWidth, CullMode, DepthBoundsEnable, @@ -44,7 +45,7 @@ enum : u8 { Blending, ViewportSwizzles, - Last + Last, }; static_assert(Last <= std::numeric_limits::max()); @@ -93,6 +94,10 @@ public: return Exchange(Dirty::StencilProperties, false); } + bool TouchLineWidth() const { + return Exchange(Dirty::LineWidth, false); + } + bool TouchCullMode() { return Exchange(Dirty::CullMode, false); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index da4721e6b..912e03c5c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -227,7 +227,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .depthBiasClamp = true, .fillModeNonSolid = true, .depthBounds = is_depth_bounds_supported, - .wideLines = false, + .wideLines = true, .largePoints = true, .alphaToOne = false, .multiViewport = true, @@ -703,7 +703,6 @@ void Device::CheckSuitability(bool requires_swapchain) const { const std::array feature_report{ 
std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), - std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, "independentBlend"), std::make_pair(features.depthClamp, "depthClamp"), @@ -712,6 +711,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.multiViewport, "multiViewport"), std::make_pair(features.depthBiasClamp, "depthBiasClamp"), std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), + std::make_pair(features.wideLines, "wideLines"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.sampleRateShading, "sampleRateShading"), diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index d7e9fac22..bbf0fccae 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -121,6 +121,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetDepthTestEnableEXT); X(vkCmdSetDepthWriteEnableEXT); X(vkCmdSetFrontFaceEXT); + X(vkCmdSetLineWidth); X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index d43b606f1..d76bb4324 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -231,6 +231,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; PFN_vkCmdSetEvent vkCmdSetEvent{}; PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; + PFN_vkCmdSetLineWidth vkCmdSetLineWidth{}; PFN_vkCmdSetPrimitiveTopologyEXT 
vkCmdSetPrimitiveTopologyEXT{}; PFN_vkCmdSetScissor vkCmdSetScissor{}; PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; @@ -1198,6 +1199,10 @@ public: dld->vkCmdSetFrontFaceEXT(handle, front_face); } + void SetLineWidth(float line_width) const noexcept { + dld->vkCmdSetLineWidth(handle, line_width); + } + void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); } -- cgit v1.2.3 From f94f0be5215369a6985247ad936d9d9f43c9b140 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Jun 2021 05:25:19 -0300 Subject: vk_graphics_pipeline: Implement smooth lines --- .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 +++++++++++ src/video_core/vulkan_common/vulkan_device.cpp | 41 +++++++++++++++++++--- src/video_core/vulkan_common/vulkan_device.h | 6 ++++ 5 files changed, 65 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 7563dc462..d089da8a4 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -88,6 +88,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); + smooth_lines.Assign(regs.line_smooth_enable != 0 ? 
1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 66b57b636..c9be37935 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -195,6 +195,7 @@ struct FixedPipelineState { BitField<11, 1, u32> y_negate; BitField<12, 1, u32> provoking_vertex_last; BitField<13, 1, u32> conservative_raster_enable; + BitField<14, 1, u32> smooth_lines; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3363a6877..f0ae0b0d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -80,6 +80,14 @@ bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); } +bool IsLine(VkPrimitiveTopology topology) { + static constexpr std::array line_topologies{ + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, + // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT, + }; + return std::ranges::find(line_topologies, topology) == line_topologies.end(); +} + VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { union Swizzle { u32 raw; @@ -616,6 +624,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationLineStateCreateInfoEXT line_state{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .lineRasterizationMode = key.state.smooth_lines != 0 + ? 
VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT + : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT, + .stippledLineEnable = VK_FALSE, // TODO + .lineStippleFactor = 0, + .lineStipplePattern = 0, + }; VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, .pNext = nullptr, @@ -632,6 +650,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, }; + if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) { + line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state); + } if (device.IsExtConservativeRasterizationSupported()) { conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 912e03c5c..4fa1470e8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -416,6 +416,23 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; + if (ext_line_rasterization) { + line_raster = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT, + .pNext = nullptr, + .rectangularLines = VK_TRUE, + .bresenhamLines = VK_FALSE, + .smoothLines = VK_TRUE, + .stippledRectangularLines = VK_FALSE, + .stippledBresenhamLines = VK_FALSE, + .stippledSmoothLines = VK_FALSE, + }; + SetNext(next, line_raster); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines"); + } + if (!ext_conservative_rasterization) { LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); } @@ -757,6 +774,7 @@ std::vector Device::LoadExtensions(bool 
requires_surface) { bool has_ext_shader_atomic_int64{}; bool has_ext_provoking_vertex{}; bool has_ext_vertex_input_dynamic_state{}; + bool has_ext_line_rasterization{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -798,6 +816,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); + test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); @@ -918,17 +937,29 @@ std::vector Device::LoadExtensions(bool requires_surface) { } } if (has_ext_extended_dynamic_state) { - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; - dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; - dynamic_state.pNext = nullptr; - features.pNext = &dynamic_state; + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state; + extended_dynamic_state.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + extended_dynamic_state.pNext = nullptr; + features.pNext = &extended_dynamic_state; physical.GetFeatures2KHR(features); - if (dynamic_state.extendedDynamicState) { + if (extended_dynamic_state.extendedDynamicState) { extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); ext_extended_dynamic_state = true; } } + if (has_ext_line_rasterization) { + VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; + line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT; + line_raster.pNext = nullptr; + features.pNext = &line_raster; + physical.GetFeatures2KHR(features); + if 
(line_raster.rectangularLines && line_raster.smoothLines) { + extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME); + ext_line_rasterization = true; + } + } if (has_khr_workgroup_memory_explicit_layout) { VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; layout.sType = diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index d0adc0127..26100166f 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -259,6 +259,11 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_line_rasterization. + bool IsExtLineRasterizationSupported() const { + return ext_line_rasterization; + } + /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. bool IsExtVertexInputDynamicStateSupported() const { return ext_vertex_input_dynamic_state; @@ -382,6 +387,7 @@ private: bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. 
-- cgit v1.2.3 From 5643a909bc3fa9f497d2f2e68650f823ed2944ac Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Jun 2021 01:14:06 -0300 Subject: shader: Fix disabled and unwritten attributes and varyings --- src/video_core/renderer_opengl/renderer_opengl.cpp | 35 ++++++++++++---------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b8777643b..dab0afe6d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -140,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, } AddTelemetryFields(); InitOpenGLObjects(); + + // Initialize default attributes to match hardware's disabled attributes + GLint max_attribs{}; + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs); + for (GLint attrib = 0; attrib < max_attribs; ++attrib) { + glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 0.0f); + } + // Enable seamless cubemaps when per texture parameters are not available + if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { + glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); + } + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); + + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } } RendererOpenGL::~RendererOpenGL() = default; @@ -256,21 +276,6 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); - - // Enable seamless cubemaps when per texture parameters are not available - if 
(!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { - glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); - } - - // Enable unified vertex attributes and query vertex buffer address when the driver supports it - if (device.HasVertexBufferUnifiedMemory()) { - glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); - glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); - - glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); - glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, - &vertex_buffer_address); - } } void RendererOpenGL::AddTelemetryFields() { -- cgit v1.2.3 From fba6bd92d456b4d472ed37e663006fafeef154a9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Jun 2021 17:46:01 -0300 Subject: vk_rasterizer: Workaround bug in VK_EXT_vertex_input_dynamic_state Work around a potential bug in Nvidia's driver where updating only high attributes leaves low attributes outdated. --- src/video_core/engines/maxwell_3d.h | 4 --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 31 +++++++++++++--------- 4 files changed, 20 insertions(+), 19 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 471d5686a..1aa43523a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -305,10 +305,6 @@ public: return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); } - bool IsConstant() const { - return constant; - } - bool IsValid() const { return size != Size::Invalid; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0f0d780b5..41d2b73f4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -97,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() {
const auto gl_index = static_cast(index); // Disable constant attributes. - if (attrib.IsConstant()) { + if (attrib.constant) { glDisableVertexAttribArray(gl_index); continue; } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d089da8a4..d70153df3 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -128,7 +128,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const auto& input = regs.vertex_attrib_format[index]; auto& attribute = attributes[index]; attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.enabled.Assign(input.constant ? 0 : 1); attribute.buffer.Assign(input.buffer); attribute.offset.Assign(input.offset); attribute.type.Assign(static_cast(input.type.Value())); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index bb7301c53..99576b826 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -801,25 +801,30 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) boost::container::static_vector bindings; boost::container::static_vector attributes; + // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up + // generating dirty state. Track the highest dirty attribute and update all attributes until + // that one. 
+ size_t highest_dirty_attr{}; for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - if (!dirty[Dirty::VertexAttribute0 + index]) { - continue; + if (dirty[Dirty::VertexAttribute0 + index]) { + highest_dirty_attr = index; } + } + for (size_t index = 0; index < highest_dirty_attr; ++index) { const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; const u32 binding{attribute.buffer}; dirty[Dirty::VertexAttribute0 + index] = false; dirty[Dirty::VertexBinding0 + static_cast(binding)] = true; - - attributes.push_back({ - .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, - .pNext = nullptr, - .location = static_cast(index), - .binding = binding, - .format = attribute.IsConstant() - ? VK_FORMAT_A8B8G8R8_UNORM_PACK32 - : MaxwellToVK::VertexFormat(attribute.type, attribute.size), - .offset = attribute.offset, - }); + if (!attribute.constant) { + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast(index), + .binding = binding, + .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } } for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { if (!dirty[Dirty::VertexBinding0 + index]) { -- cgit v1.2.3 From 8722668b3c027f0132d0be07e867247debd08d30 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 29 Jun 2021 18:42:17 -0300 Subject: emit_spirv: Workaround VK_KHR_shader_float_controls on fp16 Nvidia Fix regression on Fire Emblem: Three Houses when using native fp16. 
--- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 2 files changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cde0f54c9..2ea9c9f07 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -206,6 +206,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, .has_broken_signed_operations = true, + .has_broken_fp16_float_controls = false, .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7aaa40ef2..87b843e3d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,6 +315,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, .has_broken_signed_operations = false, + .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, .ignore_nan_fp_comparisons = false, }; host_info = Shader::HostTranslateInfo{ -- cgit v1.2.3 From 57171b23f9da0d9fa4b07bb77ba5c8ed0083a792 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 29 Jun 2021 18:58:32 -0300 Subject: vulkan_device: Enable VK_EXT_extended_dynamic_state on RADV 21.2 onward --- src/video_core/vulkan_common/vulkan_device.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp 
b/src/video_core/vulkan_common/vulkan_device.cpp index 4fa1470e8..e297a3e92 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -511,10 +511,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectToolingInfo(); if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { - LOG_WARNING( - Render_Vulkan, - "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); - ext_extended_dynamic_state = false; + // Mask driver version variant + const u32 version = (properties.driverVersion << 3) >> 3; + if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) { + LOG_WARNING(Render_Vulkan, + "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state"); + ext_extended_dynamic_state = false; + } } if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); -- cgit v1.2.3 From dbee32d302a5944bc8e99b55d956013503b66c6c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 1 Jul 2021 20:32:30 -0400 Subject: gl_shader_cache: Fixes for async shaders --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 25 ++++++++++++++++++++-- src/video_core/renderer_opengl/gl_shader_cache.h | 2 ++ 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2ea9c9f07..2d7eb3e33 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -328,11 +328,32 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() { if (is_new) { pipeline = CreateGraphicsPipeline(); } + if (!pipeline) { + return nullptr; + } current_pipeline = pipeline.get(); - if (!pipeline || !pipeline->IsBuilt()) { + 
return BuiltPipeline(current_pipeline); +} + +GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; + } + if (!use_asynchronous_shaders) { + return pipeline; + } + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. + if (maxwell3d.regs.zeta_enable) { return nullptr; } - return pipeline.get(); + // If games are using a small index count, we can assume these are full screen quads. + // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + return pipeline; + } + return nullptr; } ComputePipeline* ShaderCache::CurrentComputePipeline() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 9d5306293..a34110b37 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -54,6 +54,8 @@ public: private: GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; + std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( -- cgit v1.2.3 From 7277d7fe96d53ae2b73491d91e0a54caf0206fe7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 3 Jul 2021 01:49:59 -0400 Subject: vulkan_device: Blacklist ampere devices from float16 math --- src/video_core/vulkan_common/vulkan_device.cpp | 29 ++++++++++++++++++-------- src/video_core/vulkan_common/vulkan_device.h | 6 +++--- 2 files changed, 23 insertions(+), 12 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e297a3e92..7d66a43e7 100644 --- 
a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -194,12 +194,22 @@ std::unordered_map GetFormatProperties(vk::Physica return format_properties; } +std::vector GetSupportedExtensions(vk::PhysicalDevice physical) { + const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); + std::vector supported_extensions(std::size(extensions)); + for (const auto& extension : extensions) { + supported_extensions.emplace_back(extension.extensionName); + } + return supported_extensions; +} + } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, - format_properties{GetFormatProperties(physical)} { + supported_extensions{GetSupportedExtensions(physical)}, + format_properties(GetFormatProperties(physical)) { CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); @@ -510,6 +520,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectTelemetryParameters(); CollectToolingInfo(); + if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR && is_float16_supported) { + if (std::ranges::find(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != + supported_extensions.end()) { + LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); + is_float16_supported = false; + } + } if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { // Mask driver version variant const u32 version = (properties.driverVersion << 3) >> 3; @@ -778,10 +795,10 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_provoking_vertex{}; bool has_ext_vertex_input_dynamic_state{}; bool has_ext_line_rasterization{}; - for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { + for (const std::string& 
extension : supported_extensions) { const auto test = [&](std::optional> status, const char* name, bool push) { - if (extension.extensionName != std::string_view(name)) { + if (extension != name) { return; } if (push) { @@ -1064,12 +1081,6 @@ void Device::CollectTelemetryParameters() { driver_id = driver.driverID; vendor_name = driver.driverName; - - const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - reported_extensions.reserve(std::size(extensions)); - for (const auto& extension : extensions) { - reported_extensions.emplace_back(extension.extensionName); - } } void Device::CollectPhysicalMemoryInfo() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 26100166f..df394e384 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -301,7 +301,7 @@ public: /// Returns the list of available extensions. const std::vector& GetAvailableExtensions() const { - return reported_extensions; + return supported_extensions; } u64 GetDeviceLocalMemory() const { @@ -398,8 +398,8 @@ private: bool has_nsight_graphics{}; ///< Has Nsight Graphics attached // Telemetry parameters - std::string vendor_name; ///< Device's driver name. - std::vector reported_extensions; ///< Reported Vulkan extensions. + std::string vendor_name; ///< Device's driver name. + std::vector supported_extensions; ///< Reported Vulkan extensions. /// Format properties dictionary. 
std::unordered_map format_properties; -- cgit v1.2.3 From 55233c2861a72bd777b75bce20c8d4e46c17a72f Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 3 Jul 2021 03:07:50 -0400 Subject: vulkan_device: Add missing include algorithm --- src/video_core/vulkan_common/vulkan_device.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7d66a43e7..ceaee8a7e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include #include -- cgit v1.2.3 From 11f04f1022d0820a1fdba38221ecd38f19d86d9e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 00:34:53 -0400 Subject: shader: Ignore global memory ops on devices lacking int64 support --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 2 files changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2d7eb3e33..58a4f0fb4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -168,6 +168,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_descriptor_aliasing = false, .support_int8 = false, .support_int16 = false, + .support_int64 = device.HasShaderInt64(), .support_vertex_instance_id = true, .support_float_controls = false, .support_separate_denorm_behavior = false, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 87b843e3d..a2646fc6d 100644 --- 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -280,6 +280,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_descriptor_aliasing = true, .support_int8 = true, .support_int16 = device.IsShaderInt16Supported(), + .support_int64 = device.IsShaderInt64Supported(), .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == -- cgit v1.2.3 From be54aad1c40bb50c71e7bcd6465c2fd372c11cb7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 7 Jul 2021 00:18:30 -0300 Subject: maxwell_to_vk: Add R16_SNORM --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 8f9b9a11a..68a23b602 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -157,7 +157,7 @@ struct FormatTuple { {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM - {VK_FORMAT_UNDEFINED}, // R16_SNORM + {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT {VK_FORMAT_UNDEFINED}, // R16_SINT {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index ceaee8a7e..13d938434 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -116,6 +116,7 @@ std::unordered_map GetFormatProperties(vk::Physica VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R16G16_SINT, 
VK_FORMAT_R16_UNORM, + VK_FORMAT_R16_SNORM, VK_FORMAT_R16_UINT, VK_FORMAT_R8G8B8A8_SRGB, VK_FORMAT_R8G8_UNORM, -- cgit v1.2.3 From 8390286a89dd259f0ff44cc95fc20d017b58046f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 9 Jul 2021 19:00:11 -0400 Subject: renderers: Disable async shader compilation The current implementation is prone to causing graphical issues. Disable until a better solution is implemented. --- src/video_core/renderer_opengl/gl_device.cpp | 6 ++++-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 99f8769fc..563b291cd 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -182,9 +182,11 @@ Device::Device() { ? Settings::values.shader_backend.GetValue() : Settings::ShaderBackend::GLSL; + // Completely disable async shaders for now, as it causes graphical glitches + use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 
- use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && - !(is_amd || (is_intel && !is_linux)); + // use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && + // !(is_amd || (is_intel && !is_linux)); use_driver_cache = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a2646fc6d..39db35175 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -269,7 +269,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, - use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, + use_asynchronous_shaders{false}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; -- cgit v1.2.3 From 41493fbe89200a4a8321dec7b313872435c57df7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 11 Jul 2021 01:04:52 -0400 Subject: renderers: Fix clang formatting --- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13 +++++++++---- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 5 ++--- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 
dab0afe6d..c9cfa6366 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -24,8 +24,8 @@ #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/textures/decoders.h" diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 6fda06a7e..a8d04dc61 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,14 +97,19 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), + telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), + gpu(gpu_), + library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? 
CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), - state_tracker(gpu), scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), + memory_allocator(device, false), + state_tracker(gpu), + scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f0ae0b0d6..18482e1d0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -503,9 +503,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { vertex_attributes.push_back({ .location = static_cast(index), .binding = 0, - .format = type == 1 ? VK_FORMAT_R32_SFLOAT - : type == 2 ? VK_FORMAT_R32_SINT - : VK_FORMAT_R32_UINT, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? 
VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT, .offset = 0, }); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 99576b826..c7a07fdd8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -141,7 +141,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), - query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); -- cgit v1.2.3 From 49946cf780c317b4c5ccabb52ec433eba01c1970 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sun, 11 Jul 2021 22:10:38 -0400 Subject: shader_recompiler, video_core: Resolve clang errors Silences the following warnings-turned-errors: -Wsign-conversion -Wunused-private-field -Wbraced-scalar-init -Wunused-variable And some other errors --- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 622267147..2bd48d697 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -104,9 +104,7 @@ public: template static auto MakeConfigureSpecFunc() { - return [](GraphicsPipeline* pipeline, bool is_indexed) { - pipeline->ConfigureImpl(is_indexed); - }; + return [](GraphicsPipeline* pl, bool 
is_indexed) { pl->ConfigureImpl(is_indexed); }; } private: -- cgit v1.2.3 From 8c166c68d46d160162caa9b588f1e762c57e52f4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Jul 2021 23:26:13 -0300 Subject: gl_shader_cache: Properly implement asynchronous shaders --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 58a4f0fb4..24f035c37 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -318,7 +318,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { SetXfbState(graphics_key.xfb_state, regs); } if (current_pipeline && graphics_key == current_pipeline->Key()) { - return current_pipeline->IsBuilt() ? current_pipeline : nullptr; + return BuiltPipeline(current_pipeline); } return CurrentGraphicsPipelineSlowPath(); } -- cgit v1.2.3 From 94af0a00f67c9f28fcaf170458e55b7a95de76bf Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 12 Jul 2021 02:03:25 -0400 Subject: glsl: Clamp shared mem size to GL_MAX_COMPUTE_SHARED_MEMORY_SIZE --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 24f035c37..7ecafc862 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -211,6 +211,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, + .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), }, host_info{ 
.support_float16 = false, -- cgit v1.2.3 From e1ed218b418cd1ed94f6f25ccd0db86b63bd6bb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Jul 2021 03:48:30 -0300 Subject: renderer_opengl: Use ARB_separate_shader_objects Ensures that states set for a particular stage are not attached to other stages which may not need them. --- .../renderer_opengl/gl_compute_pipeline.cpp | 10 +-- .../renderer_opengl/gl_graphics_pipeline.cpp | 62 ++++++------- .../renderer_opengl/gl_graphics_pipeline.h | 2 +- src/video_core/renderer_opengl/gl_shader_manager.h | 100 ++++++++++++++++----- src/video_core/renderer_opengl/gl_shader_util.cpp | 57 ++++++------ src/video_core/renderer_opengl/gl_shader_util.h | 6 +- src/video_core/renderer_opengl/renderer_opengl.cpp | 11 ++- src/video_core/renderer_opengl/renderer_opengl.h | 3 +- src/video_core/renderer_opengl/util_shaders.cpp | 19 ++-- 9 files changed, 154 insertions(+), 116 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index c63e87a56..aa1cc592f 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -46,17 +46,13 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { switch (device.GetShaderBackend()) { case Settings::ShaderBackend::GLSL: - source_program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); - LinkProgram(source_program.handle); + source_program = CreateProgram(code, GL_COMPUTE_SHADER); break; case Settings::ShaderBackend::GLASM: assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); break; case Settings::ShaderBackend::SPIRV: - source_program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, source_program.handle, code_v); - 
LinkProgram(source_program.handle); + source_program = CreateProgram(code_v, GL_COMPUTE_SHADER); break; } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), @@ -154,7 +150,7 @@ void ComputePipeline::Configure() { if (assembly_program.handle != 0) { program_manager.BindComputeAssemblyProgram(assembly_program.handle); } else { - program_manager.BindProgram(source_program.handle); + program_manager.BindComputeProgram(source_program.handle); } buffer_cache.UnbindComputeTextureBuffers(); size_t texbuf_index{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 1f19b5825..c8b2d833d 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -237,44 +237,32 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{ - [this, device, sources, sources_spirv, shader_notify](ShaderContext::Context*) mutable { - if (!device.UseAssemblyShaders()) { - program.handle = glCreateProgram(); - } - for (size_t stage = 0; stage < 5; ++stage) { - switch (device.GetShaderBackend()) { - case Settings::ShaderBackend::GLSL: { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } - AttachShader(Stage(stage), program.handle, code); - } break; - case Settings::ShaderBackend::GLASM: { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - } break; - case Settings::ShaderBackend::SPIRV: { - const auto code{sources_spirv[stage]}; - if (code.empty()) { - continue; - } - AttachShader(Stage(stage), program.handle, code); - } break; + auto func{[this, device, sources, sources_spirv, + shader_notify](ShaderContext::Context*) mutable { + for (size_t stage = 0; stage < 5; ++stage) { + switch (device.GetShaderBackend()) { + case 
Settings::ShaderBackend::GLSL: + if (!sources[stage].empty()) { + source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); } + break; + case Settings::ShaderBackend::GLASM: + if (!sources[stage].empty()) { + assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); + } + break; + case Settings::ShaderBackend::SPIRV: + if (!sources_spirv[stage].empty()) { + source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage)); + } + break; } - if (!device.UseAssemblyShaders()) { - LinkProgram(program.handle); - } - if (shader_notify) { - shader_notify->MarkShaderComplete(); - } - is_built.store(true, std::memory_order_relaxed); - }}; + } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + is_built = true; + }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); } else { @@ -449,7 +437,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if (assembly_programs[0].handle != 0) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { - program_manager.BindProgram(program.handle); + program_manager.BindSourcePrograms(source_programs); } const ImageId* views_it{image_view_ids.data()}; GLsizei texture_binding = 0; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 5f5d57385..5e34b9537 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -129,7 +129,7 @@ private: void (*configure_func)(GraphicsPipeline*, bool){}; - OGLProgram program; + std::array source_programs; std::array assembly_programs; u32 enabled_stages_mask{}; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 88b734bcb..d7ef0775d 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -24,34 +24,68 @@ class 
ProgramManager { public: explicit ProgramManager(const Device& device) { + glCreateProgramPipelines(1, &pipeline.handle); if (device.UseAssemblyShaders()) { glEnable(GL_COMPUTE_PROGRAM_NV); } } - void BindProgram(GLuint program) { - if (current_source_program == program) { - return; - } - current_source_program = program; + void BindComputeProgram(GLuint program) { glUseProgram(program); + is_compute_bound = true; } void BindComputeAssemblyProgram(GLuint program) { - if (current_compute_assembly_program != program) { - current_compute_assembly_program = program; + if (current_assembly_compute_program != program) { + current_assembly_compute_program = program; glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); } - if (current_source_program != 0) { - current_source_program = 0; - glUseProgram(0); + UnbindPipeline(); + } + + void BindSourcePrograms(std::span programs) { + static constexpr std::array stage_enums{ + GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, + GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, + }; + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); + } + } + BindPipeline(); + } + + void BindPresentPrograms(GLuint vertex, GLuint fragment) { + if (current_programs[0] != vertex) { + current_programs[0] = vertex; + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); + } + if (current_programs[4] != fragment) { + current_programs[4] = fragment; + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); + } + glUseProgramStages( + pipeline.handle, + GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); + current_programs[1] = 0; + current_programs[2] = 0; + current_programs[3] = 0; + + if (current_stage_mask != 0) { + current_stage_mask = 0; + for (const GLenum 
program_type : ASSEMBLY_PROGRAM_ENUMS) { + glDisable(program_type); + } } + BindPipeline(); } void BindAssemblyPrograms(std::span programs, u32 stage_mask) { - const u32 changed_mask = current_assembly_mask ^ stage_mask; - current_assembly_mask = stage_mask; + const u32 changed_mask = current_stage_mask ^ stage_mask; + current_stage_mask = stage_mask; if (changed_mask != 0) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) { @@ -65,25 +99,47 @@ public: } } for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (current_assembly_programs[stage] != programs[stage].handle) { - current_assembly_programs[stage] = programs[stage].handle; + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); } } - if (current_source_program != 0) { - current_source_program = 0; - glUseProgram(0); - } + UnbindPipeline(); } void RestoreGuestCompute() {} private: - GLuint current_source_program = 0; + void BindPipeline() { + if (!is_pipeline_bound) { + is_pipeline_bound = true; + glBindProgramPipeline(pipeline.handle); + } + UnbindCompute(); + } + + void UnbindPipeline() { + if (is_pipeline_bound) { + is_pipeline_bound = false; + glBindProgramPipeline(0); + } + UnbindCompute(); + } + + void UnbindCompute() { + if (is_compute_bound) { + is_compute_bound = false; + glUseProgram(0); + } + } + + OGLPipeline pipeline; + bool is_pipeline_bound{}; + bool is_compute_bound{}; - u32 current_assembly_mask = 0; - std::array current_assembly_programs{}; - GLuint current_compute_assembly_program = 0; + u32 current_stage_mask = 0; + std::array current_programs{}; + GLuint current_assembly_compute_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 5109985f1..d432072ad 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ 
b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -13,6 +13,33 @@ namespace OpenGL { +static OGLProgram LinkSeparableProgram(GLuint shader) { + OGLProgram program; + program.handle = glCreateProgram(); + glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + glAttachShader(program.handle, shader); + glLinkProgram(program.handle); + if (!Settings::values.renderer_debug) { + return program; + } + GLint link_status{}; + glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status); + + GLint log_length{}; + glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return program; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program.handle, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } + return program; +} + static void LogShader(GLuint shader, std::string_view code = {}) { GLint shader_status{}; glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); @@ -36,7 +63,7 @@ static void LogShader(GLuint shader, std::string_view code = {}) { } } -void AttachShader(GLenum stage, GLuint program, std::string_view code) { +OGLProgram CreateProgram(std::string_view code, GLenum stage) { OGLShader shader; shader.handle = glCreateShader(stage); @@ -44,45 +71,23 @@ void AttachShader(GLenum stage, GLuint program, std::string_view code) { const GLchar* const code_ptr = code.data(); glShaderSource(shader.handle, 1, &code_ptr, &length); glCompileShader(shader.handle); - glAttachShader(program, shader.handle); if (Settings::values.renderer_debug) { LogShader(shader.handle, code); } + return LinkSeparableProgram(shader.handle); } -void AttachShader(GLenum stage, GLuint program, std::span code) { +OGLProgram CreateProgram(std::span code, GLenum stage) { OGLShader shader; shader.handle = glCreateShader(stage); glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), 
static_cast(code.size_bytes())); glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); - glAttachShader(program, shader.handle); if (Settings::values.renderer_debug) { LogShader(shader.handle); } -} - -void LinkProgram(GLuint program) { - glLinkProgram(program); - if (!Settings::values.renderer_debug) { - return; - } - GLint link_status{}; - glGetProgramiv(program, GL_LINK_STATUS, &link_status); - - GLint log_length{}; - glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); - if (log_length == 0) { - return; - } - std::string log(log_length, 0); - glGetProgramInfoLog(program, log_length, nullptr, log.data()); - if (link_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "{}", log); - } else { - LOG_WARNING(Render_OpenGL, "{}", log); - } + return LinkSeparableProgram(shader.handle); } OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index ff5aa024f..4e1a2a8e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -17,11 +17,9 @@ namespace OpenGL { -void AttachShader(GLenum stage, GLuint program, std::string_view code); +OGLProgram CreateProgram(std::string_view code, GLenum stage); -void AttachShader(GLenum stage, GLuint program, std::span code); - -void LinkProgram(GLuint program); +OGLProgram CreateProgram(std::span code, GLenum stage); OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c9cfa6366..d15167e19 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -251,10 +251,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs - 
present_program.handle = glCreateProgram(); - AttachShader(GL_VERTEX_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_VERT); - AttachShader(GL_FRAGMENT_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_FRAG); - LinkProgram(present_program.handle); + present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); + present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); // Generate presentation sampler present_sampler.Create(); @@ -340,8 +338,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - program_manager.BindProgram(present_program.handle); - glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); + program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle); + glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, + ortho_matrix.data()); const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index b3ee55665..d455f572f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -110,7 +110,8 @@ private: // OpenGL object IDs OGLSampler present_sampler; OGLBuffer vertex_buffer; - OGLProgram present_program; + OGLProgram present_vertex; + OGLProgram present_fragment; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8aa0683c8..37a4d1d9d 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -42,12 +42,7 @@ using VideoCore::Surface::BytesPerBlock; 
namespace { OGLProgram MakeProgram(std::string_view source) { - OGLProgram program; - OGLShader shader; - program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, program.handle, source); - LinkProgram(program.handle); - return program; + return CreateProgram(source, GL_COMPUTE_SHADER); } size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { @@ -84,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; - program_manager.BindProgram(astc_decoder_program.handle); + program_manager.BindComputeProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); @@ -132,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); + program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); @@ -171,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); + program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); @@ -220,7 +215,7 @@ 
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), "Non-power of two images are not implemented"); - program_manager.BindProgram(pitch_unswizzle_program.handle); + program_manager.BindComputeProgram(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); @@ -248,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span Date: Sat, 17 Jul 2021 00:59:57 -0400 Subject: gl_device: Simplify GLASM setting logic --- src/video_core/renderer_opengl/gl_device.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 563b291cd..6afe6c1e1 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -172,16 +172,14 @@ Device::Device() { // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - use_assembly_shaders = - Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && - GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && - GLAD_GL_NV_transform_feedback2; - - shader_backend = (Settings::values.shader_backend.GetValue() == - Settings::ShaderBackend::GLASM) == use_assembly_shaders - ? 
Settings::values.shader_backend.GetValue() - : Settings::ShaderBackend::GLSL; - + shader_backend = Settings::values.shader_backend.GetValue(); + use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM && + GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && + GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { + LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); + shader_backend = Settings::ShaderBackend::GLSL; + } // Completely disable async shaders for now, as it causes graphical glitches use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. @@ -194,11 +192,6 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - - if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { - LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); - } - if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) { LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported"); } -- cgit v1.2.3 From 56478bc9ac5a01ca5c73ba72faae1a5eaae0f8cb Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 17 Jul 2021 16:16:23 -0400 Subject: shader: Fix disabled attribute default values --- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index d15167e19..285e78384 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -145,7 +145,7 @@ 
RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, GLint max_attribs{}; glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs); for (GLint attrib = 0; attrib < max_attribs; ++attrib) { - glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 0.0f); + glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f); } // Enable seamless cubemaps when per texture parameters are not available if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { -- cgit v1.2.3 From 258f35515d61d01049d2e433146cab808837bb7d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 18 Jul 2021 21:07:12 -0300 Subject: shader_environment: Receive cache version from outside This allows us to invalidate OpenGL and Vulkan separately in the future. --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 10 +++++++--- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 9 ++++++--- src/video_core/shader_environment.cpp | 11 +++++------ src/video_core/shader_environment.h | 9 +++++---- 4 files changed, 23 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7ecafc862..8d6cc074c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -48,9 +48,12 @@ using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; +using VideoCommon::LoadPipelines; using VideoCommon::SerializePipeline; using Context = ShaderContext::Context; +constexpr u32 CACHE_VERSION = 5; + template auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); @@ -287,7 +290,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, }); ++state.total; }}; - VideoCommon::LoadPipelines(stop_loading, shader_cache_filename, load_compute, load_graphics); + 
LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics); std::unique_lock lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); @@ -394,7 +397,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { env_ptrs.push_back(&environments.envs[index]); } } - SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); + SerializePipeline(graphics_key, env_ptrs, shader_cache_filename, CACHE_VERSION); return pipeline; } @@ -492,7 +495,8 @@ std::unique_ptr ShaderCache::CreateComputePipeline( if (!pipeline || shader_cache_filename.empty()) { return pipeline; } - SerializePipeline(key, std::array{&env}, shader_cache_filename); + SerializePipeline(key, std::array{&env}, shader_cache_filename, + CACHE_VERSION); return pipeline; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 39db35175..2ce8b4156 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -54,6 +54,8 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; +constexpr u32 CACHE_VERSION = 5; + template auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); @@ -434,7 +436,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading }); ++state.total; }}; - VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics); + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute, + load_graphics); std::unique_lock lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); @@ -562,7 +565,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env_ptrs.push_back(&envs[index]); } } - SerializePipeline(key, env_ptrs, pipeline_cache_filename); + SerializePipeline(key, env_ptrs, 
pipeline_cache_filename, CACHE_VERSION); }); return pipeline; } @@ -581,7 +584,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( } serialization_thread.QueueWork([this, key, env = std::move(env)] { SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + pipeline_cache_filename, CACHE_VERSION); }); return pipeline; } diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 429cab30d..8a4581c19 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,6 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 5; constexpr size_t INST_SIZE = sizeof(u64); @@ -370,7 +369,7 @@ std::array FileEnvironment::WorkgroupSize() const { } void SerializePipeline(std::span key, std::span envs, - const std::filesystem::path& filename) try { + const std::filesystem::path& filename, u32 cache_version) try { std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); file.exceptions(std::ifstream::failbit); if (!file.is_open()) { @@ -381,7 +380,7 @@ void SerializePipeline(std::span key, std::span(&CACHE_VERSION), sizeof(CACHE_VERSION)); + .write(reinterpret_cast(&cache_version), sizeof(cache_version)); } if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { return; @@ -402,7 +401,7 @@ void SerializePipeline(std::span key, std::span load_compute, Common::UniqueFunction> load_graphics) try { std::ifstream file(filename, std::ios::binary | std::ios::ate); @@ -417,13 +416,13 @@ void LoadPipelines( u32 cache_version; file.read(magic_number.data(), magic_number.size()) .read(reinterpret_cast(&cache_version), sizeof(cache_version)); - if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) { file.close(); if (Common::FS::RemoveFile(filename)) { if (magic_number != MAGIC_NUMBER) 
{ LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); } - if (cache_version != CACHE_VERSION) { + if (cache_version != expected_cache_version) { LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); } } else { diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index d26dbfaab..2079979db 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -164,18 +164,19 @@ private: }; void SerializePipeline(std::span key, std::span envs, - const std::filesystem::path& filename); + const std::filesystem::path& filename, u32 cache_version); template -void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) { +void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename, + u32 cache_version) { static_assert(std::is_trivially_copyable_v); static_assert(std::has_unique_object_representations_v); SerializePipeline(std::span(reinterpret_cast(&key), sizeof(key)), - std::span(envs.data(), envs.size()), filename); + std::span(envs.data(), envs.size()), filename, cache_version); } void LoadPipelines( - std::stop_token stop_loading, const std::filesystem::path& filename, + std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version, Common::UniqueFunction load_compute, Common::UniqueFunction> load_graphics); -- cgit v1.2.3 From 8381490a04f4618ec5be90904815b409e3f4ca59 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 03:05:41 -0300 Subject: opengl: Fix asynchronous shaders Wait for shader to build before configuring it, and wait for the shader to build before sharing it with other contexts. 
--- .../renderer_opengl/gl_graphics_pipeline.cpp | 30 +++++++++++++++++++--- .../renderer_opengl/gl_graphics_pipeline.h | 7 ++++- 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index c8b2d833d..fac0034fb 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -237,10 +237,12 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{[this, device, sources, sources_spirv, - shader_notify](ShaderContext::Context*) mutable { + const bool in_parallel = thread_worker != nullptr; + const auto backend = device.GetShaderBackend(); + auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv), + shader_notify, backend, in_parallel](ShaderContext::Context*) mutable { for (size_t stage = 0; stage < 5; ++stage) { - switch (device.GetShaderBackend()) { + switch (backend) { case Settings::ShaderBackend::GLSL: if (!sources[stage].empty()) { source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); @@ -249,6 +251,10 @@ GraphicsPipeline::GraphicsPipeline( case Settings::ShaderBackend::GLASM: if (!sources[stage].empty()) { assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); + if (in_parallel) { + // Make sure program is built before continuing when building in parallel + glGetString(GL_PROGRAM_ERROR_STRING_NV); + } } break; case Settings::ShaderBackend::SPIRV: @@ -258,10 +264,20 @@ GraphicsPipeline::GraphicsPipeline( break; } } + if (in_parallel && backend != Settings::ShaderBackend::GLASM) { + // Make sure programs have built if we are building shaders in parallel + for (OGLProgram& program : source_programs) { + if (program.handle != 0) { + GLint status{}; + glGetProgramiv(program.handle, 
GL_LINK_STATUS, &status); + } + } + } if (shader_notify) { shader_notify->MarkShaderComplete(); } is_built = true; + built_condvar.notify_one(); }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); @@ -434,6 +450,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); + if (!is_built.load(std::memory_order::relaxed)) { + WaitForBuild(); + } if (assembly_programs[0].handle != 0) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { @@ -545,4 +564,9 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { num_xfb_strides = static_cast(current_stream - xfb_streams.data()); } +void GraphicsPipeline::WaitForBuild() { + std::unique_lock lock{built_mutex}; + built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 5e34b9537..4e28d9a42 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -119,6 +119,8 @@ private: void GenerateTransformFeedbackState(); + void WaitForBuild(); + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -143,13 +145,16 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; - std::atomic_bool is_built{false}; static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; GLsizei num_xfb_strides{}; std::array xfb_attribs{}; std::array xfb_streams{}; + + std::mutex built_mutex; + std::condition_variable built_condvar; + std::atomic_bool is_built{false}; }; } // namespace OpenGL -- cgit v1.2.3 From 3c6d440015d7ffb81eedbfcd7ee1aab1ea87ee2a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 03:08:06 -0300 Subject: Revert "renderers: Disable async shader 
compilation" This reverts commit 4a152767286717fa69bfc94846a124a366f70065. --- src/video_core/renderer_opengl/gl_device.cpp | 6 ++---- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 6afe6c1e1..9692b8e94 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -180,11 +180,9 @@ Device::Device() { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); shader_backend = Settings::ShaderBackend::GLSL; } - // Completely disable async shaders for now, as it causes graphical glitches - use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. - // use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && - // !(is_amd || (is_intel && !is_linux)); + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && + !(is_amd || (is_intel && !is_linux)); use_driver_cache = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2ce8b4156..57b163247 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -271,7 +271,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, - use_asynchronous_shaders{false}, + use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, 
workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; -- cgit v1.2.3 From f6796cad9c4259aa13aab1f8b2e27392e07432b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 21:51:10 -0300 Subject: vulkan_device: Blacklist Volta and older from VK_KHR_push_descriptor Causes crashes on Link's Awakening intro. It's hard to debug if it's our fault due to bugs in validation layers. --- src/video_core/vulkan_common/vulkan_device.cpp | 43 +++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 13d938434..44afdc1cd 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -34,6 +34,12 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{ }; } // namespace Alternatives +enum class NvidiaArchitecture { + AmpereOrNewer, + Turing, + VoltaOrOlder, +}; + constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_MAINTENANCE1_EXTENSION_NAME, VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, @@ -198,13 +204,34 @@ std::unordered_map GetFormatProperties(vk::Physica std::vector GetSupportedExtensions(vk::PhysicalDevice physical) { const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - std::vector supported_extensions(std::size(extensions)); + std::vector supported_extensions; + supported_extensions.reserve(extensions.size()); for (const auto& extension : extensions) { supported_extensions.emplace_back(extension.extensionName); } return supported_extensions; } +NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, + std::span exts) { + if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) { + VkPhysicalDeviceFragmentShadingRatePropertiesKHR 
shading_rate_props{}; + shading_rate_props.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; + VkPhysicalDeviceProperties2KHR physical_properties{}; + physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + physical_properties.pNext = &shading_rate_props; + physical.GetProperties2KHR(physical_properties); + if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { + // Only Ampere and newer support this feature + return NvidiaArchitecture::AmpereOrNewer; + } + } + if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) { + return NvidiaArchitecture::Turing; + } + return NvidiaArchitecture::VoltaOrOlder; +} } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, @@ -522,11 +549,19 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectTelemetryParameters(); CollectToolingInfo(); - if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR && is_float16_supported) { - if (std::ranges::find(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != - supported_extensions.end()) { + if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { + const auto arch = GetNvidiaArchitecture(physical, supported_extensions); + switch (arch) { + case NvidiaArchitecture::AmpereOrNewer: LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); is_float16_supported = false; + break; + case NvidiaArchitecture::Turing: + break; + case NvidiaArchitecture::VoltaOrOlder: + LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor"); + khr_push_descriptor = false; + break; } } if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { -- cgit v1.2.3 From a55ff22900c5261915eb8b88f2c0f18a4eb6f30f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 21:52:29 -0300 Subject: vulkan/blit_image: Commit descriptor sets within 
worker thread Fixes a race condition. The descriptor pool is not thread safe, so we have to commit descriptor sets within the same thread. --- src/video_core/renderer_vulkan/blit_image.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 4058f62cd..6c1b2f063 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -376,11 +376,11 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplacePipeline(key); - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, - &device = device](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, + src_view](vk::CommandBuffer cmdbuf) { // TODO: Barriers + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, @@ -402,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkSampler sampler = *nearest_sampler; const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); - const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); scheduler.RequestRenderpass(dst_framebuffer); 
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, - src_stencil_view, descriptor_set, - &device = device](vk::CommandBuffer cmdbuf) { + src_stencil_view, this](vk::CommandBuffer cmdbuf) { // TODO: Barriers + const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, src_stencil_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); @@ -448,14 +447,12 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb const VkPipelineLayout layout = *one_texture_pipeline_layout; const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); const VkExtent2D extent{ .width = src_image_view.size.width, .height = src_image_view.size.height, }; scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, - &device = device](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -476,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb .tex_scale = {viewport.width, viewport.height}, .tex_offset = {0.0f, 0.0f}, }; + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); // TODO: Barriers -- cgit v1.2.3