diff options
Diffstat (limited to 'src/video_core/renderer_vulkan')
54 files changed, 1898 insertions, 1835 deletions
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d1f0ea932..81a39a3b8 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -40,7 +40,6 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { } // Anonymous namespace void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) { - const auto& clip = regs.view_volume_clip_control; const std::array enabled_lut = {regs.polygon_offset_point_enable, regs.polygon_offset_line_enable, regs.polygon_offset_fill_enable}; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index d7f1ae89f..d22de1d81 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -78,9 +78,10 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w case Tegra::Texture::WrapMode::MirrorOnceBorder: UNIMPLEMENTED(); return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + default: + UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); + return {}; } - UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); - return {}; } VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { @@ -117,90 +118,101 @@ struct FormatTuple { VkFormat format; ///< Vulkan format int usage = 0; ///< Describes image format usage } constexpr tex_format_tuples[] = { - {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // ABGR8U - {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // ABGR8S - {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // ABGR8UI - {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5U - {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10U - {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5U (flipped with swizzle) - {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8U - {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8UI - {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // RGBA16F - {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // RGBA16U - {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // RGBA16S - {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // RGBA16UI - {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // R11FG11FB10F - {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // RGBA32UI - {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // DXT1 - {VK_FORMAT_BC2_UNORM_BLOCK}, // DXT23 - {VK_FORMAT_BC3_UNORM_BLOCK}, // DXT45 - {VK_FORMAT_BC4_UNORM_BLOCK}, // DXN1 - {VK_FORMAT_BC5_UNORM_BLOCK}, // DXN2UNORM - {VK_FORMAT_BC5_SNORM_BLOCK}, // DXN2SNORM - {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7U - {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 - {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 - {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 - {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8 - {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F - {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F - {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F - {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F - {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U - {VK_FORMAT_UNDEFINED}, // R16S - {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI - {VK_FORMAT_UNDEFINED}, // R16I - {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 - {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F - {VK_FORMAT_UNDEFINED}, // RG16UI - {VK_FORMAT_UNDEFINED}, // RG16I - {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // RG16S - {VK_FORMAT_UNDEFINED}, // RGB32F - {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // RGBA8_SRGB - {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // RG8U - {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // RG8S - {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // RG8UI - {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // RG32UI - {VK_FORMAT_UNDEFINED}, // RGBX16F - {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32UI - {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32I - {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 - {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 - {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 - {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB - {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB - {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB - {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB - {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7U_SRGB - {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // R4G4B4A4U - {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB - {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB - {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB - {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB - {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5 - {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB - {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8 - {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB - {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6 - {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB - {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10 - {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB - {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12 - {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB - {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6 - {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB - {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5 - {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB - {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9F + {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // A8B8G8R8_UNORM + {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // A8B8G8R8_SNORM + {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT + {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT + {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM + {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM + {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM + {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM + {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT + {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle) + {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM + {VK_FORMAT_R8_SNORM, Attachable | Storage}, // R8_SNORM + {VK_FORMAT_R8_SINT, Attachable | Storage}, // R8_SINT + {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8_UINT + {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // R16G16B16A16_FLOAT + {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // R16G16B16A16_UNORM + {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // R16G16B16A16_SNORM + {VK_FORMAT_R16G16B16A16_SINT, Attachable | Storage}, // R16G16B16A16_SINT + {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // R16G16B16A16_UINT + {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // B10G11R11_FLOAT + {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // R32G32B32A32_UINT + {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // BC1_RGBA_UNORM + {VK_FORMAT_BC2_UNORM_BLOCK}, // BC2_UNORM + {VK_FORMAT_BC3_UNORM_BLOCK}, // BC3_UNORM + {VK_FORMAT_BC4_UNORM_BLOCK}, // BC4_UNORM + {VK_FORMAT_BC4_SNORM_BLOCK}, // BC4_SNORM + {VK_FORMAT_BC5_UNORM_BLOCK}, // BC5_UNORM + {VK_FORMAT_BC5_SNORM_BLOCK}, // BC5_SNORM + {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7_UNORM + {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UFLOAT + {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SFLOAT + {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4_UNORM + {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // B8G8R8A8_UNORM + {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // R32G32B32A32_FLOAT + {VK_FORMAT_R32G32B32A32_SINT, Attachable | Storage}, // R32G32B32A32_SINT + {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // R32G32_FLOAT + {VK_FORMAT_R32G32_SINT, Attachable | Storage}, // R32G32_SINT + {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT + {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT + {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM + {VK_FORMAT_UNDEFINED}, // R16_SNORM + {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT + {VK_FORMAT_UNDEFINED}, // R16_SINT + {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM + {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT + {VK_FORMAT_UNDEFINED}, // R16G16_UINT + {VK_FORMAT_UNDEFINED}, // R16G16_SINT + {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM + {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT + {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB + {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM + {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM + {VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT + {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // R8G8_UINT + {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // R32G32_UINT + {VK_FORMAT_UNDEFINED}, // R16G16B16X16_FLOAT + {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT + {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT + {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM + {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5_UNORM + {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM + {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB + {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB + {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB + {VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB + {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB + {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // A4B4G4R4_UNORM + {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB + {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB + {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB + {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB + {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5_UNORM + {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB + {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8_UNORM + {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB + {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM + {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB + {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM + {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB + {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM + {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB + {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM + {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB + {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM + {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB + {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT // Depth formats - {VK_FORMAT_D32_SFLOAT, Attachable}, // Z32F - {VK_FORMAT_D16_UNORM, Attachable}, // Z16 + {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT + {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM // DepthStencil formats - {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // Z24S8 - {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8Z24 (emulated) - {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // Z32FS8 + {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT + {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated) + {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // D32_FLOAT_S8_UINT }; static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat); @@ -221,7 +233,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true}; } - // Use ABGR8 on hardware that doesn't support ASTC natively + // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 @@ -287,9 +299,10 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; case Maxwell::PrimitiveTopology::Patches: return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + default: + UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); + return {}; } - UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); - return {}; } VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { @@ -314,6 +327,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R16G16B16A16_UNORM; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + default: + break; } break; case Maxwell::VertexAttribute::Type::SignedNorm: @@ -336,6 +351,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R16G16B16A16_SNORM; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_SNORM_PACK32; + default: + break; } break; case Maxwell::VertexAttribute::Type::UnsignedScaled: @@ -358,6 +375,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R16G16B16A16_USCALED; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_USCALED_PACK32; + default: + break; } break; case Maxwell::VertexAttribute::Type::SignedScaled: @@ -380,6 +399,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R16G16B16A16_SSCALED; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_SSCALED_PACK32; + default: + break; } break; case Maxwell::VertexAttribute::Type::UnsignedInt: @@ -410,6 +431,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32A32_UINT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_UINT_PACK32; + default: + break; } break; case Maxwell::VertexAttribute::Type::SignedInt: @@ -440,6 +463,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32A32_SINT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_SINT_PACK32; + default: + break; } break; case Maxwell::VertexAttribute::Type::Float: @@ -460,6 +485,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return VK_FORMAT_R32G32B32A32_SFLOAT; + default: + break; } break; } diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp index 435c8c1b8..5b01020ec 100644 --- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp +++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp @@ -65,10 +65,10 @@ bool NsightAftermathTracker::Initialize() { return false; } - dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash"; + dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash"; - (void)FileUtil::DeleteDirRecursively(dump_dir); - if (!FileUtil::CreateDir(dump_dir)) { + (void)Common::FS::DeleteDirRecursively(dump_dir); + if (!Common::FS::CreateDir(dump_dir)) { LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory"); return false; } @@ -106,7 +106,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { return; } - FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb"); + Common::FS::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb"); if (!file.IsOpen()) { LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); return; @@ -156,12 +156,12 @@ void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump, }(); std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size); - if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) { + if (Common::FS::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) { LOG_ERROR(Render_Vulkan, "Failed to write dump file"); return; } const std::string_view json_view(json.data(), json.size()); - if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) { + if (Common::FS::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) { LOG_ERROR(Render_Vulkan, "Failed to write JSON"); return; } @@ -180,7 +180,7 @@ void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_ const std::string path = fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]); - FileUtil::IOFile file(path, "wb"); + Common::FS::IOFile file(path, "wb"); if (!file.IsOpen()) { LOG_ERROR(Render_Vulkan, "Failed to create file {}", path); return; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 2258479f5..715182b3b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -25,9 +25,9 @@ #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" @@ -56,7 +56,7 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* data, [[maybe_unused]] void* user_data) { - const char* message{data->pMessage}; + const char* const message{data->pMessage}; if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { LOG_CRITICAL(Render_Vulkan, "{}", message); @@ -78,7 +78,7 @@ Common::DynamicLibrary OpenVulkanLibrary() { if (!libvulkan_env || !library.Open(libvulkan_env)) { // Use the libvulkan.dylib from the application bundle. const std::string filename = - FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; + Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; library.Open(filename.c_str()); } #else @@ -86,7 +86,7 @@ Common::DynamicLibrary OpenVulkanLibrary() { if (!library.Open(filename.c_str())) { // Android devices may not have libvulkan.so.1, only libvulkan.so. filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); - library.Open(filename.c_str()); + (void)library.Open(filename.c_str()); } #endif return library; @@ -237,8 +237,12 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext } // Anonymous namespace -RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system) - : RendererBase(window), system{system} {} +RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, + Core::Frontend::EmuWindow& emu_window, + Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, + std::unique_ptr<Core::Frontend::GraphicsContext> context) + : RendererBase{emu_window, std::move(context)}, telemetry_session{telemetry_session_}, + cpu_memory{cpu_memory_}, gpu{gpu_} {} RendererVulkan::~RendererVulkan() { ShutDown(); @@ -265,11 +269,11 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { scheduler->WaitWorker(); swapchain->AcquireNextImage(); - const auto [fence, render_semaphore] = blit_screen->Draw(*framebuffer, use_accelerated); + const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated); - scheduler->Flush(false, render_semaphore); + scheduler->Flush(render_semaphore); - if (swapchain->Present(render_semaphore, fence)) { + if (swapchain->Present(render_semaphore)) { blit_screen->Recreate(); } @@ -279,11 +283,6 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.PollEvents(); } -bool RendererVulkan::TryPresent(int /*timeout_ms*/) { - // TODO (bunnei): ImplementMe - return true; -} - bool RendererVulkan::Init() { library = OpenVulkanLibrary(); instance = CreateInstance(library, dld, render_window.GetWindowInfo().type, @@ -296,23 +295,21 @@ bool RendererVulkan::Init() { memory_manager = std::make_unique<VKMemoryManager>(*device); - resource_manager = std::make_unique<VKResourceManager>(*device); + state_tracker = std::make_unique<StateTracker>(gpu); + + scheduler = std::make_unique<VKScheduler>(*device, *state_tracker); const auto& framebuffer = render_window.GetFramebufferLayout(); - swapchain = std::make_unique<VKSwapchain>(*surface, *device); + swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler); swapchain->Create(framebuffer.width, framebuffer.height, false); - state_tracker = std::make_unique<StateTracker>(system); - - scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker); - - rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device, - *resource_manager, *memory_manager, - *state_tracker, *scheduler); + rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(), + cpu_memory, screen_info, *device, + *memory_manager, *state_tracker, *scheduler); - blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device, - *resource_manager, *memory_manager, *swapchain, - *scheduler, screen_info); + blit_screen = + std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device, + *memory_manager, *swapchain, *scheduler, screen_info); return true; } @@ -330,7 +327,6 @@ void RendererVulkan::ShutDown() { scheduler.reset(); swapchain.reset(); memory_manager.reset(); - resource_manager.reset(); device.reset(); } @@ -438,8 +434,7 @@ void RendererVulkan::Report() const { LOG_INFO(Render_Vulkan, "Device: {}", model_name); LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version); - auto& telemetry_session = system.TelemetrySession(); - constexpr auto field = Telemetry::FieldType::UserSystem; + static constexpr auto field = Common::Telemetry::FieldType::UserSystem; telemetry_session.AddField(field, "GPU_Vendor", vendor_name); telemetry_session.AddField(field, "GPU_Model", model_name); telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 522b5bff8..49a4141ec 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -14,7 +14,15 @@ #include "video_core/renderer_vulkan/wrapper.h" namespace Core { -class System; +class TelemetrySession; +} + +namespace Core::Memory { +class Memory; +} + +namespace Tegra { +class GPU; } namespace Vulkan { @@ -22,9 +30,7 @@ namespace Vulkan { class StateTracker; class VKBlitScreen; class VKDevice; -class VKFence; class VKMemoryManager; -class VKResourceManager; class VKSwapchain; class VKScheduler; class VKImage; @@ -38,13 +44,15 @@ struct VKScreenInfo { class RendererVulkan final : public VideoCore::RendererBase { public: - explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); + explicit RendererVulkan(Core::TelemetrySession& telemtry_session, + Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, + Tegra::GPU& gpu, + std::unique_ptr<Core::Frontend::GraphicsContext> context); ~RendererVulkan() override; bool Init() override; void ShutDown() override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - bool TryPresent(int timeout_ms) override; static std::vector<std::string> EnumerateDevices(); @@ -57,7 +65,9 @@ private: void Report() const; - Core::System& system; + Core::TelemetrySession& telemetry_session; + Core::Memory::Memory& cpu_memory; + Tegra::GPU& gpu; Common::DynamicLibrary library; vk::InstanceDispatch dld; @@ -69,11 +79,10 @@ private: vk::DebugCallback debug_callback; std::unique_ptr<VKDevice> device; - std::unique_ptr<VKSwapchain> swapchain; std::unique_ptr<VKMemoryManager> memory_manager; - std::unique_ptr<VKResourceManager> resource_manager; std::unique_ptr<StateTracker> state_tracker; std::unique_ptr<VKScheduler> scheduler; + std::unique_ptr<VKSwapchain> swapchain; std::unique_ptr<VKBlitScreen> blit_screen; }; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 866813465..b5b60309e 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -12,11 +12,9 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/math_util.h" - #include "core/core.h" #include "core/frontend/emu_window.h" #include "core/memory.h" - #include "video_core/gpu.h" #include "video_core/morton.h" #include "video_core/rasterizer_interface.h" @@ -24,8 +22,8 @@ #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_image.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_swapchain.h" @@ -187,9 +185,9 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { switch (framebuffer.pixel_format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: + case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM: return VK_FORMAT_A8B8G8R8_UNORM_PACK32; - case Tegra::FramebufferConfig::PixelFormat::RGB565: + case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM: return VK_FORMAT_R5G6B5_UNORM_PACK16; default: UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", @@ -210,17 +208,15 @@ struct VKBlitScreen::BufferData { // Unaligned image data goes here }; -VKBlitScreen::VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window, - VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, - VKSwapchain& swapchain, VKScheduler& scheduler, - const VKScreenInfo& screen_info) - : system{system}, render_window{render_window}, rasterizer{rasterizer}, device{device}, - resource_manager{resource_manager}, memory_manager{memory_manager}, swapchain{swapchain}, - scheduler{scheduler}, image_count{swapchain.GetImageCount()}, screen_info{screen_info} { - watches.resize(image_count); - std::generate(watches.begin(), watches.end(), - []() { return std::make_unique<VKFenceWatch>(); }); +VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, + Core::Frontend::EmuWindow& render_window_, + VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_, + VKMemoryManager& memory_manager_, VKSwapchain& swapchain_, + VKScheduler& scheduler_, const VKScreenInfo& screen_info_) + : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, + device{device_}, memory_manager{memory_manager_}, swapchain{swapchain_}, + scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { + resource_ticks.resize(image_count); CreateStaticResources(); CreateDynamicResources(); @@ -232,15 +228,16 @@ void VKBlitScreen::Recreate() { CreateDynamicResources(); } -std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated) { +VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated) { RefreshResources(framebuffer); // Finish any pending renderpass scheduler.RequestOutsideRenderPassOperationContext(); const std::size_t image_index = swapchain.GetImageIndex(); - watches[image_index]->Watch(scheduler.GetFence()); + + scheduler.Wait(resource_ticks[image_index]); + resource_ticks[image_index] = scheduler.CurrentTick(); VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get(); @@ -259,7 +256,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon const auto pixel_format = VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; - const auto host_ptr = system.Memory().GetPointer(framebuffer_addr); + const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr); rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); // TODO(Rodrigo): Read this from HLE @@ -343,7 +340,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon cmdbuf.EndRenderPass(); }); - return {scheduler.GetFence(), *semaphores[image_index]}; + return *semaphores[image_index]; } void VKBlitScreen::CreateStaticResources() { @@ -696,6 +693,7 @@ void VKBlitScreen::CreateFramebuffers() { .flags = 0, .renderPass = *renderpass, .attachmentCount = 1, + .pAttachments = nullptr, .width = size.width, .height = size.height, .layers = 1, @@ -710,7 +708,7 @@ void VKBlitScreen::CreateFramebuffers() { void VKBlitScreen::ReleaseRawImages() { for (std::size_t i = 0; i < raw_images.size(); ++i) { - watches[i]->Wait(); + scheduler.Wait(resource_ticks.at(i)); } raw_images.clear(); raw_buffer_commits.clear(); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 243640fab..8f2839214 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -5,16 +5,18 @@ #pragma once #include <memory> -#include <tuple> #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Core { class System; } +namespace Core::Memory { +class Memory; +} + namespace Core::Frontend { class EmuWindow; } @@ -30,26 +32,26 @@ class RasterizerInterface; namespace Vulkan { struct ScreenInfo; + class RasterizerVulkan; class VKDevice; -class VKFence; class VKImage; class VKScheduler; class VKSwapchain; class VKBlitScreen final { public: - explicit VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window, + explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, + Core::Frontend::EmuWindow& render_window, VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, - VKSwapchain& swapchain, VKScheduler& scheduler, - const VKScreenInfo& screen_info); + VKMemoryManager& memory_manager, VKSwapchain& swapchain, + VKScheduler& scheduler, const VKScreenInfo& screen_info); ~VKBlitScreen(); void Recreate(); - std::tuple<VKFence&, VkSemaphore> Draw(const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated); + [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer, + bool use_accelerated); private: struct BufferData; @@ -81,11 +83,10 @@ private: u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, std::size_t image_index) const; - Core::System& system; + Core::Memory::Memory& cpu_memory; Core::Frontend::EmuWindow& render_window; VideoCore::RasterizerInterface& rasterizer; const VKDevice& device; - VKResourceManager& resource_manager; VKMemoryManager& memory_manager; VKSwapchain& swapchain; VKScheduler& scheduler; @@ -106,7 +107,7 @@ private: vk::Buffer buffer; VKMemoryCommit buffer_commit; - std::vector<std::unique_ptr<VKFenceWatch>> watches; + std::vector<u64> resource_ticks; std::vector<vk::Semaphore> semaphores; std::vector<std::unique_ptr<VKImage>> raw_images; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 2be38d419..d9d3da9ea 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -39,16 +39,17 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) - : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = static_cast<VkDeviceSize>(size); - ci.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; + : BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { + const VkBufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = static_cast<VkDeviceSize>(size), + .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; buffer.handle = device.GetLogical().CreateBuffer(ci); buffer.commit = memory_manager.Commit(buffer.handle, false); @@ -66,16 +67,17 @@ void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = handle; - barrier.offset = offset; - barrier.size = size; + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = UPLOAD_ACCESS_BARRIERS, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = size, + }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, barrier, {}); }); @@ -87,16 +89,17 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { const VkBuffer handle = Handle(); scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = handle; - barrier.offset = offset; - barrier.size = size; + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = size, + }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | @@ -142,14 +145,15 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst }); } -VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool) - : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, - CreateStreamBuffer(device, - scheduler)}, - device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ - staging_pool} {} +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, + const VKDevice& device_, VKMemoryManager& memory_manager_, + VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) + : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, gpu_memory, cpu_memory, + CreateStreamBuffer(device_, + scheduler_)}, + device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ + staging_pool_} {} VKBufferCache::~VKBufferCache() = default; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 991ee451c..7fb5ceedf 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -13,10 +13,6 @@ #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/wrapper.h" -namespace Core { -class System; -} - namespace Vulkan { class VKDevice; @@ -53,7 +49,8 @@ private: class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { public: - explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKBufferCache(); diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp new file mode 100644 index 000000000..6339f4fe0 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp @@ -0,0 +1,46 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstddef> + +#include "video_core/renderer_vulkan/vk_command_pool.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/wrapper.h" + +namespace Vulkan { + +constexpr size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; + +struct CommandPool::Pool { + vk::CommandPool handle; + vk::CommandBuffers cmdbufs; +}; + +CommandPool::CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device) + : ResourcePool(master_semaphore, COMMAND_BUFFER_POOL_SIZE), device{device} {} + +CommandPool::~CommandPool() = default; + +void CommandPool::Allocate(size_t begin, size_t end) { + // Command buffers are going to be commited, recorded, executed every single usage cycle. + // They are also going to be reseted when commited. + Pool& pool = pools.emplace_back(); + pool.handle = device.GetLogical().CreateCommandPool({ + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = + VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = device.GetGraphicsFamily(), + }); + pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE); +} + +VkCommandBuffer CommandPool::Commit() { + const size_t index = CommitResource(); + const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; + const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; + return pools[pool_index].cmdbufs[sub_index]; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h new file mode 100644 index 000000000..b9cb3fb5d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_command_pool.h @@ -0,0 +1,34 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <vector> + +#include "video_core/renderer_vulkan/vk_resource_pool.h" +#include "video_core/renderer_vulkan/wrapper.h" + +namespace Vulkan { + +class MasterSemaphore; +class VKDevice; + +class CommandPool final : public ResourcePool { +public: + explicit CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device); + ~CommandPool() override; + + void Allocate(size_t begin, size_t end) override; + + VkCommandBuffer Commit(); + +private: + struct Pool; + + const VKDevice& device; + std::vector<Pool> pools; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index da71e710c..9637c6059 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -112,35 +112,36 @@ constexpr u8 quad_array[] = { 0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; + 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, +}; VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { - VkDescriptorSetLayoutBinding binding; - binding.binding = 0; - binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - binding.descriptorCount = 1; - binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - binding.pImmutableSamplers = nullptr; - return binding; + return { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }; } VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { - VkDescriptorUpdateTemplateEntryKHR entry; - entry.dstBinding = 0; - entry.dstArrayElement = 0; - entry.descriptorCount = 1; - entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - entry.offset = 0; - entry.stride = sizeof(DescriptorUpdateEntry); - return entry; + return { + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), + }; } VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { - VkPushConstantRange range; - range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - range.offset = 0; - range.size = static_cast<u32>(size); - return range; + return { + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = static_cast<u32>(size), + }; } // Uint8 SPIR-V module. Generated from the "shaders/" directory. @@ -218,7 +219,8 @@ constexpr u8 uint8_pass[] = { 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; + 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, +}; // Quad indexed SPIR-V module. Generated from the "shaders/" directory. constexpr u8 QUAD_INDEXED_SPV[] = { @@ -341,32 +343,37 @@ constexpr u8 QUAD_INDEXED_SPV[] = { 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; + 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, +}; std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { - std::array<VkDescriptorSetLayoutBinding, 2> bindings; - bindings[0].binding = 0; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[0].descriptorCount = 1; - bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[0].pImmutableSamplers = nullptr; - bindings[1].binding = 1; - bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[1].descriptorCount = 1; - bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[1].pImmutableSamplers = nullptr; - return bindings; + return {{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + }}; } VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { - VkDescriptorUpdateTemplateEntryKHR entry; - entry.dstBinding = 0; - entry.dstArrayElement = 0; - entry.descriptorCount = 2; - entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - entry.offset = 0; - entry.stride = sizeof(DescriptorUpdateEntry); - return entry; + return { + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 2, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), + }; } } // Anonymous namespace @@ -376,37 +383,37 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, const u8* code) { - VkDescriptorSetLayoutCreateInfo descriptor_layout_ci; - descriptor_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_layout_ci.pNext = nullptr; - descriptor_layout_ci.flags = 0; - descriptor_layout_ci.bindingCount = bindings.size(); - descriptor_layout_ci.pBindings = bindings.data(); - descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(descriptor_layout_ci); - - VkPipelineLayoutCreateInfo pipeline_layout_ci; - pipeline_layout_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_ci.pNext = nullptr; - pipeline_layout_ci.flags = 0; - pipeline_layout_ci.setLayoutCount = 1; - pipeline_layout_ci.pSetLayouts = descriptor_set_layout.address(); - pipeline_layout_ci.pushConstantRangeCount = push_constants.size(); - pipeline_layout_ci.pPushConstantRanges = push_constants.data(); - layout = device.GetLogical().CreatePipelineLayout(pipeline_layout_ci); + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = bindings.size(), + .pBindings = bindings.data(), + }); + + layout = device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = push_constants.size(), + .pPushConstantRanges = push_constants.data(), + }); if (!templates.empty()) { - VkDescriptorUpdateTemplateCreateInfoKHR template_ci; - template_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; - template_ci.pNext = nullptr; - template_ci.flags = 0; - template_ci.descriptorUpdateEntryCount = templates.size(); - template_ci.pDescriptorUpdateEntries = templates.data(); - template_ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; - template_ci.descriptorSetLayout = *descriptor_set_layout; - template_ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - template_ci.pipelineLayout = *layout; - template_ci.set = 0; - descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR(template_ci); + descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = templates.size(), + .pDescriptorUpdateEntries = templates.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *layout, + .set = 0, + }); descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); } @@ -414,42 +421,42 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); std::memcpy(code_copy.get(), code, code_size); - VkShaderModuleCreateInfo module_ci; - module_ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - module_ci.pNext = nullptr; - module_ci.flags = 0; - module_ci.codeSize = code_size; - module_ci.pCode = code_copy.get(); - module = device.GetLogical().CreateShaderModule(module_ci); - - VkComputePipelineCreateInfo pipeline_ci; - pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - pipeline_ci.pNext = nullptr; - pipeline_ci.flags = 0; - pipeline_ci.layout = *layout; - pipeline_ci.basePipelineHandle = nullptr; - pipeline_ci.basePipelineIndex = 0; - - VkPipelineShaderStageCreateInfo& stage_ci = pipeline_ci.stage; - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stage_ci.module = *module; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci); + module = device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code_size, + .pCode = code_copy.get(), + }); + + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *layout, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }); } VKComputePass::~VKComputePass() = default; -VkDescriptorSet VKComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, - VKFence& fence) { +VkDescriptorSet VKComputePass::CommitDescriptorSet( + VKUpdateDescriptorQueue& update_descriptor_queue) { if (!descriptor_template) { return nullptr; } - const auto set = descriptor_allocator->Commit(fence); + const VkDescriptorSet set = descriptor_allocator->Commit(); update_descriptor_queue.Send(*descriptor_template, set); return set; } @@ -473,7 +480,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); - const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); + const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); @@ -516,13 +523,13 @@ Uint8Pass::~Uint8Pass() = default; std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset) { - const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16)); + const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); - const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); + const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, @@ -585,7 +592,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); - const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); + const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 230b526bc..acc94f27e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -15,7 +15,6 @@ namespace Vulkan { class VKDevice; -class VKFence; class VKScheduler; class VKStagingBufferPool; class VKUpdateDescriptorQueue; @@ -30,8 +29,7 @@ public: ~VKComputePass(); protected: - VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, - VKFence& fence); + VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); vk::DescriptorUpdateTemplateKHR descriptor_template; vk::PipelineLayout layout; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 281bf9ac3..9be72dc9b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -32,7 +32,7 @@ VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { if (!descriptor_template) { return {}; } - const auto set = descriptor_allocator.Commit(scheduler.GetFence()); + const VkDescriptorSet set = descriptor_allocator.Commit(); update_descriptor_queue.Send(*descriptor_template, set); return set; } @@ -43,12 +43,13 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { // TODO(Rodrigo): Maybe make individual bindings here? for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { - VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); - entry.binding = binding++; - entry.descriptorType = descriptor_type; - entry.descriptorCount = 1; - entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - entry.pImmutableSamplers = nullptr; + bindings.push_back({ + .binding = binding++, + .descriptorType = descriptor_type, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); } }; add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); @@ -58,25 +59,25 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); - VkDescriptorSetLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.bindingCount = static_cast<u32>(bindings.size()); - ci.pBindings = bindings.data(); - return device.GetLogical().CreateDescriptorSetLayout(ci); + return device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast<u32>(bindings.size()), + .pBindings = bindings.data(), + }); } vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { - VkPipelineLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.setLayoutCount = 1; - ci.pSetLayouts = descriptor_set_layout.address(); - ci.pushConstantRangeCount = 0; - ci.pPushConstantRanges = nullptr; - return device.GetLogical().CreatePipelineLayout(ci); + return device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); } vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { @@ -89,59 +90,63 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat return {}; } - VkDescriptorUpdateTemplateCreateInfoKHR ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; - ci.pNext = nullptr; - ci.flags = 0; - ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); - ci.pDescriptorUpdateEntries = template_entries.data(); - ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; - ci.descriptorSetLayout = *descriptor_set_layout; - ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - ci.pipelineLayout = *layout; - ci.set = DESCRIPTOR_SET; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); + return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), + .pDescriptorUpdateEntries = template_entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *layout, + .set = DESCRIPTOR_SET, + }); } vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { device.SaveShader(code); - VkShaderModuleCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.codeSize = code.size() * sizeof(u32); - ci.pCode = code.data(); - return device.GetLogical().CreateShaderModule(ci); + return device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code.size() * sizeof(u32), + .pCode = code.data(), + }); } vk::Pipeline VKComputePipeline::CreatePipeline() const { - VkComputePipelineCreateInfo ci; - VkPipelineShaderStageCreateInfo& stage_ci = ci.stage; - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stage_ci.module = *shader_module; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; - subgroup_size_ci.sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; - subgroup_size_ci.pNext = nullptr; - subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; + + VkComputePipelineCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *shader_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *layout, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }; + + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { - stage_ci.pNext = &subgroup_size_ci; + ci.stage.pNext = &subgroup_size_ci; } - ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.layout = *layout; - ci.basePipelineHandle = nullptr; - ci.basePipelineIndex = 0; return device.GetLogical().CreateComputePipeline(ci); } diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 9259b618d..f38e089d5 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -7,7 +7,8 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -15,14 +16,15 @@ namespace Vulkan { // Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. constexpr std::size_t SETS_GROW_RATE = 0x20; -DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool, - VkDescriptorSetLayout layout) - : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {} +DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, + VkDescriptorSetLayout layout_) + : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), + descriptor_pool{descriptor_pool_}, layout{layout_} {} DescriptorAllocator::~DescriptorAllocator() = default; -VkDescriptorSet DescriptorAllocator::Commit(VKFence& fence) { - const std::size_t index = CommitResource(fence); +VkDescriptorSet DescriptorAllocator::Commit() { + const std::size_t index = CommitResource(); return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; } @@ -30,8 +32,9 @@ void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); } -VKDescriptorPool::VKDescriptorPool(const VKDevice& device) - : device{device}, active_pool{AllocateNewPool()} {} +VKDescriptorPool::VKDescriptorPool(const VKDevice& device_, VKScheduler& scheduler) + : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ + AllocateNewPool()} {} VKDescriptorPool::~VKDescriptorPool() = default; @@ -43,27 +46,30 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; - - VkDescriptorPoolCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - ci.maxSets = num_sets; - ci.poolSizeCount = static_cast<u32>(std::size(pool_sizes)); - ci.pPoolSizes = std::data(pool_sizes); + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, + }; + + const VkDescriptorPoolCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = num_sets, + .poolSizeCount = static_cast<u32>(std::size(pool_sizes)), + .pPoolSizes = std::data(pool_sizes), + }; return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); } vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count) { const std::vector layout_copies(count, layout); - VkDescriptorSetAllocateInfo ai; - ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - ai.pNext = nullptr; - ai.descriptorPool = **active_pool; - ai.descriptorSetCount = static_cast<u32>(count); - ai.pSetLayouts = layout_copies.data(); + VkDescriptorSetAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = **active_pool, + .descriptorSetCount = static_cast<u32>(count), + .pSetLayouts = layout_copies.data(), + }; vk::DescriptorSets sets = active_pool->Allocate(ai); if (!sets.IsOutOfPoolMemory()) { diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index 9efa66bef..544f32a20 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -6,21 +6,24 @@ #include <vector> -#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { +class VKDevice; class VKDescriptorPool; +class VKScheduler; -class DescriptorAllocator final : public VKFencedPool { +class DescriptorAllocator final : public ResourcePool { public: explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); ~DescriptorAllocator() override; + DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; DescriptorAllocator(const DescriptorAllocator&) = delete; - VkDescriptorSet Commit(VKFence& fence); + VkDescriptorSet Commit(); protected: void Allocate(std::size_t begin, std::size_t end) override; @@ -36,15 +39,19 @@ class VKDescriptorPool final { friend DescriptorAllocator; public: - explicit VKDescriptorPool(const VKDevice& device); + explicit VKDescriptorPool(const VKDevice& device, VKScheduler& scheduler); ~VKDescriptorPool(); + VKDescriptorPool(const VKDescriptorPool&) = delete; + VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; + private: vk::DescriptorPool* AllocateNewPool(); vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); const VKDevice& device; + MasterSemaphore& master_semaphore; std::vector<vk::DescriptorPool> pools; vk::DescriptorPool* active_pool; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 9226e591c..3d8d3213d 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -42,6 +42,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_8BIT_STORAGE_EXTENSION_NAME, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -84,14 +85,19 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_A8B8G8R8_UNORM_PACK32, VK_FORMAT_A8B8G8R8_UINT_PACK32, VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VK_FORMAT_A8B8G8R8_SINT_PACK32, VK_FORMAT_A8B8G8R8_SRGB_PACK32, VK_FORMAT_B5G6R5_UNORM_PACK16, VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VK_FORMAT_A2B10G10R10_UINT_PACK32, VK_FORMAT_A1R5G5B5_UNORM_PACK16, VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32G32B32A32_UINT, VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32_UINT, + VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_UINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R16G16B16A16_UNORM, @@ -103,8 +109,11 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_R8G8B8A8_SRGB, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8_SNORM, + VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_SNORM, + VK_FORMAT_R8_SINT, VK_FORMAT_R8_UINT, VK_FORMAT_B10G11R11_UFLOAT_PACK32, VK_FORMAT_R32_SFLOAT, @@ -124,6 +133,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_BC2_UNORM_BLOCK, VK_FORMAT_BC3_UNORM_BLOCK, VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC4_SNORM_BLOCK, VK_FORMAT_BC5_UNORM_BLOCK, VK_FORMAT_BC5_SNORM_BLOCK, VK_FORMAT_BC7_UNORM_BLOCK, @@ -241,6 +251,13 @@ bool VKDevice::Create() { .inheritedQueries = false, }; + VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, + .pNext = nullptr, + .timelineSemaphore = true, + }; + SetNext(next, timeline_semaphore); + VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, .pNext = nullptr, @@ -373,6 +390,8 @@ bool VKDevice::Create() { graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); + + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); return true; } @@ -757,14 +776,15 @@ std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const queue_cis.reserve(unique_queue_families.size()); for (const u32 queue_family : unique_queue_families) { - queue_cis.push_back({ + auto& ci = queue_cis.emplace_back(VkDeviceQueueCreateInfo{ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .pNext = nullptr, .flags = 0, .queueFamilyIndex = queue_family, .queueCount = 1, - .pQueuePriorities = &QUEUE_PRIORITY, + .pQueuePriorities = nullptr, }); + ci.pQueuePriorities = &QUEUE_PRIORITY; } return queue_cis; diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index ae5c21baa..26a233db1 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -122,6 +122,11 @@ public: return properties.limits.maxPushConstantsSize; } + /// Returns the maximum size for shared memory. + u32 GetMaxComputeSharedMemorySize() const { + return properties.limits.maxComputeSharedMemorySize; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -197,6 +202,11 @@ public: return reported_extensions; } + /// Returns true if the setting for async shader compilation is enabled. + bool UseAsynchronousShaders() const { + return use_asynchronous_shaders; + } + /// Checks if the physical device is suitable. static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); @@ -247,6 +257,9 @@ private: bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + // Asynchronous Graphics Pipeline setting + bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline + // Telemetry parameters std::string vendor_name; ///< Device's driver name. std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index a02be5487..5babbdd0b 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -30,7 +30,7 @@ void InnerFence::Queue() { ASSERT(!event); event = device.GetLogical().CreateEvent(); - ticks = scheduler.Ticks(); + ticks = scheduler.CurrentTick(); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) { @@ -52,7 +52,7 @@ void InnerFence::Wait() { } ASSERT(event); - if (ticks >= scheduler.Ticks()) { + if (ticks >= scheduler.CurrentTick()) { scheduler.Flush(); } while (!IsEventSignalled()) { @@ -71,12 +71,12 @@ bool InnerFence::IsEventSignalled() const { } } -VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const VKDevice& device, VKScheduler& scheduler, - VKTextureCache& texture_cache, VKBufferCache& buffer_cache, - VKQueryCache& query_cache) - : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache), - device{device}, scheduler{scheduler} {} +VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, + Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache, + VKBufferCache& buffer_cache, VKQueryCache& query_cache, + const VKDevice& device_, VKScheduler& scheduler_) + : GenericFenceManager(rasterizer, gpu, texture_cache, buffer_cache, query_cache), + device{device_}, scheduler{scheduler_} {} Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) { return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed); diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 043fe7947..1547d6d30 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -55,10 +55,10 @@ using GenericFenceManager = class VKFenceManager final : public GenericFenceManager { public: - explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const VKDevice& device, VKScheduler& scheduler, - VKTextureCache& texture_cache, VKBufferCache& buffer_cache, - VKQueryCache& query_cache); + explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, + Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache, + VKBufferCache& buffer_cache, VKQueryCache& query_cache, + const VKDevice& device, VKScheduler& scheduler); protected: Fence CreateFence(u32 value, bool is_stubbed) override; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index aaf930b90..a4b9e7ef5 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -78,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche const GraphicsPipelineCacheKey& key, vk::Span<VkDescriptorSetLayoutBinding> bindings, const SPIRVProgram& program) - : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, + : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()}, descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, descriptor_allocator{descriptor_pool, *descriptor_set_layout}, update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( program)}, - renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline( - key.renderpass_params, - program)} {} + renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)}, + pipeline{CreatePipeline(cache_key.renderpass_params, program)} {} VKGraphicsPipeline::~VKGraphicsPipeline() = default; @@ -94,7 +93,7 @@ VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { if (!descriptor_template) { return {}; } - const auto set = descriptor_allocator.Commit(scheduler.GetFence()); + const VkDescriptorSet set = descriptor_allocator.Commit(); update_descriptor_queue.Send(*descriptor_template, set); return set; } @@ -181,7 +180,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, const SPIRVProgram& program) const { - const auto& state = fixed_state; + const auto& state = cache_key.fixed_state; const auto& viewport_swizzles = state.viewport_swizzles; FixedPipelineState::DynamicState dynamic; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index a1d699a6c..58aa35efd 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -19,7 +19,27 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct GraphicsPipelineCacheKey; +struct GraphicsPipelineCacheKey { + RenderPassParams renderpass_params; + u32 padding; + std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; + FixedPipelineState fixed_state; + + std::size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + std::size_t Size() const noexcept { + return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); +static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); +static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); class VKDescriptorPool; class VKDevice; @@ -54,6 +74,10 @@ public: return renderpass; } + GraphicsPipelineCacheKey GetCacheKey() const { + return cache_key; + } + private: vk::DescriptorSetLayout CreateDescriptorSetLayout( vk::Span<VkDescriptorSetLayoutBinding> bindings) const; @@ -70,7 +94,7 @@ private: const VKDevice& device; VKScheduler& scheduler; - const FixedPipelineState fixed_state; + const GraphicsPipelineCacheKey cache_key; const u64 hash; vk::DescriptorSetLayout descriptor_set_layout; diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp index 9bceb3861..1c418ea17 100644 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ b/src/video_core/renderer_vulkan/vk_image.cpp @@ -102,21 +102,29 @@ bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num void VKImage::CreatePresentView() { // Image type has to be 2D to be presented. - VkImageViewCreateInfo image_view_ci; - image_view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - image_view_ci.pNext = nullptr; - image_view_ci.flags = 0; - image_view_ci.image = *image; - image_view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; - image_view_ci.format = format; - image_view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - image_view_ci.subresourceRange.aspectMask = aspect_mask; - image_view_ci.subresourceRange.baseMipLevel = 0; - image_view_ci.subresourceRange.levelCount = 1; - image_view_ci.subresourceRange.baseArrayLayer = 0; - image_view_ci.subresourceRange.layerCount = 1; - present_view = device.GetLogical().CreateImageView(image_view_ci); + present_view = device.GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = *image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = format, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); } VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp new file mode 100644 index 000000000..ae26e558d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -0,0 +1,56 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <atomic> +#include <chrono> + +#include "core/settings.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/wrapper.h" + +namespace Vulkan { + +using namespace std::chrono_literals; + +MasterSemaphore::MasterSemaphore(const VKDevice& device) { + static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR, + .pNext = nullptr, + .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR, + .initialValue = 0, + }; + static constexpr VkSemaphoreCreateInfo semaphore_ci{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &semaphore_type_ci, + .flags = 0, + }; + semaphore = device.GetLogical().CreateSemaphore(semaphore_ci); + + if (!Settings::values.renderer_debug) { + return; + } + // Validation layers have a bug where they fail to track resource usage when using timeline + // semaphores and synchronizing with GetSemaphoreCounterValueKHR. To workaround this issue, have + // a separate thread waiting for each timeline semaphore value. + debug_thread = std::thread([this] { + u64 counter = 0; + while (!shutdown) { + if (semaphore.Wait(counter, 10'000'000)) { + ++counter; + } + } + }); +} + +MasterSemaphore::~MasterSemaphore() { + shutdown = true; + + // This thread might not be started + if (debug_thread.joinable()) { + debug_thread.join(); + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h new file mode 100644 index 000000000..0e93706d7 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <atomic> +#include <thread> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/wrapper.h" + +namespace Vulkan { + +class VKDevice; + +class MasterSemaphore { +public: + explicit MasterSemaphore(const VKDevice& device); + ~MasterSemaphore(); + + /// Returns the current logical tick. + [[nodiscard]] u64 CurrentTick() const noexcept { + return current_tick; + } + + /// Returns the timeline semaphore handle. + [[nodiscard]] VkSemaphore Handle() const noexcept { + return *semaphore; + } + + /// Returns true when a tick has been hit by the GPU. + [[nodiscard]] bool IsFree(u64 tick) { + return gpu_tick >= tick; + } + + /// Advance to the logical tick. + void NextTick() noexcept { + ++current_tick; + } + + /// Refresh the known GPU tick + void Refresh() { + gpu_tick = semaphore.GetCounter(); + } + + /// Waits for a tick to be hit on the GPU + void Wait(u64 tick) { + // No need to wait if the GPU is ahead of the tick + if (IsFree(tick)) { + return; + } + // Update the GPU tick and try again + Refresh(); + if (IsFree(tick)) { + return; + } + // If none of the above is hit, fallback to a regular wait + semaphore.Wait(tick); + } + +private: + vk::Semaphore semaphore; ///< Timeline semaphore. + std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick. + std::atomic<u64> current_tick{1}; ///< Current logical tick. + std::atomic<bool> shutdown{false}; ///< True when the object is being destroyed. + std::thread debug_thread; ///< Debug thread to workaround validation layer bugs. +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index b4c650a63..24c8960ac 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -178,13 +178,12 @@ bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 t }(); // Try to allocate found type. - VkMemoryAllocateInfo memory_ai; - memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_ai.pNext = nullptr; - memory_ai.allocationSize = size; - memory_ai.memoryTypeIndex = type; - - vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai); + vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = size, + .memoryTypeIndex = type, + }); if (!memory) { LOG_CRITICAL(Render_Vulkan, "Device allocation failed!"); return false; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3da835324..5c038f4bc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -28,6 +28,7 @@ #include "video_core/shader/compiler_settings.h" #include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" +#include "video_core/shader_notify.h" namespace Vulkan { @@ -88,12 +89,13 @@ void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& bindi // Combined image samplers can be arrayed. count = container[i].size; } - VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); - entry.binding = binding++; - entry.descriptorType = descriptor_type; - entry.descriptorCount = count; - entry.stageFlags = stage_flags; - entry.pImmutableSamplers = nullptr; + bindings.push_back({ + .binding = binding++, + .descriptorType = descriptor_type, + .descriptorCount = count, + .stageFlags = stage_flags, + .pImmutableSamplers = nullptr, + }); } } @@ -133,64 +135,56 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset) - : gpu_addr{gpu_addr}, program_code{std::move(program_code)}, - registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, - compiler_settings, registry}, - entries{GenerateShaderEntries(shader_ir)} {} +Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine, Tegra::Engines::ShaderType stage, + GPUVAddr gpu_addr_, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code_, + u32 main_offset) + : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage, engine), + shader_ir(program_code, main_offset, compiler_settings, registry), + entries(GenerateShaderEntries(shader_ir)) {} Shader::~Shader() = default; -Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system, - Tegra::Engines::ShaderType stage) { - if (stage == ShaderType::Compute) { - return system.GPU().KeplerCompute(); - } else { - return system.GPU().Maxwell3D(); - } -} - -VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, - const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - VKRenderPassCache& renderpass_cache) - : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device}, - scheduler{scheduler}, descriptor_pool{descriptor_pool}, - update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {} +VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, + VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + VKRenderPassCache& renderpass_cache_) + : VideoCommon::ShaderCache<Shader>{rasterizer}, gpu{gpu_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, + scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + update_descriptor_queue{update_descriptor_queue_}, renderpass_cache{renderpass_cache_} {} VKPipelineCache::~VKPipelineCache() = default; std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { - const auto& gpu = system.GPU().Maxwell3D(); - std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto program{static_cast<Maxwell::ShaderProgram>(index)}; // Skip stages that are not enabled - if (!gpu.regs.IsShaderConfigEnabled(index)) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { continue; } - auto& memory_manager{system.GPU().MemoryManager()}; - const GPUVAddr program_addr{GetShaderAddress(system, program)}; - const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); if (!result) { - const auto host_ptr{memory_manager.GetPointer(program_addr)}; + const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; // No shader found - create a new one - constexpr u32 stage_offset = STAGE_MAIN_OFFSET; + static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); - ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); + ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code), - stage_offset); + auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr, + std::move(code), stage_offset); result = shader.get(); if (cpu_addr) { @@ -204,24 +198,43 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { return last_shaders = shaders; } -VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) { +VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( + const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (last_graphics_pipeline && last_graphics_key == key) { - return *last_graphics_pipeline; + return last_graphics_pipeline; } last_graphics_key = key; + if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { + std::unique_lock lock{pipeline_cache}; + const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); + if (is_cache_miss) { + gpu.ShaderNotify().MarkSharderBuilding(); + LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); + const auto [program, bindings] = DecompileShaders(key.fixed_state); + async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, + update_descriptor_queue, renderpass_cache, bindings, + program, key); + } + last_graphics_pipeline = pair->second.get(); + return last_graphics_pipeline; + } + const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); auto& entry = pair->second; if (is_cache_miss) { + gpu.ShaderNotify().MarkSharderBuilding(); LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key); + const auto [program, bindings] = DecompileShaders(key.fixed_state); entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, update_descriptor_queue, renderpass_cache, key, bindings, program); + gpu.ShaderNotify().MarkShaderComplete(); } - return *(last_graphics_pipeline = entry.get()); + last_graphics_pipeline = entry.get(); + return last_graphics_pipeline; } VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { @@ -234,22 +247,21 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach } LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - auto& memory_manager = system.GPU().MemoryManager(); - const auto program_addr = key.shader; + const GPUVAddr gpu_addr = key.shader; - const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); if (!shader) { // No shader found - create a new one - const auto host_ptr = memory_manager.GetPointer(program_addr); + const auto host_ptr = gpu_memory.GetPointer(gpu_addr); - ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); + ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr, - std::move(code), KERNEL_MAIN_OFFSET); + auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, + *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); shader = shader_info.get(); if (cpu_addr) { @@ -259,10 +271,15 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach } } - Specialization specialization; - specialization.workgroup_size = key.workgroup_size; - specialization.shared_memory_size = key.shared_memory_size; - + const Specialization specialization{ + .base_binding = 0, + .workgroup_size = key.workgroup_size, + .shared_memory_size = key.shared_memory_size, + .point_size = std::nullopt, + .enabled_attributes = {}, + .attribute_types = {}, + .ndc_minus_one_to_one = false, + }; const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, shader->GetRegistry(), specialization), shader->GetEntries()}; @@ -271,6 +288,12 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach return *entry; } +void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { + gpu.ShaderNotify().MarkShaderComplete(); + std::unique_lock lock{pipeline_cache}; + graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); +} + void VKPipelineCache::OnShaderRemoval(Shader* shader) { bool finished = false; const auto Finish = [&] { @@ -306,11 +329,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) { } std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> -VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { - const auto& fixed_state = key.fixed_state; - auto& memory_manager = system.GPU().MemoryManager(); - const auto& gpu = system.GPU().Maxwell3D(); - +VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { Specialization specialization; if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points || device.IsExtExtendedDynamicStateSupported()) { @@ -333,12 +352,12 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); // Skip stages that are not enabled - if (!gpu.regs.IsShaderConfigEnabled(index)) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { continue; } - const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); - const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 @@ -370,13 +389,14 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { for (u32 i = 0; i < count; ++i) { const u32 num_samplers = container[i].size; - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding; - entry.dstArrayElement = 0; - entry.descriptorCount = num_samplers; - entry.descriptorType = descriptor_type; - entry.offset = offset; - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = num_samplers, + .descriptorType = descriptor_type, + .offset = offset, + .stride = entry_size, + }); ++binding; offset += num_samplers * entry_size; @@ -389,22 +409,24 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 // Nvidia has a bug where updating multiple texels at once causes the driver to crash. // Note: Fixed in driver Windows 443.24, Linux 440.66.15 for (u32 i = 0; i < count; ++i) { - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding + i; - entry.dstArrayElement = 0; - entry.descriptorCount = 1; - entry.descriptorType = descriptor_type; - entry.offset = static_cast<std::size_t>(offset + i * entry_size); - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding + i, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = descriptor_type, + .offset = static_cast<std::size_t>(offset + i * entry_size), + .stride = entry_size, + }); } } else if (count > 0) { - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding; - entry.dstArrayElement = 0; - entry.descriptorCount = count; - entry.descriptorType = descriptor_type; - entry.offset = offset; - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = count, + .descriptorType = descriptor_type, + .offset = offset, + .stride = entry_size, + }); } offset += count * entry_size; binding += count; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 0a3fe65fb..e558e6658 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -22,6 +22,7 @@ #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/shader/async_shaders.h" #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" @@ -37,34 +38,11 @@ class RasterizerVulkan; class VKComputePipeline; class VKDescriptorPool; class VKDevice; -class VKFence; class VKScheduler; class VKUpdateDescriptorQueue; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct GraphicsPipelineCacheKey { - RenderPassParams renderpass_params; - u32 padding; - std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; - FixedPipelineState fixed_state; - - std::size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - std::size_t Size() const noexcept { - return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); -static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); -static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); - struct ComputePipelineCacheKey { GPUVAddr shader; u32 shared_memory_size; @@ -106,7 +84,8 @@ namespace Vulkan { class Shader { public: - explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, + explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine, + Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code, u32 main_offset); ~Shader(); @@ -118,22 +97,19 @@ public: return shader_ir; } - const VideoCommon::Shader::Registry& GetRegistry() const { - return registry; - } - const VideoCommon::Shader::ShaderIR& GetIR() const { return shader_ir; } + const VideoCommon::Shader::Registry& GetRegistry() const { + return registry; + } + const ShaderEntries& GetEntries() const { return entries; } private: - static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system, - Tegra::Engines::ShaderType stage); - GPUVAddr gpu_addr{}; VideoCommon::Shader::ProgramCode program_code; VideoCommon::Shader::Registry registry; @@ -143,27 +119,36 @@ private: class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { public: - explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, - const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, + explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const VKDevice& device, + VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, VKRenderPassCache& renderpass_cache); ~VKPipelineCache() override; std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); - VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); + VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, + VideoCommon::Shader::AsyncShaders& async_shaders); VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); + void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); + protected: void OnShaderRemoval(Shader* shader) final; private: std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( - const GraphicsPipelineCacheKey& key); + const FixedPipelineState& fixed_state); + + Tegra::GPU& gpu; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; - Core::System& system; const VKDevice& device; VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; @@ -178,6 +163,7 @@ private: GraphicsPipelineCacheKey last_graphics_key; VKGraphicsPipeline* last_graphics_pipeline = nullptr; + std::mutex pipeline_cache; std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> graphics_cache; std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index bc91c48cc..ee2d871e3 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -9,35 +9,33 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { +using VideoCore::QueryType; + namespace { constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION}; -constexpr VkQueryType GetTarget(VideoCore::QueryType type) { +constexpr VkQueryType GetTarget(QueryType type) { return QUERY_TARGETS[static_cast<std::size_t>(type)]; } } // Anonymous namespace -QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {} +QueryPool::QueryPool(const VKDevice& device_, VKScheduler& scheduler, QueryType type_) + : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {} QueryPool::~QueryPool() = default; -void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) { - device = &device_; - type = type_; -} - -std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) { +std::pair<VkQueryPool, u32> QueryPool::Commit() { std::size_t index; do { - index = CommitResource(fence); + index = CommitResource(); } while (usage[index]); usage[index] = true; @@ -47,14 +45,14 @@ std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) { void QueryPool::Allocate(std::size_t begin, std::size_t end) { usage.resize(end); - VkQueryPoolCreateInfo query_pool_ci; - query_pool_ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; - query_pool_ci.pNext = nullptr; - query_pool_ci.flags = 0; - query_pool_ci.queryType = GetTarget(type); - query_pool_ci.queryCount = static_cast<u32>(end - begin); - query_pool_ci.pipelineStatistics = 0; - pools.push_back(device->GetLogical().CreateQueryPool(query_pool_ci)); + pools.push_back(device.GetLogical().CreateQueryPool({ + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .queryType = GetTarget(type), + .queryCount = static_cast<u32>(end - begin), + .pipelineStatistics = 0, + })); } void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { @@ -68,30 +66,39 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; } -VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, +VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, const VKDevice& device, VKScheduler& scheduler) - : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, - QueryPool>{system, rasterizer}, - device{device}, scheduler{scheduler} { - for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) { - query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i)); + : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, + HostCounter>{rasterizer, maxwell3d, gpu_memory}, + device{device}, scheduler{scheduler}, query_pools{ + QueryPool{device, scheduler, + QueryType::SamplesPassed}, + } {} + +VKQueryCache::~VKQueryCache() { + // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class + // destructor is called. The query cache should be redesigned to have a proper ownership model + // instead of using shared pointers. + for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) { + auto& stream = Stream(static_cast<QueryType>(query_type)); + stream.Update(false); + stream.Reset(); } } -VKQueryCache::~VKQueryCache() = default; - -std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { - return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); +std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(QueryType type) { + return query_pools[static_cast<std::size_t>(type)].Commit(); } -void VKQueryCache::Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query) { +void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) { query_pools[static_cast<std::size_t>(type)].Reserve(query); } HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, - VideoCore::QueryType type) + QueryType type) : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, - type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { + type{type}, query{cache.AllocateQuery(type)}, tick{cache.Scheduler().CurrentTick()} { const vk::Device* logical = &cache.Device().GetLogical(); cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { logical->ResetQueryPoolEXT(query.first, query.second, 1); @@ -109,7 +116,7 @@ void HostCounter::EndQuery() { } u64 HostCounter::BlockingQuery() const { - if (ticks >= cache.Scheduler().Ticks()) { + if (tick >= cache.Scheduler().CurrentTick()) { cache.Scheduler().Flush(); } u64 data; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 40119e6d3..2e57fb75d 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -11,7 +11,7 @@ #include "common/common_types.h" #include "video_core/query_cache.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/wrapper.h" namespace VideoCore { @@ -28,14 +28,12 @@ class VKScheduler; using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>; -class QueryPool final : public VKFencedPool { +class QueryPool final : public ResourcePool { public: - explicit QueryPool(); + explicit QueryPool(const VKDevice& device, VKScheduler& scheduler, VideoCore::QueryType type); ~QueryPool() override; - void Initialize(const VKDevice& device, VideoCore::QueryType type); - - std::pair<VkQueryPool, u32> Commit(VKFence& fence); + std::pair<VkQueryPool, u32> Commit(); void Reserve(std::pair<VkQueryPool, u32> query); @@ -45,18 +43,18 @@ protected: private: static constexpr std::size_t GROW_STEP = 512; - const VKDevice* device = nullptr; - VideoCore::QueryType type = {}; + const VKDevice& device; + const VideoCore::QueryType type; std::vector<vk::QueryPool> pools; std::vector<bool> usage; }; class VKQueryCache final - : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, - QueryPool> { + : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter> { public: - explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, const VKDevice& device, VKScheduler& scheduler); ~VKQueryCache(); @@ -75,6 +73,7 @@ public: private: const VKDevice& device; VKScheduler& scheduler; + std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; }; class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> { @@ -91,7 +90,7 @@ private: VKQueryCache& cache; const VideoCore::QueryType type; const std::pair<VkQueryPool, u32> query; - const u64 ticks; + const u64 tick; }; class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7625871c2..f3c2483c8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -14,6 +14,7 @@ #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "core/core.h" #include "core/settings.h" #include "video_core/engines/kepler_compute.h" @@ -30,7 +31,6 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" @@ -64,20 +64,22 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; const float height = src.scale_y * 2.0f; + const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; - VkViewport viewport; - viewport.x = src.translate_x - src.scale_x; - viewport.y = src.translate_y - src.scale_y; - viewport.width = width != 0.0f ? width : 1.0f; - viewport.height = height != 0.0f ? height : 1.0f; + VkViewport viewport{ + .x = src.translate_x - src.scale_x, + .y = src.translate_y - src.scale_y, + .width = width != 0.0f ? width : 1.0f, + .height = height != 0.0f ? height : 1.0f, + .minDepth = src.translate_z - src.scale_z * reduce_z, + .maxDepth = src.translate_z + src.scale_z, + }; - const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; - viewport.minDepth = src.translate_z - src.scale_z * reduce_z; - viewport.maxDepth = src.translate_z + src.scale_z; if (!device.IsExtDepthRangeUnrestrictedSupported()) { viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); } + return viewport; } @@ -378,28 +380,32 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { } } -RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, - VKScreenInfo& screen_info, const VKDevice& device, - VKResourceManager& resource_manager, - VKMemoryManager& memory_manager, StateTracker& state_tracker, - VKScheduler& scheduler) - : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, - screen_info{screen_info}, device{device}, resource_manager{resource_manager}, - memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, - staging_pool(device, memory_manager, scheduler), descriptor_pool(device), - update_descriptor_queue(device, scheduler), renderpass_cache(device), +RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_, + Tegra::MemoryManager& gpu_memory_, + Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info_, + const VKDevice& device_, VKMemoryManager& memory_manager_, + StateTracker& state_tracker_, VKScheduler& scheduler_) + : RasterizerAccelerated(cpu_memory), gpu(gpu_), gpu_memory(gpu_memory_), + maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_), + device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_), + scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), + descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler), + renderpass_cache(device), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), - texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, - staging_pool), - pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, - renderpass_cache), - buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), - sampler_cache(device), - fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), - query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} { + texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool), + pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, + descriptor_pool, update_descriptor_queue, renderpass_cache), + buffer_cache(*this, gpu_memory, cpu_memory, device, memory_manager, scheduler, staging_pool), + sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), + fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, + scheduler), + wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window) { scheduler.SetQueryCache(query_cache); + if (device.UseAsynchronousShaders()) { + async_shaders.AllocateWorkers(); + } } RasterizerVulkan::~RasterizerVulkan() = default; @@ -407,13 +413,13 @@ RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(Vulkan_Drawing); + SCOPE_EXIT({ gpu.TickWork(); }); FlushWork(); query_cache.UpdateCounters(); - const auto& gpu = system.GPU().Maxwell3D(); GraphicsPipelineCacheKey key; - key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported()); + key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); @@ -437,10 +443,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { key.renderpass_params = GetRenderPassParams(texceptions); key.padding = 0; - auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); - scheduler.BindGraphicsPipeline(pipeline.GetHandle()); + auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); + if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { + // Async graphics pipeline was not ready. + return; + } + + scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - const auto renderpass = pipeline.GetRenderPass(); + const auto renderpass = pipeline->GetRenderPass(); const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); scheduler.RequestRenderpass(renderpass, framebuffer, render_area); @@ -450,8 +461,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { BeginTransformFeedback(); - const auto pipeline_layout = pipeline.GetLayout(); - const auto descriptor_set = pipeline.CommitDescriptorSet(); + const auto pipeline_layout = pipeline->GetLayout(); + const auto descriptor_set = pipeline->CommitDescriptorSet(); scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { if (descriptor_set) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, @@ -461,15 +472,12 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { }); EndTransformFeedback(); - - system.GPU().TickWork(); } void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); - const auto& gpu = system.GPU().Maxwell3D(); - if (!system.GPU().Maxwell3D().ShouldExecute()) { + if (!maxwell3d.ShouldExecute()) { return; } @@ -478,7 +486,7 @@ void RasterizerVulkan::Clear() { query_cache.UpdateCounters(); - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A; const bool use_depth = regs.clear_buffers.Z; @@ -508,10 +516,11 @@ void RasterizerVulkan::Clear() { const u32 color_attachment = regs.clear_buffers.RT; scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { - VkClearAttachment attachment; - attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - attachment.colorAttachment = color_attachment; - attachment.clearValue = clear_value; + const VkClearAttachment attachment{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = color_attachment, + .clearValue = clear_value, + }; cmdbuf.ClearAttachments(attachment, clear_rect); }); } @@ -529,10 +538,6 @@ void RasterizerVulkan::Clear() { scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { - VkClearValue clear_value; - clear_value.depthStencil.depth = clear_depth; - clear_value.depthStencil.stencil = clear_stencil; - VkClearAttachment attachment; attachment.aspectMask = aspect_flags; attachment.colorAttachment = 0; @@ -550,14 +555,17 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { query_cache.UpdateCounters(); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; - ComputePipelineCacheKey key; - key.shader = code_addr; - key.shared_memory_size = launch_desc.shared_alloc; - key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z}; - - auto& pipeline = pipeline_cache.GetComputePipeline(key); + const auto& launch_desc = kepler_compute.launch_description; + auto& pipeline = pipeline_cache.GetComputePipeline({ + .shader = code_addr, + .shared_memory_size = launch_desc.shared_alloc, + .workgroup_size = + { + launch_desc.block_dim_x, + launch_desc.block_dim_y, + launch_desc.block_dim_z, + }, + }); // Compute dispatches can't be executed inside a renderpass scheduler.RequestOutsideRenderPassOperationContext(); @@ -643,16 +651,14 @@ void RasterizerVulkan::SyncGuestHost() { } void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { - gpu.MemoryManager().Write<u32>(addr, value); + gpu_memory.Write<u32>(addr, value); return; } fence_manager.SignalSemaphore(addr, value); } void RasterizerVulkan::SignalSyncPoint(u32 value) { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { gpu.IncrementSyncPoint(value); return; @@ -661,7 +667,6 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) { } void RasterizerVulkan::ReleaseFences() { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { return; } @@ -739,10 +744,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerVulkan::SetupDirtyFlags() { - state_tracker.Initialize(); -} - void RasterizerVulkan::FlushWork() { static constexpr u32 DRAWS_TO_DISPATCH = 4096; @@ -766,10 +767,9 @@ void RasterizerVulkan::FlushWork() { RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { MICROPROFILE_SCOPE(Vulkan_RenderTargets); - auto& maxwell3d = system.GPU().Maxwell3D(); - auto& dirty = maxwell3d.dirty.flags; - auto& regs = maxwell3d.regs; + const auto& regs = maxwell3d.regs; + auto& dirty = maxwell3d.dirty.flags; const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; dirty[VideoCommon::Dirty::RenderTargets] = false; @@ -813,8 +813,13 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( VkRenderPass renderpass) { - FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), - std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; + FramebufferCacheKey key{ + .renderpass = renderpass, + .width = std::numeric_limits<u32>::max(), + .height = std::numeric_limits<u32>::max(), + .layers = std::numeric_limits<u32>::max(), + .views = {}, + }; const auto try_push = [&key](const View& view) { if (!view) { @@ -827,7 +832,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( return true; }; - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); for (std::size_t index = 0; index < num_attachments; ++index) { if (try_push(color_attachments[index])) { @@ -841,17 +846,17 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); auto& framebuffer = fbentry->second; if (is_cache_miss) { - VkFramebufferCreateInfo framebuffer_ci; - framebuffer_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebuffer_ci.pNext = nullptr; - framebuffer_ci.flags = 0; - framebuffer_ci.renderPass = key.renderpass; - framebuffer_ci.attachmentCount = static_cast<u32>(key.views.size()); - framebuffer_ci.pAttachments = key.views.data(); - framebuffer_ci.width = key.width; - framebuffer_ci.height = key.height; - framebuffer_ci.layers = key.layers; - framebuffer = device.GetLogical().CreateFramebuffer(framebuffer_ci); + framebuffer = device.GetLogical().CreateFramebuffer({ + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .renderPass = key.renderpass, + .attachmentCount = static_cast<u32>(key.views.size()), + .pAttachments = key.views.data(), + .width = key.width, + .height = key.height, + .layers = key.layers, + }); } return {*framebuffer, VkExtent2D{key.width, key.height}}; @@ -863,13 +868,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt bool is_instanced) { MICROPROFILE_SCOPE(Vulkan_Geometry); - const auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; SetupVertexArrays(buffer_bindings); const u32 base_instance = regs.vb_base_instance; - const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1; + const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1; const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count; @@ -930,7 +934,7 @@ void RasterizerVulkan::SetupImageTransitions( } void RasterizerVulkan::UpdateDynamicStates() { - auto& regs = system.GPU().Maxwell3D().regs; + auto& regs = maxwell3d.regs; UpdateViewportsState(regs); UpdateScissorsState(regs); UpdateDepthBias(regs); @@ -951,7 +955,7 @@ void RasterizerVulkan::UpdateDynamicStates() { } void RasterizerVulkan::BeginTransformFeedback() { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } @@ -983,7 +987,7 @@ void RasterizerVulkan::BeginTransformFeedback() { } void RasterizerVulkan::EndTransformFeedback() { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } @@ -996,7 +1000,7 @@ void RasterizerVulkan::EndTransformFeedback() { } void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { const auto& vertex_array = regs.vertex_array[index]; @@ -1022,7 +1026,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar if (params.num_vertices == 0) { return; } - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; switch (regs.draw.topology) { case Maxwell::PrimitiveTopology::Quads: { if (!params.is_indexed) { @@ -1070,8 +1074,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_ConstBuffers); - const auto& gpu = system.GPU().Maxwell3D(); - const auto& shader_stage = gpu.state.shader_stages[stage]; + const auto& shader_stage = maxwell3d.state.shader_stages[stage]; for (const auto& entry : entries.const_buffers) { SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); } @@ -1079,8 +1082,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); - auto& gpu{system.GPU()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]}; + const auto& cbufs{maxwell3d.state.shader_stages[stage]}; for (const auto& entry : entries.global_buffers) { const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); @@ -1090,19 +1092,17 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.uniform_texels) { - const auto image = GetTextureInfo(gpu, entry, stage).tic; + const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; SetupUniformTexels(image, entry); } } void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.samplers) { for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(gpu, entry, stage, i); + const auto texture = GetTextureInfo(maxwell3d, entry, stage, i); SetupTexture(texture, entry); } } @@ -1110,25 +1110,23 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std:: void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.storage_texels) { - const auto image = GetTextureInfo(gpu, entry, stage).tic; + const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; SetupStorageTexel(image, entry); } } void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Images); - const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.images) { - const auto tic = GetTextureInfo(gpu, entry, stage).tic; + const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic; SetupImage(tic, entry); } } void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_ConstBuffers); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const auto& launch_desc = kepler_compute.launch_description; for (const auto& entry : entries.const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); @@ -1142,7 +1140,7 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); - const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config}; + const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; for (const auto& entry : entries.global_buffers) { const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; SetupGlobalBuffer(entry, addr); @@ -1151,19 +1149,17 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.uniform_texels) { - const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; SetupUniformTexels(image, entry); } } void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.samplers) { for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i); + const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i); SetupTexture(texture, entry); } } @@ -1171,18 +1167,16 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.storage_texels) { - const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; SetupStorageTexel(image, entry); } } void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Images); - const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.images) { - const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; SetupImage(tic, entry); } } @@ -1206,9 +1200,8 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, } void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { - auto& memory_manager{system.GPU().MemoryManager()}; - const auto actual_addr = memory_manager.Read<u64>(address); - const auto size = memory_manager.Read<u32>(address + 8); + const u64 actual_addr = gpu_memory.Read<u64>(address); + const u32 size = gpu_memory.Read<u32>(address + 8); if (size == 0) { // Sometimes global memory pointers don't have a proper size. Upload a dummy entry @@ -1426,10 +1419,10 @@ void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) { } void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!state_tracker.TouchPrimitiveTopology()) { + const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value(); + if (!state_tracker.ChangePrimitiveTopology(primitive_topology)) { return; } - const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value(); scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) { cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology)); }); @@ -1491,7 +1484,7 @@ std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { } std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; std::size_t size = 0; for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { @@ -1506,9 +1499,8 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { } std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { - const auto& regs = system.GPU().Maxwell3D().regs; - return static_cast<std::size_t>(regs.index_array.count) * - static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); + return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * + static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); } std::size_t RasterizerVulkan::CalculateConstBufferSize( @@ -1523,7 +1515,7 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize( } RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); RenderPassParams params; @@ -1553,17 +1545,17 @@ VkBuffer RasterizerVulkan::DefaultBuffer() { return *default_buffer; } - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = DEFAULT_BUFFER_SIZE; - ci.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - default_buffer = device.GetLogical().CreateBuffer(ci); + default_buffer = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = DEFAULT_BUFFER_SIZE, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); default_buffer_commit = memory_manager.Commit(default_buffer, false); scheduler.RequestOutsideRenderPassOperationContext(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 923178b0b..b47c8fc13 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -25,13 +25,13 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/shader/async_shaders.h" namespace Core { class System; @@ -105,10 +105,11 @@ struct ImageView { class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: - explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, + explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, - StateTracker& state_tracker, VKScheduler& scheduler); + VKMemoryManager& memory_manager, StateTracker& state_tracker, + VKScheduler& scheduler); ~RasterizerVulkan() override; void Draw(bool is_indexed, bool is_instanced) override; @@ -134,7 +135,14 @@ public: const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - void SetupDirtyFlags() override; + + VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { + return async_shaders; + } + + const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { + return async_shaders; + } /// Maximum supported size that a constbuffer can have in bytes. static constexpr std::size_t MaxConstbufferSize = 0x10000; @@ -270,11 +278,13 @@ private: VkBuffer DefaultBuffer(); - Core::System& system; - Core::Frontend::EmuWindow& render_window; + Tegra::GPU& gpu; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + VKScreenInfo& screen_info; const VKDevice& device; - VKResourceManager& resource_manager; VKMemoryManager& memory_manager; StateTracker& state_tracker; VKScheduler& scheduler; @@ -291,12 +301,13 @@ private: VKPipelineCache pipeline_cache; VKBufferCache buffer_cache; VKSamplerCache sampler_cache; - VKFenceManager fence_manager; VKQueryCache query_cache; + VKFenceManager fence_manager; vk::Buffer default_buffer; VKMemoryCommit default_buffer_commit; vk::Event wfi_event; + VideoCommon::Shader::AsyncShaders async_shaders; std::array<View, Maxwell::NumRenderTargets> color_attachments; View zeta_attachment; diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 3f71d005e..80284cf92 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -39,10 +39,14 @@ VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { using namespace VideoCore::Surface; + const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); + std::vector<VkAttachmentDescription> descriptors; + descriptors.reserve(num_attachments); + std::vector<VkAttachmentReference> color_references; + color_references.reserve(num_attachments); - const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); for (std::size_t rt = 0; rt < num_attachments; ++rt) { const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]); const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format); @@ -54,20 +58,22 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0 ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - VkAttachmentDescription& descriptor = descriptors.emplace_back(); - descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT; - descriptor.format = format.format; - descriptor.samples = VK_SAMPLE_COUNT_1_BIT; - descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - descriptor.initialLayout = color_layout; - descriptor.finalLayout = color_layout; - - VkAttachmentReference& reference = color_references.emplace_back(); - reference.attachment = static_cast<u32>(rt); - reference.layout = color_layout; + descriptors.push_back({ + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = format.format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = color_layout, + .finalLayout = color_layout, + }); + + color_references.push_back({ + .attachment = static_cast<u32>(rt), + .layout = color_layout, + }); } VkAttachmentReference zeta_attachment_ref; @@ -82,32 +88,36 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param const VkImageLayout zeta_layout = params.zeta_texception != 0 ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - VkAttachmentDescription& descriptor = descriptors.emplace_back(); - descriptor.flags = 0; - descriptor.format = format.format; - descriptor.samples = VK_SAMPLE_COUNT_1_BIT; - descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.initialLayout = zeta_layout; - descriptor.finalLayout = zeta_layout; - - zeta_attachment_ref.attachment = static_cast<u32>(num_attachments); - zeta_attachment_ref.layout = zeta_layout; + descriptors.push_back({ + .flags = 0, + .format = format.format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = zeta_layout, + .finalLayout = zeta_layout, + }); + + zeta_attachment_ref = { + .attachment = static_cast<u32>(num_attachments), + .layout = zeta_layout, + }; } - VkSubpassDescription subpass_description; - subpass_description.flags = 0; - subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass_description.inputAttachmentCount = 0; - subpass_description.pInputAttachments = nullptr; - subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size()); - subpass_description.pColorAttachments = color_references.data(); - subpass_description.pResolveAttachments = nullptr; - subpass_description.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr; - subpass_description.preserveAttachmentCount = 0; - subpass_description.pPreserveAttachments = nullptr; + const VkSubpassDescription subpass_description{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast<u32>(color_references.size()), + .pColorAttachments = color_references.data(), + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; VkAccessFlags access = 0; VkPipelineStageFlags stage = 0; @@ -122,26 +132,27 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; } - VkSubpassDependency subpass_dependency; - subpass_dependency.srcSubpass = VK_SUBPASS_EXTERNAL; - subpass_dependency.dstSubpass = 0; - subpass_dependency.srcStageMask = stage; - subpass_dependency.dstStageMask = stage; - subpass_dependency.srcAccessMask = 0; - subpass_dependency.dstAccessMask = access; - subpass_dependency.dependencyFlags = 0; - - VkRenderPassCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.attachmentCount = static_cast<u32>(descriptors.size()); - ci.pAttachments = descriptors.data(); - ci.subpassCount = 1; - ci.pSubpasses = &subpass_description; - ci.dependencyCount = 1; - ci.pDependencies = &subpass_dependency; - return device.GetLogical().CreateRenderPass(ci); + const VkSubpassDependency subpass_dependency{ + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = stage, + .dstStageMask = stage, + .srcAccessMask = 0, + .dstAccessMask = access, + .dependencyFlags = 0, + }; + + return device.GetLogical().CreateRenderPass({ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast<u32>(descriptors.size()), + .pAttachments = descriptors.data(), + .subpassCount = 1, + .pSubpasses = &subpass_description, + .dependencyCount = 1, + .pDependencies = &subpass_dependency, + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp deleted file mode 100644 index dc06f545a..000000000 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ /dev/null @@ -1,312 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <optional> -#include "common/assert.h" -#include "common/logging/log.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -namespace { - -// TODO(Rodrigo): Fine tune these numbers. -constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; -constexpr std::size_t FENCES_GROW_STEP = 0x40; - -VkFenceCreateInfo BuildFenceCreateInfo() { - VkFenceCreateInfo fence_ci; - fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fence_ci.pNext = nullptr; - fence_ci.flags = 0; - return fence_ci; -} - -} // Anonymous namespace - -class CommandBufferPool final : public VKFencedPool { -public: - CommandBufferPool(const VKDevice& device) - : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {} - - void Allocate(std::size_t begin, std::size_t end) override { - // Command buffers are going to be commited, recorded, executed every single usage cycle. - // They are also going to be reseted when commited. - VkCommandPoolCreateInfo command_pool_ci; - command_pool_ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - command_pool_ci.pNext = nullptr; - command_pool_ci.flags = - VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - command_pool_ci.queueFamilyIndex = device.GetGraphicsFamily(); - - Pool& pool = pools.emplace_back(); - pool.handle = device.GetLogical().CreateCommandPool(command_pool_ci); - pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE); - } - - VkCommandBuffer Commit(VKFence& fence) { - const std::size_t index = CommitResource(fence); - const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; - const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; - return pools[pool_index].cmdbufs[sub_index]; - } - -private: - struct Pool { - vk::CommandPool handle; - vk::CommandBuffers cmdbufs; - }; - - const VKDevice& device; - std::vector<Pool> pools; -}; - -VKResource::VKResource() = default; - -VKResource::~VKResource() = default; - -VKFence::VKFence(const VKDevice& device) - : device{device}, handle{device.GetLogical().CreateFence(BuildFenceCreateInfo())} {} - -VKFence::~VKFence() = default; - -void VKFence::Wait() { - switch (const VkResult result = handle.Wait()) { - case VK_SUCCESS: - return; - case VK_ERROR_DEVICE_LOST: - device.ReportLoss(); - [[fallthrough]]; - default: - throw vk::Exception(result); - } -} - -void VKFence::Release() { - ASSERT(is_owned); - is_owned = false; -} - -void VKFence::Commit() { - is_owned = true; - is_used = true; -} - -bool VKFence::Tick(bool gpu_wait, bool owner_wait) { - if (!is_used) { - // If a fence is not used it's always free. - return true; - } - if (is_owned && !owner_wait) { - // The fence is still being owned (Release has not been called) and ownership wait has - // not been asked. - return false; - } - - if (gpu_wait) { - // Wait for the fence if it has been requested. - (void)handle.Wait(); - } else { - if (handle.GetStatus() != VK_SUCCESS) { - // Vulkan fence is not ready, not much it can do here - return false; - } - } - - // Broadcast resources their free state. - for (auto* resource : protected_resources) { - resource->OnFenceRemoval(this); - } - protected_resources.clear(); - - // Prepare fence for reusage. - handle.Reset(); - is_used = false; - return true; -} - -void VKFence::Protect(VKResource* resource) { - protected_resources.push_back(resource); -} - -void VKFence::Unprotect(VKResource* resource) { - const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource); - ASSERT(it != protected_resources.end()); - - resource->OnFenceRemoval(this); - protected_resources.erase(it); -} - -void VKFence::RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept { - std::replace(std::begin(protected_resources), std::end(protected_resources), old_resource, - new_resource); -} - -VKFenceWatch::VKFenceWatch() = default; - -VKFenceWatch::VKFenceWatch(VKFence& initial_fence) { - Watch(initial_fence); -} - -VKFenceWatch::VKFenceWatch(VKFenceWatch&& rhs) noexcept { - fence = std::exchange(rhs.fence, nullptr); - if (fence) { - fence->RedirectProtection(&rhs, this); - } -} - -VKFenceWatch& VKFenceWatch::operator=(VKFenceWatch&& rhs) noexcept { - fence = std::exchange(rhs.fence, nullptr); - if (fence) { - fence->RedirectProtection(&rhs, this); - } - return *this; -} - -VKFenceWatch::~VKFenceWatch() { - if (fence) { - fence->Unprotect(this); - } -} - -void VKFenceWatch::Wait() { - if (fence == nullptr) { - return; - } - fence->Wait(); - fence->Unprotect(this); -} - -void VKFenceWatch::Watch(VKFence& new_fence) { - Wait(); - fence = &new_fence; - fence->Protect(this); -} - -bool VKFenceWatch::TryWatch(VKFence& new_fence) { - if (fence) { - return false; - } - fence = &new_fence; - fence->Protect(this); - return true; -} - -void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) { - ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence"); - fence = nullptr; -} - -VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {} - -VKFencedPool::~VKFencedPool() = default; - -std::size_t VKFencedPool::CommitResource(VKFence& fence) { - const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> { - for (std::size_t iterator = begin; iterator < end; ++iterator) { - if (watches[iterator]->TryWatch(fence)) { - // The resource is now being watched, a free resource was successfully found. - return iterator; - } - } - return {}; - }; - // Try to find a free resource from the hinted position to the end. - auto found = Search(free_iterator, watches.size()); - if (!found) { - // Search from beginning to the hinted position. - found = Search(0, free_iterator); - if (!found) { - // Both searches failed, the pool is full; handle it. - const std::size_t free_resource = ManageOverflow(); - - // Watch will wait for the resource to be free. - watches[free_resource]->Watch(fence); - found = free_resource; - } - } - // Free iterator is hinted to the resource after the one that's been commited. - free_iterator = (*found + 1) % watches.size(); - return *found; -} - -std::size_t VKFencedPool::ManageOverflow() { - const std::size_t old_capacity = watches.size(); - Grow(); - - // The last entry is guaranted to be free, since it's the first element of the freshly - // allocated resources. - return old_capacity; -} - -void VKFencedPool::Grow() { - const std::size_t old_capacity = watches.size(); - watches.resize(old_capacity + grow_step); - std::generate(watches.begin() + old_capacity, watches.end(), - []() { return std::make_unique<VKFenceWatch>(); }); - Allocate(old_capacity, old_capacity + grow_step); -} - -VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} { - GrowFences(FENCES_GROW_STEP); - command_buffer_pool = std::make_unique<CommandBufferPool>(device); -} - -VKResourceManager::~VKResourceManager() = default; - -VKFence& VKResourceManager::CommitFence() { - const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* { - const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); }; - const auto hinted = fences.begin() + fences_iterator; - - auto it = std::find_if(hinted, fences.end(), Tick); - if (it == fences.end()) { - it = std::find_if(fences.begin(), hinted, Tick); - if (it == hinted) { - return nullptr; - } - } - fences_iterator = std::distance(fences.begin(), it) + 1; - if (fences_iterator >= fences.size()) - fences_iterator = 0; - - auto& fence = *it; - fence->Commit(); - return fence.get(); - }; - - VKFence* found_fence = StepFences(false, false); - if (!found_fence) { - // Try again, this time waiting. - found_fence = StepFences(true, false); - - if (!found_fence) { - // Allocate new fences and try again. - LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(), - fences.size() + FENCES_GROW_STEP); - - GrowFences(FENCES_GROW_STEP); - found_fence = StepFences(true, false); - ASSERT(found_fence != nullptr); - } - } - return *found_fence; -} - -VkCommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) { - return command_buffer_pool->Commit(fence); -} - -void VKResourceManager::GrowFences(std::size_t new_fences_count) { - const std::size_t previous_size = fences.size(); - fences.resize(previous_size + new_fences_count); - - std::generate(fences.begin() + previous_size, fences.end(), - [this] { return std::make_unique<VKFence>(device); }); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h deleted file mode 100644 index f683d2276..000000000 --- a/src/video_core/renderer_vulkan/vk_resource_manager.h +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <cstddef> -#include <memory> -#include <vector> -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -class VKDevice; -class VKFence; -class VKResourceManager; - -class CommandBufferPool; - -/// Interface for a Vulkan resource -class VKResource { -public: - explicit VKResource(); - virtual ~VKResource(); - - /** - * Signals the object that an owning fence has been signaled. - * @param signaling_fence Fence that signals its usage end. - */ - virtual void OnFenceRemoval(VKFence* signaling_fence) = 0; -}; - -/** - * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access. - * They must be commited from the resource manager. Their usage flow is: commit the fence from the - * resource manager, protect resources with it and use them, send the fence to an execution queue - * and Wait for it if needed and then call Release. Used resources will automatically be signaled - * when they are free to be reused. - * @brief Protects resources for concurrent usage and signals its release. - */ -class VKFence { - friend class VKResourceManager; - -public: - explicit VKFence(const VKDevice& device); - ~VKFence(); - - /** - * Waits for the fence to be signaled. - * @warning You must have ownership of the fence and it has to be previously sent to a queue to - * call this function. - */ - void Wait(); - - /** - * Releases ownership of the fence. Pass after it has been sent to an execution queue. - * Unmanaged usage of the fence after the call will result in undefined behavior because it may - * be being used for something else. - */ - void Release(); - - /// Protects a resource with this fence. - void Protect(VKResource* resource); - - /// Removes protection for a resource. - void Unprotect(VKResource* resource); - - /// Redirects one protected resource to a new address. - void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept; - - /// Retreives the fence. - operator VkFence() const { - return *handle; - } - -private: - /// Take ownership of the fence. - void Commit(); - - /** - * Updates the fence status. - * @warning Waiting for the owner might soft lock the execution. - * @param gpu_wait Wait for the fence to be signaled by the driver. - * @param owner_wait Wait for the owner to signal its freedom. - * @returns True if the fence is free. Waiting for gpu and owner will always return true. - */ - bool Tick(bool gpu_wait, bool owner_wait); - - const VKDevice& device; ///< Device handler - vk::Fence handle; ///< Vulkan fence - std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence - bool is_owned = false; ///< The fence has been commited but not released yet. - bool is_used = false; ///< The fence has been commited but it has not been checked to be free. -}; - -/** - * A fence watch is used to keep track of the usage of a fence and protect a resource or set of - * resources without having to inherit VKResource from their handlers. - */ -class VKFenceWatch final : public VKResource { -public: - explicit VKFenceWatch(); - VKFenceWatch(VKFence& initial_fence); - VKFenceWatch(VKFenceWatch&&) noexcept; - VKFenceWatch(const VKFenceWatch&) = delete; - ~VKFenceWatch() override; - - VKFenceWatch& operator=(VKFenceWatch&&) noexcept; - - /// Waits for the fence to be released. - void Wait(); - - /** - * Waits for a previous fence and watches a new one. - * @param new_fence New fence to wait to. - */ - void Watch(VKFence& new_fence); - - /** - * Checks if it's currently being watched and starts watching it if it's available. - * @returns True if a watch has started, false if it's being watched. - */ - bool TryWatch(VKFence& new_fence); - - void OnFenceRemoval(VKFence* signaling_fence) override; - - /** - * Do not use it paired with Watch. Use TryWatch instead. - * Returns true when the watch is free. - */ - bool IsUsed() const { - return fence != nullptr; - } - -private: - VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free. -}; - -/** - * Handles a pool of resources protected by fences. Manages resource overflow allocating more - * resources. - */ -class VKFencedPool { -public: - explicit VKFencedPool(std::size_t grow_step); - virtual ~VKFencedPool(); - -protected: - /** - * Commits a free resource and protects it with a fence. It may allocate new resources. - * @param fence Fence that protects the commited resource. - * @returns Index of the resource commited. - */ - std::size_t CommitResource(VKFence& fence); - - /// Called when a chunk of resources have to be allocated. - virtual void Allocate(std::size_t begin, std::size_t end) = 0; - -private: - /// Manages pool overflow allocating new resources. - std::size_t ManageOverflow(); - - /// Allocates a new page of resources. - void Grow(); - - std::size_t grow_step = 0; ///< Number of new resources created after an overflow - std::size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found - std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources -}; - -/** - * The resource manager handles all resources that can be protected with a fence avoiding - * driver-side or GPU-side concurrent usage. Usage is documented in VKFence. - */ -class VKResourceManager final { -public: - explicit VKResourceManager(const VKDevice& device); - ~VKResourceManager(); - - /// Commits a fence. It has to be sent to a queue and released. - VKFence& CommitFence(); - - /// Commits an unused command buffer and protects it with a fence. - VkCommandBuffer CommitCommandBuffer(VKFence& fence); - -private: - /// Allocates new fences. - void GrowFences(std::size_t new_fences_count); - - const VKDevice& device; ///< Device handler. - std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found. - std::vector<std::unique_ptr<VKFence>> fences; ///< Pool of fences. - std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers. -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp new file mode 100644 index 000000000..ee274ac59 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -0,0 +1,63 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <optional> + +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" + +namespace Vulkan { + +ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) + : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} + +ResourcePool::~ResourcePool() = default; + +size_t ResourcePool::CommitResource() { + // Refresh semaphore to query updated results + master_semaphore.Refresh(); + + const auto search = [this](size_t begin, size_t end) -> std::optional<size_t> { + for (size_t iterator = begin; iterator < end; ++iterator) { + if (master_semaphore.IsFree(ticks[iterator])) { + ticks[iterator] = master_semaphore.CurrentTick(); + return iterator; + } + } + return {}; + }; + // Try to find a free resource from the hinted position to the end. + auto found = search(free_iterator, ticks.size()); + if (!found) { + // Search from beginning to the hinted position. + found = search(0, free_iterator); + if (!found) { + // Both searches failed, the pool is full; handle it. + const size_t free_resource = ManageOverflow(); + + ticks[free_resource] = master_semaphore.CurrentTick(); + found = free_resource; + } + } + // Free iterator is hinted to the resource after the one that's been commited. + free_iterator = (*found + 1) % ticks.size(); + return *found; +} + +size_t ResourcePool::ManageOverflow() { + const size_t old_capacity = ticks.size(); + Grow(); + + // The last entry is guaranted to be free, since it's the first element of the freshly + // allocated resources. + return old_capacity; +} + +void ResourcePool::Grow() { + const size_t old_capacity = ticks.size(); + ticks.resize(old_capacity + grow_step); + Allocate(old_capacity, old_capacity + grow_step); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h new file mode 100644 index 000000000..a018c7ec2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -0,0 +1,43 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> + +#include "common/common_types.h" + +namespace Vulkan { + +class MasterSemaphore; + +/** + * Handles a pool of resources protected by fences. Manages resource overflow allocating more + * resources. + */ +class ResourcePool { +public: + explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); + virtual ~ResourcePool(); + +protected: + size_t CommitResource(); + + /// Called when a chunk of resources have to be allocated. + virtual void Allocate(size_t begin, size_t end) = 0; + +private: + /// Manages pool overflow allocating new resources. + size_t ManageOverflow(); + + /// Allocates a new page of resources. + void Grow(); + + MasterSemaphore& master_semaphore; + size_t grow_step = 0; ///< Number of new resources created after an overflow + size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found + std::vector<u64> ticks; ///< Ticks for each resource +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 616eacc36..b068888f9 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -44,32 +44,36 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c const bool arbitrary_borders = device.IsExtCustomBorderColorSupported(); const std::array color = tsc.GetBorderColor(); - VkSamplerCustomBorderColorCreateInfoEXT border; - border.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT; - border.pNext = nullptr; - border.format = VK_FORMAT_UNDEFINED; + VkSamplerCustomBorderColorCreateInfoEXT border{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, + .pNext = nullptr, + .customBorderColor = {}, + .format = VK_FORMAT_UNDEFINED, + }; std::memcpy(&border.customBorderColor, color.data(), sizeof(color)); - VkSamplerCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - ci.pNext = arbitrary_borders ? &border : nullptr; - ci.flags = 0; - ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter); - ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter); - ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter); - ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter); - ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter); - ci.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter); - ci.mipLodBias = tsc.GetLodBias(); - ci.anisotropyEnable = tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE; - ci.maxAnisotropy = tsc.GetMaxAnisotropy(); - ci.compareEnable = tsc.depth_compare_enabled; - ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func); - ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(); - ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(); - ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color); - ci.unnormalizedCoordinates = VK_FALSE; - return device.GetLogical().CreateSampler(ci); + return device.GetLogical().CreateSampler({ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = arbitrary_borders ? &border : nullptr, + .flags = 0, + .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), + .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), + .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), + .mipLodBias = tsc.GetLodBias(), + .anisotropyEnable = + static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE), + .maxAnisotropy = tsc.GetMaxAnisotropy(), + .compareEnable = tsc.depth_compare_enabled, + .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), + .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(), + .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(), + .borderColor = + arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), + .unnormalizedCoordinates = VK_FALSE, + }); } VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 56524e6f3..1a483dc71 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -10,9 +10,10 @@ #include "common/microprofile.h" #include "common/thread.h" +#include "video_core/renderer_vulkan/vk_command_pool.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_query_cache.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -35,10 +36,10 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { last = nullptr; } -VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager, - StateTracker& state_tracker) - : device{device}, resource_manager{resource_manager}, state_tracker{state_tracker}, - next_fence{&resource_manager.CommitFence()} { +VKScheduler::VKScheduler(const VKDevice& device_, StateTracker& state_tracker_) + : device{device_}, state_tracker{state_tracker_}, + master_semaphore{std::make_unique<MasterSemaphore>(device)}, + command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { AcquireNewChunk(); AllocateNewContext(); worker_thread = std::thread(&VKScheduler::WorkerThread, this); @@ -50,20 +51,27 @@ VKScheduler::~VKScheduler() { worker_thread.join(); } -void VKScheduler::Flush(bool release_fence, VkSemaphore semaphore) { +u64 VKScheduler::CurrentTick() const noexcept { + return master_semaphore->CurrentTick(); +} + +bool VKScheduler::IsFree(u64 tick) const noexcept { + return master_semaphore->IsFree(tick); +} + +void VKScheduler::Wait(u64 tick) { + master_semaphore->Wait(tick); +} + +void VKScheduler::Flush(VkSemaphore semaphore) { SubmitExecution(semaphore); - if (release_fence) { - current_fence->Release(); - } AllocateNewContext(); } -void VKScheduler::Finish(bool release_fence, VkSemaphore semaphore) { +void VKScheduler::Finish(VkSemaphore semaphore) { + const u64 presubmit_tick = CurrentTick(); SubmitExecution(semaphore); - current_fence->Wait(); - if (release_fence) { - current_fence->Release(); - } + Wait(presubmit_tick); AllocateNewContext(); } @@ -100,16 +108,19 @@ void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer frame state.framebuffer = framebuffer; state.render_area = render_area; - VkRenderPassBeginInfo renderpass_bi; - renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - renderpass_bi.pNext = nullptr; - renderpass_bi.renderPass = renderpass; - renderpass_bi.framebuffer = framebuffer; - renderpass_bi.renderArea.offset.x = 0; - renderpass_bi.renderArea.offset.y = 0; - renderpass_bi.renderArea.extent = render_area; - renderpass_bi.clearValueCount = 0; - renderpass_bi.pClearValues = nullptr; + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = renderpass, + .framebuffer = framebuffer, + .renderArea = + { + .offset = {.x = 0, .y = 0}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { if (end_renderpass) { @@ -157,17 +168,38 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { current_cmdbuf.End(); - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = nullptr; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = nullptr; - submit_info.pWaitDstStageMask = nullptr; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = current_cmdbuf.address(); - submit_info.signalSemaphoreCount = semaphore ? 1 : 0; - submit_info.pSignalSemaphores = &semaphore; - switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) { + const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + const u32 num_signal_semaphores = semaphore ? 2U : 1U; + + const u64 signal_value = master_semaphore->CurrentTick(); + const u64 wait_value = signal_value - 1; + const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + + master_semaphore->NextTick(); + + const std::array signal_values{signal_value, u64(0)}; + const std::array signal_semaphores{timeline_semaphore, semaphore}; + + const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreValueCount = 1, + .pWaitSemaphoreValues = &wait_value, + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_si, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &timeline_semaphore, + .pWaitDstStageMask = &wait_stage_mask, + .commandBufferCount = 1, + .pCommandBuffers = current_cmdbuf.address(), + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), + }; + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { case VK_SUCCESS: break; case VK_ERROR_DEVICE_LOST: @@ -179,21 +211,15 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { } void VKScheduler::AllocateNewContext() { - ++ticks; - - VkCommandBufferBeginInfo cmdbuf_bi; - cmdbuf_bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - cmdbuf_bi.pNext = nullptr; - cmdbuf_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - cmdbuf_bi.pInheritanceInfo = nullptr; - std::unique_lock lock{mutex}; - current_fence = next_fence; - next_fence = &resource_manager.CommitFence(); - current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence), - device.GetDispatchLoader()); - current_cmdbuf.Begin(cmdbuf_bi); + current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); + current_cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); // Enable counters once again. These are disabled when a command buffer is finished. if (query_cache) { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 970a65566..7be8a19f0 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -16,42 +16,33 @@ namespace Vulkan { +class CommandPool; +class MasterSemaphore; class StateTracker; class VKDevice; -class VKFence; class VKQueryCache; -class VKResourceManager; - -class VKFenceView { -public: - VKFenceView() = default; - VKFenceView(VKFence* const& fence) : fence{fence} {} - - VKFence* operator->() const noexcept { - return fence; - } - - operator VKFence&() const noexcept { - return *fence; - } - -private: - VKFence* const& fence; -}; /// The scheduler abstracts command buffer and fence management with an interface that's able to do /// OpenGL-like operations on Vulkan command buffers. class VKScheduler { public: - explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager, - StateTracker& state_tracker); + explicit VKScheduler(const VKDevice& device, StateTracker& state_tracker); ~VKScheduler(); + /// Returns the current command buffer tick. + [[nodiscard]] u64 CurrentTick() const noexcept; + + /// Returns true when a tick has been triggered by the GPU. + [[nodiscard]] bool IsFree(u64 tick) const noexcept; + + /// Waits for the given tick to trigger on the GPU. + void Wait(u64 tick); + /// Sends the current execution context to the GPU. - void Flush(bool release_fence = true, VkSemaphore semaphore = nullptr); + void Flush(VkSemaphore semaphore = nullptr); /// Sends the current execution context to the GPU and waits for it to complete. - void Finish(bool release_fence = true, VkSemaphore semaphore = nullptr); + void Finish(VkSemaphore semaphore = nullptr); /// Waits for the worker thread to finish executing everything. After this function returns it's /// safe to touch worker resources. @@ -86,14 +77,9 @@ public: (void)chunk->Record(command); } - /// Gets a reference to the current fence. - VKFenceView GetFence() const { - return current_fence; - } - - /// Returns the current command buffer tick. - u64 Ticks() const { - return ticks; + /// Returns the master timeline semaphore. + [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { + return *master_semaphore; } private: @@ -171,6 +157,13 @@ private: std::array<u8, 0x8000> data{}; }; + struct State { + VkRenderPass renderpass = nullptr; + VkFramebuffer framebuffer = nullptr; + VkExtent2D render_area = {0, 0}; + VkPipeline graphics_pipeline = nullptr; + }; + void WorkerThread(); void SubmitExecution(VkSemaphore semaphore); @@ -186,30 +179,23 @@ private: void AcquireNewChunk(); const VKDevice& device; - VKResourceManager& resource_manager; StateTracker& state_tracker; + std::unique_ptr<MasterSemaphore> master_semaphore; + std::unique_ptr<CommandPool> command_pool; + VKQueryCache* query_cache = nullptr; vk::CommandBuffer current_cmdbuf; - VKFence* current_fence = nullptr; - VKFence* next_fence = nullptr; - - struct State { - VkRenderPass renderpass = nullptr; - VkFramebuffer framebuffer = nullptr; - VkExtent2D render_area = {0, 0}; - VkPipeline graphics_pipeline = nullptr; - } state; std::unique_ptr<CommandChunk> chunk; std::thread worker_thread; + State state; Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; std::mutex mutex; std::condition_variable cv; - std::atomic<u64> ticks = 0; bool quit = false; }; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 97429cc59..cd7d7a4e4 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -685,13 +685,19 @@ private: } t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); - const u32 smem_size = specialization.shared_memory_size; + u32 smem_size = specialization.shared_memory_size * 4; if (smem_size == 0) { // Avoid declaring an empty array. return; } - const auto element_count = static_cast<u32>(Common::AlignUp(smem_size, 4) / 4); - const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count)); + const u32 limit = device.GetMaxComputeSharedMemorySize(); + if (smem_size > limit) { + LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", + smem_size, limit); + smem_size = limit; + } + + const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); Name(type_pointer, "SharedMemory"); @@ -700,9 +706,9 @@ private: } void DeclareInternalFlags() { - constexpr std::array names = {"zero", "sign", "carry", "overflow"}; + static constexpr std::array names{"zero", "sign", "carry", "overflow"}; + for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { - const auto flag_code = static_cast<InternalFlag>(flag); const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); } @@ -2798,7 +2804,6 @@ private: std::map<GlobalMemoryBase, Id> global_buffers; std::map<u32, TexelBuffer> uniform_texels; std::map<u32, SampledImage> sampled_images; - std::map<u32, TexelBuffer> storage_texels; std::map<u32, StorageImage> images; std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 112df9c71..c1a218d76 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -19,13 +19,13 @@ vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, cons const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); std::memcpy(data.get(), code_data, code_size); - VkShaderModuleCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.codeSize = code_size; - ci.pCode = data.get(); - return device.GetLogical().CreateShaderModule(ci); + return device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code_size, + .pCode = data.get(), + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 45c180221..2fd3b7f39 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -10,36 +10,18 @@ #include "common/bit_util.h" #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { -VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, - u64 last_epoch) - : buffer{std::move(buffer)}, watch{fence}, last_epoch{last_epoch} {} +VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_) + : buffer{std::move(buffer_)} {} -VKStagingBufferPool::StagingBuffer::StagingBuffer(StagingBuffer&& rhs) noexcept { - buffer = std::move(rhs.buffer); - watch = std::move(rhs.watch); - last_epoch = rhs.last_epoch; -} - -VKStagingBufferPool::StagingBuffer::~StagingBuffer() = default; - -VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator=( - StagingBuffer&& rhs) noexcept { - buffer = std::move(rhs.buffer); - watch = std::move(rhs.watch); - last_epoch = rhs.last_epoch; - return *this; -} - -VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler) - : device{device}, memory_manager{memory_manager}, scheduler{scheduler} {} +VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device_, VKMemoryManager& memory_manager_, + VKScheduler& scheduler_) + : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {} VKStagingBufferPool::~VKStagingBufferPool() = default; @@ -51,7 +33,6 @@ VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visib } void VKStagingBufferPool::TickFrame() { - ++epoch; current_delete_level = (current_delete_level + 1) % NumLevels; ReleaseCache(true); @@ -59,11 +40,12 @@ void VKStagingBufferPool::TickFrame() { } VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) { - for (auto& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) { - if (entry.watch.TryWatch(scheduler.GetFence())) { - entry.last_epoch = epoch; - return &*entry.buffer; + for (StagingBuffer& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) { + if (!scheduler.IsFree(entry.tick)) { + continue; } + entry.tick = scheduler.CurrentTick(); + return &*entry.buffer; } return nullptr; } @@ -71,24 +53,25 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { const u32 log2 = Common::Log2Ceil64(size); - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = 1ULL << log2; - ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - auto buffer = std::make_unique<VKBuffer>(); - buffer->handle = device.GetLogical().CreateBuffer(ci); + buffer->handle = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = 1ULL << log2, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); buffer->commit = memory_manager.Commit(buffer->handle, host_visible); - auto& entries = GetCache(host_visible)[log2].entries; - return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer; + std::vector<StagingBuffer>& entries = GetCache(host_visible)[log2].entries; + StagingBuffer& entry = entries.emplace_back(std::move(buffer)); + entry.tick = scheduler.CurrentTick(); + return *entry.buffer; } VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) { @@ -110,9 +93,8 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo auto& entries = staging.entries; const std::size_t old_size = entries.size(); - const auto is_deleteable = [this](const auto& entry) { - static constexpr u64 epochs_to_destroy = 180; - return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed(); + const auto is_deleteable = [this](const StagingBuffer& entry) { + return scheduler.IsFree(entry.tick); }; const std::size_t begin_offset = staging.delete_index; const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size); diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 3c4901437..2dd5049ac 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -10,13 +10,11 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { class VKDevice; -class VKFenceWatch; class VKScheduler; struct VKBuffer final { @@ -36,16 +34,10 @@ public: private: struct StagingBuffer final { - explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, u64 last_epoch); - StagingBuffer(StagingBuffer&& rhs) noexcept; - StagingBuffer(const StagingBuffer&) = delete; - ~StagingBuffer(); - - StagingBuffer& operator=(StagingBuffer&& rhs) noexcept; + explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer); std::unique_ptr<VKBuffer> buffer; - VKFenceWatch watch; - u64 last_epoch = 0; + u64 tick = 0; }; struct StagingBuffers final { @@ -73,8 +65,6 @@ private: StagingBuffersCache host_staging_buffers; StagingBuffersCache device_staging_buffers; - u64 epoch = 0; - std::size_t current_delete_level = 0; }; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 9151d9fb1..5d2c4a796 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -42,7 +42,6 @@ Flags MakeInvalidationFlags() { flags[DepthWriteEnable] = true; flags[DepthCompareOp] = true; flags[FrontFace] = true; - flags[PrimitiveTopology] = true; flags[StencilOp] = true; flags[StencilTestEnable] = true; return flags; @@ -112,10 +111,6 @@ void SetupDirtyFrontFace(Tables& tables) { table[OFF(screen_y_control)] = FrontFace; } -void SetupDirtyPrimitiveTopology(Tables& tables) { - tables[0][OFF(draw.topology)] = PrimitiveTopology; -} - void SetupDirtyStencilOp(Tables& tables) { auto& table = tables[0]; table[OFF(stencil_front_op_fail)] = StencilOp; @@ -137,12 +132,9 @@ void SetupDirtyStencilTestEnable(Tables& tables) { } // Anonymous namespace -StateTracker::StateTracker(Core::System& system) - : system{system}, invalidation_flags{MakeInvalidationFlags()} {} - -void StateTracker::Initialize() { - auto& dirty = system.GPU().Maxwell3D().dirty; - auto& tables = dirty.tables; +StateTracker::StateTracker(Tegra::GPU& gpu) + : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { + auto& tables = gpu.Maxwell3D().dirty.tables; SetupDirtyRenderTargets(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables); @@ -156,13 +148,8 @@ void StateTracker::Initialize() { SetupDirtyDepthWriteEnable(tables); SetupDirtyDepthCompareOp(tables); SetupDirtyFrontFace(tables); - SetupDirtyPrimitiveTopology(tables); SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); } -void StateTracker::InvalidateCommandBufferState() { - system.GPU().Maxwell3D().dirty.flags |= invalidation_flags; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 54ca0d6c6..1de789e57 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -32,7 +32,6 @@ enum : u8 { DepthWriteEnable, DepthCompareOp, FrontFace, - PrimitiveTopology, StencilOp, StencilTestEnable, @@ -43,12 +42,15 @@ static_assert(Last <= std::numeric_limits<u8>::max()); } // namespace Dirty class StateTracker { -public: - explicit StateTracker(Core::System& system); + using Maxwell = Tegra::Engines::Maxwell3D::Regs; - void Initialize(); +public: + explicit StateTracker(Tegra::GPU& gpu); - void InvalidateCommandBufferState(); + void InvalidateCommandBufferState() { + flags |= invalidation_flags; + current_topology = INVALID_TOPOLOGY; + } bool TouchViewports() { return Exchange(Dirty::Viewports, false); @@ -102,10 +104,6 @@ public: return Exchange(Dirty::FrontFace, false); } - bool TouchPrimitiveTopology() { - return Exchange(Dirty::PrimitiveTopology, false); - } - bool TouchStencilOp() { return Exchange(Dirty::StencilOp, false); } @@ -114,16 +112,24 @@ public: return Exchange(Dirty::StencilTestEnable, false); } + bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) { + const bool has_changed = current_topology != new_topology; + current_topology = new_topology; + return has_changed; + } + private: + static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u); + bool Exchange(std::size_t id, bool new_value) const noexcept { - auto& flags = system.GPU().Maxwell3D().dirty.flags; const bool is_dirty = flags[id]; flags[id] = new_value; return is_dirty; } - Core::System& system; + Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; + Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 2d28a6c47..1b59612b9 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -11,7 +11,6 @@ #include "common/alignment.h" #include "common/assert.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -57,9 +56,9 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, } // Anonymous namespace -VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, +VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_, VkBufferUsageFlags usage) - : device{device}, scheduler{scheduler} { + : device{device_}, scheduler{scheduler_} { CreateBuffers(usage); ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); @@ -111,7 +110,7 @@ void VKStreamBuffer::Unmap(u64 size) { } auto& watch = current_watches[current_watch_cursor++]; watch.upper_bound = offset; - watch.fence.Watch(scheduler.GetFence()); + watch.tick = scheduler.CurrentTick(); } void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { @@ -121,31 +120,29 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { // Substract from the preferred heap size some bytes to avoid getting out of memory. const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; - const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024; - - VkBufferCreateInfo buffer_ci; - buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - buffer_ci.pNext = nullptr; - buffer_ci.flags = 0; - buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size); - buffer_ci.usage = usage; - buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - buffer_ci.queueFamilyIndexCount = 0; - buffer_ci.pQueueFamilyIndices = nullptr; - - buffer = device.GetLogical().CreateBuffer(buffer_ci); + // As per DXVK's example, using `heap_size / 2` + const VkDeviceSize allocable_size = heap_size / 2; + buffer = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer); const u32 required_flags = requirements.memoryTypeBits; stream_buffer_size = static_cast<u64>(requirements.size); - VkMemoryAllocateInfo memory_ai; - memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_ai.pNext = nullptr; - memory_ai.allocationSize = requirements.size; - memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags); - - memory = device.GetLogical().AllocateMemory(memory_ai); + memory = device.GetLogical().AllocateMemory({ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = requirements.size, + .memoryTypeIndex = GetMemoryType(memory_properties, required_flags), + }); buffer.BindMemory(*memory, 0); } @@ -160,7 +157,7 @@ void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) { auto& watch = previous_watches[wait_cursor]; wait_bound = watch.upper_bound; - watch.fence.Wait(); + scheduler.Wait(watch.tick); ++wait_cursor; } } diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 689f0d276..5e15ad78f 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -14,7 +14,6 @@ namespace Vulkan { class VKDevice; -class VKFence; class VKFenceWatch; class VKScheduler; @@ -44,8 +43,8 @@ public: } private: - struct Watch final { - VKFenceWatch fence; + struct Watch { + u64 tick{}; u64 upper_bound{}; }; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index bffd8f32a..9636a7c65 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -12,7 +12,7 @@ #include "core/core.h" #include "core/frontend/framebuffer_layout.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -56,8 +56,8 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi } // Anonymous namespace -VKSwapchain::VKSwapchain(VkSurfaceKHR surface, const VKDevice& device) - : surface{surface}, device{device} {} +VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const VKDevice& device_, VKScheduler& scheduler_) + : surface{surface_}, device{device_}, scheduler{scheduler_} {} VKSwapchain::~VKSwapchain() = default; @@ -75,35 +75,33 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { CreateSemaphores(); CreateImageViews(); - fences.resize(image_count, nullptr); + resource_ticks.clear(); + resource_ticks.resize(image_count); } void VKSwapchain::AcquireNextImage() { device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], {}, &image_index); - if (auto& fence = fences[image_index]; fence) { - fence->Wait(); - fence->Release(); - fence = nullptr; - } + scheduler.Wait(resource_ticks[image_index]); } -bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) { +bool VKSwapchain::Present(VkSemaphore render_semaphore) { const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; bool recreated = false; - VkPresentInfoKHR present_info; - present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; - present_info.pNext = nullptr; - present_info.waitSemaphoreCount = render_semaphore ? 2U : 1U; - present_info.pWaitSemaphores = semaphores.data(); - present_info.swapchainCount = 1; - present_info.pSwapchains = swapchain.address(); - present_info.pImageIndices = &image_index; - present_info.pResults = nullptr; + const VkPresentInfoKHR present_info{ + .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreCount = render_semaphore ? 2U : 1U, + .pWaitSemaphores = semaphores.data(), + .swapchainCount = 1, + .pSwapchains = swapchain.address(), + .pImageIndices = &image_index, + .pResults = nullptr, + }; switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: @@ -122,8 +120,7 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) { break; } - ASSERT(fences[image_index] == nullptr); - fences[image_index] = &fence; + resource_ticks[image_index] = scheduler.CurrentTick(); frame_index = (frame_index + 1) % static_cast<u32>(image_count); return recreated; } @@ -147,24 +144,26 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, requested_image_count = capabilities.maxImageCount; } - VkSwapchainCreateInfoKHR swapchain_ci; - swapchain_ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; - swapchain_ci.pNext = nullptr; - swapchain_ci.flags = 0; - swapchain_ci.surface = surface; - swapchain_ci.minImageCount = requested_image_count; - swapchain_ci.imageFormat = surface_format.format; - swapchain_ci.imageColorSpace = surface_format.colorSpace; - swapchain_ci.imageArrayLayers = 1; - swapchain_ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; - swapchain_ci.queueFamilyIndexCount = 0; - swapchain_ci.pQueueFamilyIndices = nullptr; - swapchain_ci.preTransform = capabilities.currentTransform; - swapchain_ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - swapchain_ci.presentMode = present_mode; - swapchain_ci.clipped = VK_FALSE; - swapchain_ci.oldSwapchain = nullptr; + VkSwapchainCreateInfoKHR swapchain_ci{ + .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .surface = surface, + .minImageCount = requested_image_count, + .imageFormat = surface_format.format, + .imageColorSpace = surface_format.colorSpace, + .imageExtent = {}, + .imageArrayLayers = 1, + .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .preTransform = capabilities.currentTransform, + .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, + .presentMode = present_mode, + .clipped = VK_FALSE, + .oldSwapchain = nullptr, + }; const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; @@ -173,8 +172,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT; swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); - } else { - swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; } // Request the size again to reduce the possibility of a TOCTOU race condition. @@ -200,20 +197,29 @@ void VKSwapchain::CreateSemaphores() { } void VKSwapchain::CreateImageViews() { - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - // ci.image - ci.viewType = VK_IMAGE_VIEW_TYPE_2D; - ci.format = image_format; - ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - ci.subresourceRange.baseMipLevel = 0; - ci.subresourceRange.levelCount = 1; - ci.subresourceRange.baseArrayLayer = 0; - ci.subresourceRange.layerCount = 1; + VkImageViewCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = {}, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_format, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; image_views.resize(image_count); for (std::size_t i = 0; i < image_count; i++) { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index a35d61345..6b39befdf 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -16,11 +16,11 @@ struct FramebufferLayout; namespace Vulkan { class VKDevice; -class VKFence; +class VKScheduler; class VKSwapchain { public: - explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device); + explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device, VKScheduler& scheduler); ~VKSwapchain(); /// Creates (or recreates) the swapchain with a given size. @@ -31,7 +31,7 @@ public: /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be /// recreated. Takes responsability for the ownership of fence. - bool Present(VkSemaphore render_semaphore, VKFence& fence); + bool Present(VkSemaphore render_semaphore); /// Returns true when the framebuffer layout has changed. bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; @@ -74,6 +74,7 @@ private: const VkSurfaceKHR surface; const VKDevice& device; + VKScheduler& scheduler; vk::SwapchainKHR swapchain; @@ -81,7 +82,7 @@ private: std::vector<VkImage> images; std::vector<vk::ImageView> image_views; std::vector<vk::Framebuffer> framebuffers; - std::vector<VKFence*> fences; + std::vector<u64> resource_ticks; std::vector<vk::Semaphore> present_semaphores; u32 image_index{}; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index bd93dcf20..f2c8f2ae1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -95,17 +95,18 @@ VkImageViewType GetImageViewType(SurfaceTarget target) { vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, std::size_t host_memory_size) { // TODO(Rodrigo): Move texture buffer creation to the buffer cache - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = static_cast<VkDeviceSize>(host_memory_size); - ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - return device.GetLogical().CreateBuffer(ci); + return device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = static_cast<VkDeviceSize>(host_memory_size), + .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); } VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, @@ -113,15 +114,16 @@ VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, std::size_t host_memory_size) { ASSERT(params.IsBuffer()); - VkBufferViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.buffer = buffer; - ci.format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; - ci.offset = 0; - ci.range = static_cast<VkDeviceSize>(host_memory_size); - return ci; + return { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = buffer, + .format = + MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format, + .offset = 0, + .range = static_cast<VkDeviceSize>(host_memory_size), + }; } VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { @@ -130,23 +132,24 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP const auto [format, attachable, storage] = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); - VkImageCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.imageType = SurfaceTargetToImage(params.target); - ci.format = format; - ci.mipLevels = params.num_levels; - ci.arrayLayers = static_cast<u32>(params.GetNumLayers()); - ci.samples = VK_SAMPLE_COUNT_1_BIT; - ci.tiling = VK_IMAGE_TILING_OPTIMAL; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + VkImageCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = SurfaceTargetToImage(params.target), + .format = format, + .extent = {}, + .mipLevels = params.num_levels, + .arrayLayers = static_cast<u32>(params.GetNumLayers()), + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }; if (attachable) { ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; @@ -185,12 +188,10 @@ u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::Swizzl } // Anonymous namespace -CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, +CachedSurface::CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool, GPUVAddr gpu_addr, const SurfaceParams& params) - : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system}, - device{device}, resource_manager{resource_manager}, + : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} { if (params.IsBuffer()) { buffer = CreateBuffer(device, params, host_memory_size); @@ -233,7 +234,7 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { UNIMPLEMENTED_IF(params.IsBuffer()); - if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { + if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); } @@ -321,22 +322,25 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { } VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { - VkBufferImageCopy copy; - copy.bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted); - copy.bufferRowLength = 0; - copy.bufferImageHeight = 0; - copy.imageSubresource.aspectMask = image->GetAspectMask(); - copy.imageSubresource.mipLevel = level; - copy.imageSubresource.baseArrayLayer = 0; - copy.imageSubresource.layerCount = static_cast<u32>(params.GetNumLayers()); - copy.imageOffset.x = 0; - copy.imageOffset.y = 0; - copy.imageOffset.z = 0; - copy.imageExtent.width = params.GetMipWidth(level); - copy.imageExtent.height = params.GetMipHeight(level); - copy.imageExtent.depth = - params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; - return copy; + return { + .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted), + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = + { + .aspectMask = image->GetAspectMask(), + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = static_cast<u32>(params.GetNumLayers()), + }, + .imageOffset = {.x = 0, .y = 0, .z = 0}, + .imageExtent = + { + .width = params.GetMipWidth(level), + .height = params.GetMipHeight(level), + .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U, + }, + }; } VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { @@ -380,7 +384,7 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; - if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { + if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. std::swap(swizzle[0], swizzle[2]); } @@ -392,11 +396,11 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); const bool is_first = x_source == SwizzleSource::R; switch (params.pixel_format) { - case VideoCore::Surface::PixelFormat::Z24S8: - case VideoCore::Surface::PixelFormat::Z32FS8: + case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT: + case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT: aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; break; - case VideoCore::Surface::PixelFormat::S8Z24: + case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM: aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; break; default: @@ -416,20 +420,29 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc ASSERT(num_slices == params.depth); } - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.image = surface.GetImageHandle(); - ci.viewType = image_view_type; - ci.format = surface.GetImage().GetFormat(); - ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]}; - ci.subresourceRange.aspectMask = aspect; - ci.subresourceRange.baseMipLevel = base_level; - ci.subresourceRange.levelCount = num_levels; - ci.subresourceRange.baseArrayLayer = base_layer; - ci.subresourceRange.layerCount = num_layers; - image_view = device.GetLogical().CreateImageView(ci); + image_view = device.GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = surface.GetImageHandle(), + .viewType = image_view_type, + .format = surface.GetImage().GetFormat(), + .components = + { + .r = swizzle[0], + .g = swizzle[1], + .b = swizzle[2], + .a = swizzle[3], + }, + .subresourceRange = + { + .aspectMask = aspect, + .baseMipLevel = base_level, + .levelCount = num_levels, + .baseArrayLayer = base_layer, + .layerCount = num_layers, + }, + }); return last_image_view = *image_view; } @@ -439,17 +452,29 @@ VkImageView CachedSurfaceView::GetAttachment() { return *render_target; } - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.image = surface.GetImageHandle(); - ci.format = surface.GetImage().GetFormat(); - ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - ci.subresourceRange.aspectMask = aspect_mask; - ci.subresourceRange.baseMipLevel = base_level; - ci.subresourceRange.levelCount = num_levels; + VkImageViewCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = surface.GetImageHandle(), + .viewType = VK_IMAGE_VIEW_TYPE_1D, + .format = surface.GetImage().GetFormat(), + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = aspect_mask, + .baseMipLevel = base_level, + .levelCount = num_levels, + .baseArrayLayer = 0, + .layerCount = 0, + }, + }; if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; ci.subresourceRange.baseArrayLayer = base_slice; @@ -463,19 +488,20 @@ VkImageView CachedSurfaceView::GetAttachment() { return *render_target; } -VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const VKDevice& device, VKResourceManager& resource_manager, - VKMemoryManager& memory_manager, VKScheduler& scheduler, - VKStagingBufferPool& staging_pool) - : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device}, - resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, - staging_pool{staging_pool} {} +VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, const VKDevice& device_, + VKMemoryManager& memory_manager_, VKScheduler& scheduler_, + VKStagingBufferPool& staging_pool_) + : TextureCache(rasterizer, maxwell3d, gpu_memory, device_.IsOptimalAstcSupported()), + device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ + staging_pool_} {} VKTextureCache::~VKTextureCache() = default; Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { - return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager, - scheduler, staging_pool, gpu_addr, params); + return std::make_shared<CachedSurface>(device, memory_manager, scheduler, staging_pool, + gpu_addr, params); } void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, @@ -502,24 +528,40 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - VkImageCopy copy; - copy.srcSubresource.aspectMask = src_surface->GetAspectMask(); - copy.srcSubresource.mipLevel = copy_params.source_level; - copy.srcSubresource.baseArrayLayer = copy_params.source_z; - copy.srcSubresource.layerCount = num_layers; - copy.srcOffset.x = copy_params.source_x; - copy.srcOffset.y = copy_params.source_y; - copy.srcOffset.z = 0; - copy.dstSubresource.aspectMask = dst_surface->GetAspectMask(); - copy.dstSubresource.mipLevel = copy_params.dest_level; - copy.dstSubresource.baseArrayLayer = dst_base_layer; - copy.dstSubresource.layerCount = num_layers; - copy.dstOffset.x = copy_params.dest_x; - copy.dstOffset.y = copy_params.dest_y; - copy.dstOffset.z = dst_offset_z; - copy.extent.width = copy_params.width; - copy.extent.height = copy_params.height; - copy.extent.depth = extent_z; + const VkImageCopy copy{ + .srcSubresource = + { + .aspectMask = src_surface->GetAspectMask(), + .mipLevel = copy_params.source_level, + .baseArrayLayer = copy_params.source_z, + .layerCount = num_layers, + }, + .srcOffset = + { + .x = static_cast<s32>(copy_params.source_x), + .y = static_cast<s32>(copy_params.source_y), + .z = 0, + }, + .dstSubresource = + { + .aspectMask = dst_surface->GetAspectMask(), + .mipLevel = copy_params.dest_level, + .baseArrayLayer = dst_base_layer, + .layerCount = num_layers, + }, + .dstOffset = + { + .x = static_cast<s32>(copy_params.dest_x), + .y = static_cast<s32>(copy_params.dest_y), + .z = static_cast<s32>(dst_offset_z), + }, + .extent = + { + .width = copy_params.width, + .height = copy_params.height, + .depth = extent_z, + }, + }; const VkImage src_image = src_surface->GetImageHandle(); const VkImage dst_image = dst_surface->GetImageHandle(); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 807e26c8a..39202feba 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -15,10 +15,6 @@ #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/texture_cache.h" -namespace Core { -class System; -} - namespace VideoCore { class RasterizerInterface; } @@ -27,7 +23,6 @@ namespace Vulkan { class RasterizerVulkan; class VKDevice; -class VKResourceManager; class VKScheduler; class VKStagingBufferPool; @@ -45,8 +40,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> { friend CachedSurfaceView; public: - explicit CachedSurface(Core::System& system, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, + explicit CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool, GPUVAddr gpu_addr, const SurfaceParams& params); ~CachedSurface(); @@ -101,9 +95,7 @@ private: VkImageSubresourceRange GetImageSubresourceRange() const; - Core::System& system; const VKDevice& device; - VKResourceManager& resource_manager; VKMemoryManager& memory_manager; VKScheduler& scheduler; VKStagingBufferPool& staging_pool; @@ -201,10 +193,10 @@ private: class VKTextureCache final : public TextureCacheBase { public: - explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const VKDevice& device, VKResourceManager& resource_manager, - VKMemoryManager& memory_manager, VKScheduler& scheduler, - VKStagingBufferPool& staging_pool); + explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKTextureCache(); private: @@ -219,7 +211,6 @@ private: void BufferCopy(Surface& src_surface, Surface& dst_surface) override; const VKDevice& device; - VKResourceManager& resource_manager; VKMemoryManager& memory_manager; VKScheduler& scheduler; VKStagingBufferPool& staging_pool; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 051298cc8..2598440fb 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -6,6 +6,7 @@ #include <exception> #include <memory> #include <optional> +#include <string_view> #include <utility> #include <vector> @@ -17,21 +18,42 @@ namespace Vulkan::vk { namespace { +template <typename Func> +void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld, + Func&& func) { + // Calling GetProperties calls Vulkan more than needed. But they are supposed to be cheap + // functions. + std::stable_sort(devices.begin(), devices.end(), + [&dld, &func](VkPhysicalDevice lhs, VkPhysicalDevice rhs) { + return func(vk::PhysicalDevice(lhs, dld).GetProperties(), + vk::PhysicalDevice(rhs, dld).GetProperties()); + }); +} + +void SortPhysicalDevicesPerVendor(std::vector<VkPhysicalDevice>& devices, + const InstanceDispatch& dld, + std::initializer_list<u32> vendor_ids) { + for (auto it = vendor_ids.end(); it != vendor_ids.begin();) { + --it; + SortPhysicalDevices(devices, dld, [id = *it](const auto& lhs, const auto& rhs) { + return lhs.vendorID == id && rhs.vendorID != id; + }); + } +} + void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld) { - std::stable_sort(devices.begin(), devices.end(), [&](auto lhs, auto rhs) { - // This will call Vulkan more than needed, but these calls are cheap. - const auto lhs_properties = vk::PhysicalDevice(lhs, dld).GetProperties(); - const auto rhs_properties = vk::PhysicalDevice(rhs, dld).GetProperties(); - - // Prefer discrete GPUs, Nvidia over AMD, AMD over Intel, Intel over the rest. - const bool preferred = - (lhs_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU && - rhs_properties.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) || - (lhs_properties.vendorID == 0x10DE && rhs_properties.vendorID != 0x10DE) || - (lhs_properties.vendorID == 0x1002 && rhs_properties.vendorID != 0x1002) || - (lhs_properties.vendorID == 0x8086 && rhs_properties.vendorID != 0x8086); - return !preferred; + // Sort by name, this will set a base and make GPUs with higher numbers appear first + // (e.g. GTX 1650 will intentionally be listed before a GTX 1080). + SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) { + return std::string_view{lhs.deviceName} > std::string_view{rhs.deviceName}; + }); + // Prefer discrete over non-discrete + SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) { + return lhs.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU && + rhs.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU; }); + // Prefer Nvidia over AMD, AMD over Intel, Intel over the rest. + SortPhysicalDevicesPerVendor(devices, dld, {0x10DE, 0x1002, 0x8086}); } template <typename T> @@ -148,6 +170,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkGetFenceStatus); X(vkGetImageMemoryRequirements); X(vkGetQueryPoolResults); + X(vkGetSemaphoreCounterValueKHR); X(vkMapMemory); X(vkQueueSubmit); X(vkResetFences); @@ -156,6 +179,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkUpdateDescriptorSetWithTemplateKHR); X(vkUpdateDescriptorSets); X(vkWaitForFences); + X(vkWaitSemaphoresKHR); #undef X } @@ -262,6 +286,22 @@ const char* ToString(VkResult result) noexcept { return "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT"; case VkResult::VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT: return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; + case VkResult::VK_ERROR_UNKNOWN: + return "VK_ERROR_UNKNOWN"; + case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: + return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; + case VkResult::VK_THREAD_IDLE_KHR: + return "VK_THREAD_IDLE_KHR"; + case VkResult::VK_THREAD_DONE_KHR: + return "VK_THREAD_DONE_KHR"; + case VkResult::VK_OPERATION_DEFERRED_KHR: + return "VK_OPERATION_DEFERRED_KHR"; + case VkResult::VK_OPERATION_NOT_DEFERRED_KHR: + return "VK_OPERATION_NOT_DEFERRED_KHR"; + case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT: + return "VK_PIPELINE_COMPILE_REQUIRED_EXT"; + case VkResult::VK_RESULT_MAX_ENUM: + return "VK_RESULT_MAX_ENUM"; } return "Unknown"; } @@ -377,24 +417,26 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions, InstanceDispatch& dld) noexcept { - VkApplicationInfo application_info; - application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; - application_info.pNext = nullptr; - application_info.pApplicationName = "yuzu Emulator"; - application_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); - application_info.pEngineName = "yuzu Emulator"; - application_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); - application_info.apiVersion = VK_API_VERSION_1_1; - - VkInstanceCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.pApplicationInfo = &application_info; - ci.enabledLayerCount = layers.size(); - ci.ppEnabledLayerNames = layers.data(); - ci.enabledExtensionCount = extensions.size(); - ci.ppEnabledExtensionNames = extensions.data(); + static constexpr VkApplicationInfo application_info{ + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pNext = nullptr, + .pApplicationName = "yuzu Emulator", + .applicationVersion = VK_MAKE_VERSION(0, 1, 0), + .pEngineName = "yuzu Emulator", + .engineVersion = VK_MAKE_VERSION(0, 1, 0), + .apiVersion = VK_API_VERSION_1_1, + }; + + const VkInstanceCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .pApplicationInfo = &application_info, + .enabledLayerCount = layers.size(), + .ppEnabledLayerNames = layers.data(), + .enabledExtensionCount = extensions.size(), + .ppEnabledExtensionNames = extensions.data(), + }; VkInstance instance; if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { @@ -425,19 +467,20 @@ std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices( DebugCallback Instance::TryCreateDebugCallback( PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { - VkDebugUtilsMessengerCreateInfoEXT ci; - ci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; - ci.pNext = nullptr; - ci.flags = 0; - ci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; - ci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; - ci.pfnUserCallback = callback; - ci.pUserData = nullptr; + const VkDebugUtilsMessengerCreateInfoEXT ci{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = callback, + .pUserData = nullptr, + }; VkDebugUtilsMessengerEXT messenger; if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) { @@ -468,12 +511,13 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c } CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { - VkCommandBufferAllocateInfo ai; - ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - ai.pNext = nullptr; - ai.commandPool = handle; - ai.level = level; - ai.commandBufferCount = static_cast<u32>(num_buffers); + const VkCommandBufferAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .pNext = nullptr, + .commandPool = handle, + .level = level, + .commandBufferCount = static_cast<u32>(num_buffers), + }; std::unique_ptr buffers = std::make_unique<VkCommandBuffer[]>(num_buffers); switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) { @@ -497,17 +541,18 @@ std::vector<VkImage> SwapchainKHR::GetImages() const { Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, Span<const char*> enabled_extensions, const void* next, DeviceDispatch& dld) noexcept { - VkDeviceCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - ci.pNext = next; - ci.flags = 0; - ci.queueCreateInfoCount = queues_ci.size(); - ci.pQueueCreateInfos = queues_ci.data(); - ci.enabledLayerCount = 0; - ci.ppEnabledLayerNames = nullptr; - ci.enabledExtensionCount = enabled_extensions.size(); - ci.ppEnabledExtensionNames = enabled_extensions.data(); - ci.pEnabledFeatures = nullptr; + const VkDeviceCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pNext = next, + .flags = 0, + .queueCreateInfoCount = queues_ci.size(), + .pQueueCreateInfos = queues_ci.data(), + .enabledLayerCount = 0, + .ppEnabledLayerNames = nullptr, + .enabledExtensionCount = enabled_extensions.size(), + .ppEnabledExtensionNames = enabled_extensions.data(), + .pEnabledFeatures = nullptr, + }; VkDevice device; if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { @@ -548,11 +593,15 @@ ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const { } Semaphore Device::CreateSemaphore() const { - VkSemaphoreCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; + static constexpr VkSemaphoreCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; + return CreateSemaphore(ci); +} +Semaphore Device::CreateSemaphore(const VkSemaphoreCreateInfo& ci) const { VkSemaphore object; Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object)); return Semaphore(object, handle, *dld); @@ -639,10 +688,12 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons } Event Device::CreateEvent() const { - VkEventCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; + static constexpr VkEventCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; + VkEvent object; Check(dld->vkCreateEvent(handle, &ci, nullptr, &object)); return Event(object, handle, *dld); @@ -778,7 +829,7 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp VK_SUCCESS) { return std::nullopt; } - return properties; + return std::move(properties); } std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 71daac9d7..234e01693 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -267,6 +267,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkGetFenceStatus vkGetFenceStatus; PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; PFN_vkGetQueryPoolResults vkGetQueryPoolResults; + PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR; PFN_vkMapMemory vkMapMemory; PFN_vkQueueSubmit vkQueueSubmit; PFN_vkResetFences vkResetFences; @@ -275,6 +276,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; PFN_vkWaitForFences vkWaitForFences; + PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR; }; /// Loads instance agnostic function pointers. @@ -550,7 +552,6 @@ using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>; using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>; using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>; using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>; -using Semaphore = Handle<VkSemaphore, VkDevice, DeviceDispatch>; using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>; using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>; @@ -582,7 +583,8 @@ public: /// Construct a queue handle. constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {} - VkResult Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const noexcept { + VkResult Submit(Span<VkSubmitInfo> submit_infos, + VkFence fence = VK_NULL_HANDLE) const noexcept { return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence); } @@ -674,6 +676,44 @@ public: } }; +class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { + using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; + +public: + [[nodiscard]] u64 GetCounter() const { + u64 value; + Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); + return value; + } + + /** + * Waits for a timeline semaphore on the host. + * + * @param value Value to wait + * @param timeout Time in nanoseconds to timeout + * @return True on successful wait, false on timeout + */ + bool Wait(u64 value, u64 timeout = std::numeric_limits<u64>::max()) const { + const VkSemaphoreWaitInfoKHR wait_info{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .semaphoreCount = 1, + .pSemaphores = &handle, + .pValues = &value, + }; + const VkResult result = dld->vkWaitSemaphoresKHR(owner, &wait_info, timeout); + switch (result) { + case VK_SUCCESS: + return true; + case VK_TIMEOUT: + return false; + default: + throw Exception(result); + } + } +}; + class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle; @@ -694,6 +734,8 @@ public: Semaphore CreateSemaphore() const; + Semaphore CreateSemaphore(const VkSemaphoreCreateInfo& ci) const; + Fence CreateFence(const VkFenceCreateInfo& ci) const; DescriptorPool CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const; @@ -756,8 +798,8 @@ public: } VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size, - void* data, VkDeviceSize stride, VkQueryResultFlags flags) const - noexcept { + void* data, VkDeviceSize stride, + VkQueryResultFlags flags) const noexcept { return dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride, flags); } @@ -849,8 +891,8 @@ public: dld->vkCmdBindPipeline(handle, bind_point, pipeline); } - void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType index_type) const - noexcept { + void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, + VkIndexType index_type) const noexcept { dld->vkCmdBindIndexBuffer(handle, buffer, offset, index_type); } @@ -863,8 +905,8 @@ public: BindVertexBuffers(binding, 1, &buffer, &offset); } - void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, u32 first_instance) const - noexcept { + void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, + u32 first_instance) const noexcept { dld->vkCmdDraw(handle, vertex_count, instance_count, first_vertex, first_instance); } @@ -874,15 +916,15 @@ public: first_instance); } - void ClearAttachments(Span<VkClearAttachment> attachments, Span<VkClearRect> rects) const - noexcept { + void ClearAttachments(Span<VkClearAttachment> attachments, + Span<VkClearRect> rects) const noexcept { dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(), rects.data()); } void BlitImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, - VkImageLayout dst_layout, Span<VkImageBlit> regions, VkFilter filter) const - noexcept { + VkImageLayout dst_layout, Span<VkImageBlit> regions, + VkFilter filter) const noexcept { dld->vkCmdBlitImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), regions.data(), filter); } @@ -907,8 +949,8 @@ public: regions.data()); } - void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, Span<VkBufferCopy> regions) const - noexcept { + void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, + Span<VkBufferCopy> regions) const noexcept { dld->vkCmdCopyBuffer(handle, src_buffer, dst_buffer, regions.size(), regions.data()); } @@ -924,8 +966,8 @@ public: regions.data()); } - void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, u32 data) const - noexcept { + void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, + u32 data) const noexcept { dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data); } |