diff options
Diffstat (limited to 'src/video_core/renderer_vulkan')
19 files changed, 231 insertions, 75 deletions
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 28d4b15a0..1032c9d12 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -3,6 +3,8 @@ #include <algorithm> +#include "video_core/renderer_vulkan/vk_texture_cache.h" + #include "common/settings.h" #include "video_core/host_shaders/blit_color_float_frag_spv.h" #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" @@ -14,12 +16,12 @@ #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" #include "video_core/host_shaders/vulkan_color_clear_frag_spv.h" #include "video_core/host_shaders/vulkan_color_clear_vert_spv.h" +#include "video_core/host_shaders/vulkan_depthstencil_clear_frag_spv.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" -#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/surface.h" #include "video_core/vulkan_common/vulkan_device.h" @@ -427,6 +429,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)), clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)), clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)), + clear_stencil_frag(BuildShader(device, VULKAN_DEPTHSTENCIL_CLEAR_FRAG_SPV)), convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), @@ -592,6 +595,28 @@ void BlitImageHelper::ClearColor(const Framebuffer* 
dst_framebuffer, u8 color_ma scheduler.InvalidateState(); } +void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool depth_clear, + f32 clear_depth, u8 stencil_mask, u32 stencil_ref, + u32 stencil_compare_mask, const Region2D& dst_region) { + const BlitDepthStencilPipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .depth_clear = depth_clear, + .stencil_mask = stencil_mask, + .stencil_compare_mask = stencil_compare_mask, + .stencil_ref = stencil_ref, + }; + const VkPipeline pipeline = FindOrEmplaceClearStencilPipeline(key); + const VkPipelineLayout layout = *clear_color_pipeline_layout; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + BindBlitState(cmdbuf, dst_region); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; @@ -819,6 +844,61 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearColorPipeline(const BlitImagePipel return *clear_color_pipelines.back(); } +VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline( + const BlitDepthStencilPipelineKey& key) { + const auto it = std::ranges::find(clear_stencil_keys, key); + if (it != clear_stencil_keys.end()) { + return *clear_stencil_pipelines[std::distance(clear_stencil_keys.begin(), it)]; + } + clear_stencil_keys.push_back(key); + const std::array stages = MakeStages(*clear_color_vert, *clear_stencil_frag); + const auto stencil = VkStencilOpState{ + .failOp = VK_STENCIL_OP_KEEP, + .passOp = VK_STENCIL_OP_REPLACE, + .depthFailOp = VK_STENCIL_OP_KEEP, + .compareOp = VK_COMPARE_OP_ALWAYS, + .compareMask = key.stencil_compare_mask, + .writeMask = 
key.stencil_mask, + .reference = key.stencil_ref, + }; + const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthTestEnable = VK_FALSE, + .depthWriteEnable = key.depth_clear, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = VK_FALSE, + .stencilTestEnable = VK_TRUE, + .front = stencil, + .back = stencil, + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, + }; + clear_stencil_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *clear_color_pipeline_layout, + .renderPass = key.renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + })); + return *clear_stencil_pipelines.back(); +} + void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth) { if (pipeline) { diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 2976a7d91..dcfe217aa 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -27,6 +27,16 @@ struct BlitImagePipelineKey { Tegra::Engines::Fermi2D::Operation operation; }; +struct BlitDepthStencilPipelineKey { + 
constexpr auto operator<=>(const BlitDepthStencilPipelineKey&) const noexcept = default; + + VkRenderPass renderpass; + bool depth_clear; + u8 stencil_mask; + u32 stencil_compare_mask; + u32 stencil_ref; +}; + class BlitImageHelper { public: explicit BlitImageHelper(const Device& device, Scheduler& scheduler, @@ -64,6 +74,10 @@ public: void ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask, const std::array<f32, 4>& clear_color, const Region2D& dst_region); + void ClearDepthStencil(const Framebuffer* dst_framebuffer, bool depth_clear, f32 clear_depth, + u8 stencil_mask, u32 stencil_ref, u32 stencil_compare_mask, + const Region2D& dst_region); + private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view); @@ -76,6 +90,8 @@ private: [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); [[nodiscard]] VkPipeline FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key); + [[nodiscard]] VkPipeline FindOrEmplaceClearStencilPipeline( + const BlitDepthStencilPipelineKey& key); void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth); @@ -108,6 +124,7 @@ private: vk::ShaderModule blit_depth_stencil_frag; vk::ShaderModule clear_color_vert; vk::ShaderModule clear_color_frag; + vk::ShaderModule clear_stencil_frag; vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; @@ -122,6 +139,8 @@ private: std::vector<vk::Pipeline> blit_depth_stencil_pipelines; std::vector<BlitImagePipelineKey> clear_color_keys; std::vector<vk::Pipeline> clear_color_pipelines; + std::vector<BlitDepthStencilPipelineKey> clear_stencil_keys; + std::vector<vk::Pipeline> clear_stencil_pipelines; vk::Pipeline convert_d32_to_r32_pipeline; vk::Pipeline convert_r32_to_d32_pipeline; vk::Pipeline convert_d16_to_r16_pipeline; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp 
b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index a8540339d..208e88533 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -126,7 +126,7 @@ struct FormatTuple { {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT - {VK_FORMAT_A2R10G10B10_UNORM_PACK32, Attachable | Storage}, // A2R10G10B10_UNORM + {VK_FORMAT_A2R10G10B10_UNORM_PACK32, Attachable}, // A2R10G10B10_UNORM {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle) {VK_FORMAT_R5G5B5A1_UNORM_PACK16}, // A5B5G5R1_UNORM (specially swizzled) {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM @@ -185,7 +185,7 @@ struct FormatTuple { {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB {VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB - {VK_FORMAT_R4G4B4A4_UNORM_PACK16}, // A4B4G4R4_UNORM + {VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT}, // A4B4G4R4_UNORM {VK_FORMAT_R4G4_UNORM_PACK8}, // G4R4_UNORM {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 454bb66a4..c4c30d807 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -66,21 +66,6 @@ std::string BuildCommaSeparatedExtensions( return fmt::format("{}", fmt::join(available_extensions, ",")); } -DebugCallback MakeDebugCallback(const vk::Instance& instance, const vk::InstanceDispatch& dld) { - if (!Settings::values.renderer_debug) { - return DebugCallback{}; - } - const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); - const auto it = std::ranges::find_if(*properties, [](const auto& prop) { - return 
std::strcmp(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, prop.extensionName) == 0; - }); - if (it != properties->end()) { - return CreateDebugUtilsCallback(instance); - } else { - return CreateDebugReportCallback(instance); - } -} - } // Anonymous namespace Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, @@ -103,7 +88,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, Settings::values.renderer_debug.GetValue())), - debug_callback(MakeDebugCallback(instance, dld)), + debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance) + : vk::DebugUtilsMessenger{}), surface(CreateSurface(instance, render_window.GetWindowInfo())), device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(), scheduler(device, state_tracker), diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index ca22c0baa..590bc1c64 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -7,11 +7,12 @@ #include <string> #include <variant> +#include "video_core/renderer_vulkan/vk_rasterizer.h" + #include "common/dynamic_library.h" #include "video_core/renderer_base.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_present_manager.h" -#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" @@ -34,8 +35,6 @@ class GPU; namespace Vulkan { -using DebugCallback = std::variant<vk::DebugUtilsMessenger, vk::DebugReportCallback>; - Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, 
VkSurfaceKHR surface); @@ -74,7 +73,7 @@ private: vk::InstanceDispatch dld; vk::Instance instance; - DebugCallback debug_callback; + vk::DebugUtilsMessenger debug_messenger; vk::SurfaceKHR surface; ScreenInfo screen_info; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index ad3b29f0e..31928bb94 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -566,7 +566,7 @@ void BlitScreen::CreateDescriptorPool() { const VkDescriptorPoolCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, - .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .flags = 0, .maxSets = static_cast<u32>(image_count), .poolSizeCount = static_cast<u32>(pool_sizes.size()), .pPoolSizes = pool_sizes.data(), @@ -576,7 +576,7 @@ void BlitScreen::CreateDescriptorPool() { const VkDescriptorPoolCreateInfo ci_aa{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, - .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .flags = 0, .maxSets = static_cast<u32>(image_count), .poolSizeCount = static_cast<u32>(pool_sizes_aa.size()), .pPoolSizes = pool_sizes_aa.data(), diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index f8cd2a5d8..e15865d16 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -7,8 +7,9 @@ #include <span> #include <vector> -#include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" + +#include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" @@ -528,17 +529,20 @@ void 
BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi buffer_handles.push_back(handle); } if (device.IsExtExtendedDynamicStateSupported()) { - scheduler.Record([bindings_ = std::move(bindings), + scheduler.Record([this, bindings_ = std::move(bindings), buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, - bindings_.max_index - bindings_.min_index, + std::min(bindings_.max_index - bindings_.min_index, + device.GetMaxVertexInputBindings()), buffer_handles_.data(), bindings_.offsets.data(), bindings_.sizes.data(), bindings_.strides.data()); }); } else { - scheduler.Record([bindings_ = std::move(bindings), + scheduler.Record([this, bindings_ = std::move(bindings), buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers(bindings_.min_index, bindings_.max_index - bindings_.min_index, + cmdbuf.BindVertexBuffers(bindings_.min_index, + std::min(bindings_.max_index - bindings_.min_index, + device.GetMaxVertexInputBindings()), buffer_handles_.data(), bindings_.offsets.data()); }); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 3bc8553e1..54ee030ce 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -6,6 +6,8 @@ #include <optional> #include <utility> +#include "video_core/renderer_vulkan/vk_texture_cache.h" + #include "common/assert.h" #include "common/common_types.h" #include "common/div_ceil.h" @@ -16,7 +18,6 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" -#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/texture_cache/accelerated_swizzle.h" #include 
"video_core/texture_cache/types.h" diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index b5ae6443c..6048a301f 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -77,7 +77,7 @@ static void AllocatePool(const Device& device, DescriptorBank& bank) { bank.pools.push_back(device.GetLogical().CreateDescriptorPool({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, - .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .flags = 0, .maxSets = sets_per_pool, .poolSizeCount = static_cast<u32>(pool_cursor), .pPoolSizes = std::data(pool_sizes), diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 9bcdca2fb..ce8f3f3c2 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -150,7 +150,7 @@ void FSR::CreateDescriptorPool() { const VkDescriptorPoolCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, - .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .flags = 0, .maxSets = static_cast<u32>(image_count * 2), .poolSizeCount = static_cast<u32>(pool_sizes.size()), .pPoolSizes = pool_sizes.data(), diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index ad35cacac..f2fd2670f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -7,9 +7,10 @@ #include <boost/container/small_vector.hpp> #include <boost/container/static_vector.hpp> +#include "video_core/renderer_vulkan/pipeline_helper.h" + #include "common/bit_field.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/pipeline_statistics.h" #include 
"video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4f84d8497..a1ec1a100 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -294,10 +294,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()}, - workers(device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY - ? 1 - : (std::max(std::thread::hardware_concurrency(), 2U) - 1), - "VkPipelineBuilder"), +#ifdef ANDROID + workers(1, "VkPipelineBuilder"), +#else + workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), +#endif serialization_thread(1, "VkPipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverId driver_id{device.GetDriverID()}; @@ -584,7 +585,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineCacheKey& key, std::span<Shader::Environment* const> envs, PipelineStatistics* statistics, bool build_in_parallel) try { - LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + auto hash = key.Hash(); + LOG_INFO(Render_Vulkan, "0x{:016x}", hash); size_t env_index{0}; std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; const bool uses_vertex_a{key.unique_hashes[0] != 0}; @@ -610,9 +612,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); - if 
(Settings::values.dump_shaders) { - env.Dump(key.unique_hashes[index]); - } if (!uses_vertex_a || index != 1) { // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); @@ -623,6 +622,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } + if (Settings::values.dump_shaders) { + env.Dump(hash, key.unique_hashes[index]); + } + if (programs[index].info.requires_layer_emulation) { layer_source_program = &programs[index]; } @@ -663,6 +666,19 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( std::move(modules), infos); } catch (const Shader::Exception& exception) { + auto hash = key.Hash(); + size_t env_index{0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + env.Dump(hash, key.unique_hashes[index]); + } LOG_ERROR(Render_Vulkan, "{}", exception.what()); return nullptr; } @@ -712,18 +728,19 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, PipelineStatistics* statistics, bool build_in_parallel) try { + auto hash = key.Hash(); if (device.HasBrokenCompute()) { - LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash()); + LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", hash); return nullptr; } - LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + LOG_INFO(Render_Vulkan, "0x{:016x}", hash); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; // Dump it before error. 
if (Settings::values.dump_shaders) { - env.Dump(key.Hash()); + env.Dump(hash, key.unique_hash); } auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 456bb040e..01e76a82c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -6,6 +6,8 @@ #include <memory> #include <mutex> +#include "video_core/renderer_vulkan/renderer_vulkan.h" + #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" @@ -18,7 +20,6 @@ #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" @@ -427,15 +428,27 @@ void RasterizerVulkan::Clear(u32 layer_count) { if (aspect_flags == 0) { return; } - scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, - clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { - VkClearAttachment attachment; - attachment.aspectMask = aspect_flags; - attachment.colorAttachment = 0; - attachment.clearValue.depthStencil.depth = clear_depth; - attachment.clearValue.depthStencil.stencil = clear_stencil; - cmdbuf.ClearAttachments(attachment, clear_rect); - }); + + if (use_stencil && regs.stencil_front_mask != 0xFF && regs.stencil_front_mask != 0) { + Region2D dst_region = { + Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, + Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width), + .y = clear_rect.rect.offset.y + + static_cast<s32>(clear_rect.rect.extent.height)}}; + blit_image.ClearDepthStencil(framebuffer, 
use_depth, regs.clear_depth, + static_cast<u8>(regs.stencil_front_mask), regs.clear_stencil, + regs.stencil_front_func_mask, dst_region); + } else { + scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, + clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { + VkClearAttachment attachment; + attachment.aspectMask = aspect_flags; + attachment.colorAttachment = 0; + attachment.clearValue.depthStencil.depth = clear_depth; + attachment.clearValue.depthStencil.stencil = clear_stencil; + cmdbuf.ClearAttachments(attachment, clear_rect); + }); + } } void RasterizerVulkan::DispatchCompute() { @@ -450,6 +463,20 @@ void RasterizerVulkan::DispatchCompute() { pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache); const auto& qmd{kepler_compute->launch_description}; + auto indirect_address = kepler_compute->GetIndirectComputeAddress(); + if (indirect_address) { + // DispatchIndirect + static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; + const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite; + const auto [buffer, offset] = + buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([indirect_buffer = buffer->Handle(), + indirect_offset = offset](vk::CommandBuffer cmdbuf) { + cmdbuf.DispatchIndirect(indirect_buffer, indirect_offset); + }); + return; + } const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); @@ -829,7 +856,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, } const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; - const auto post_op = 
VideoCommon::ObtainBufferOperation::DoNothing; + const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing + : VideoCommon::ObtainBufferOperation::MarkAsWritten; const auto [buffer, offset] = buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); @@ -838,8 +866,12 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, const std::span copy_span{&copy, 1}; if constexpr (IS_IMAGE_UPLOAD) { + texture_cache.PrepareImage(image_id, true, false); image->UploadMemory(buffer->Handle(), offset, copy_span); } else { + if (offset % BytesPerBlock(image->info.format)) { + return false; + } texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span, buffer_operand.address, buffer_size); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 73257d964..b31982485 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -7,13 +7,14 @@ #include <boost/container/static_vector.hpp> +#include "video_core/renderer_vulkan/vk_buffer_cache.h" + #include "common/common_types.h" #include "video_core/control/channel_state_cache.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_vulkan/blit_image.h" -#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 17ef61147..89fd31b4f 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -6,11 +6,12 @@ #include <thread> #include <utility> +#include 
"video_core/renderer_vulkan/vk_query_cache.h" + #include "common/microprofile.h" #include "common/thread.h" #include "video_core/renderer_vulkan/vk_command_pool.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" -#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index d3cddac69..81ef98f61 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -45,8 +45,8 @@ static VkPresentModeKHR ChooseSwapPresentMode(bool has_imm, bool has_mailbox, return mode; } switch (mode) { - case Settings::VSyncMode::FIFO: - case Settings::VSyncMode::FIFORelaxed: + case Settings::VSyncMode::Fifo: + case Settings::VSyncMode::FifoRelaxed: if (has_mailbox) { return Settings::VSyncMode::Mailbox; } else if (has_imm) { @@ -59,8 +59,8 @@ static VkPresentModeKHR ChooseSwapPresentMode(bool has_imm, bool has_mailbox, }(); if ((setting == Settings::VSyncMode::Mailbox && !has_mailbox) || (setting == Settings::VSyncMode::Immediate && !has_imm) || - (setting == Settings::VSyncMode::FIFORelaxed && !has_fifo_relaxed)) { - setting = Settings::VSyncMode::FIFO; + (setting == Settings::VSyncMode::FifoRelaxed && !has_fifo_relaxed)) { + setting = Settings::VSyncMode::Fifo; } switch (setting) { @@ -68,9 +68,9 @@ static VkPresentModeKHR ChooseSwapPresentMode(bool has_imm, bool has_mailbox, return VK_PRESENT_MODE_IMMEDIATE_KHR; case Settings::VSyncMode::Mailbox: return VK_PRESENT_MODE_MAILBOX_KHR; - case Settings::VSyncMode::FIFO: + case Settings::VSyncMode::Fifo: return VK_PRESENT_MODE_FIFO_KHR; - case Settings::VSyncMode::FIFORelaxed: + case Settings::VSyncMode::FifoRelaxed: return VK_PRESENT_MODE_FIFO_RELAXED_KHR; default: return VK_PRESENT_MODE_FIFO_KHR; diff 
--git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ed048f7b8..f25842476 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -11,6 +11,8 @@ #include "common/bit_util.h" #include "common/settings.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" + #include "video_core/engines/fermi_2d.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" @@ -18,7 +20,6 @@ #include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" -#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/texture_cache/formatter.h" #include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/util.h" @@ -599,7 +600,7 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im } void TryTransformSwizzleIfNeeded(PixelFormat format, std::array<SwizzleSource, 4>& swizzle, - bool emulate_bgr565) { + bool emulate_bgr565, bool emulate_a4b4g4r4) { switch (format) { case PixelFormat::A1B5G5R5_UNORM: std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed); @@ -615,6 +616,11 @@ void TryTransformSwizzleIfNeeded(PixelFormat format, std::array<SwizzleSource, 4 case PixelFormat::G4R4_UNORM: std::ranges::transform(swizzle, swizzle.begin(), SwapGreenRed); break; + case PixelFormat::A4B4G4R4_UNORM: + if (emulate_a4b4g4r4) { + std::ranges::reverse(swizzle); + } + break; default: break; } @@ -817,7 +823,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched : device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_}, staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_}, render_pass_cache{render_pass_cache_}, 
resolution{Settings::values.resolution_info} { - if (Settings::values.accelerate_astc) { + if (Settings::values.accelerate_astc.GetValue() == Settings::AstcDecodeMode::Gpu) { astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue, memory_allocator); } @@ -1313,12 +1319,19 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu runtime->ViewFormats(info.format))), aspect_mask(ImageAspectMask(info.format)) { if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { - if (Settings::values.async_astc.GetValue()) { + switch (Settings::values.accelerate_astc.GetValue()) { + case Settings::AstcDecodeMode::Gpu: + if (Settings::values.astc_recompression.GetValue() == + Settings::AstcRecompression::Uncompressed && + info.size.depth == 1) { + flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; + } + break; + case Settings::AstcDecodeMode::CpuAsynchronous: flags |= VideoCommon::ImageFlagBits::AsynchronousDecode; - } else if (Settings::values.astc_recompression.GetValue() == - Settings::AstcRecompression::Uncompressed && - Settings::values.accelerate_astc.GetValue() && info.size.depth == 1) { - flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; + break; + default: + break; } flags |= VideoCommon::ImageFlagBits::Converted; flags |= VideoCommon::ImageFlagBits::CostlyLoad; @@ -1653,7 +1666,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI }; if (!info.IsRenderTarget()) { swizzle = info.Swizzle(); - TryTransformSwizzleIfNeeded(format, swizzle, device->MustEmulateBGR565()); + TryTransformSwizzleIfNeeded(format, swizzle, device->MustEmulateBGR565(), + !device->IsExt4444FormatsSupported()); if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) { std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h 
b/src/video_core/renderer_vulkan/vk_texture_cache.h index 6621210ea..565ce19a9 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -5,11 +5,12 @@ #include <span> +#include "video_core/texture_cache/texture_cache_base.h" + #include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/texture_cache/image_view_base.h" -#include "video_core/texture_cache/texture_cache_base.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp index 460d8d59d..04a51f2d1 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp @@ -62,7 +62,7 @@ void TurboMode::Run(std::stop_token stop_token) { auto descriptor_pool = dld.CreateDescriptorPool(VkDescriptorPoolCreateInfo{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, - .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .flags = 0, .maxSets = 1, .poolSizeCount = 1, .pPoolSizes = &pool_size, |