diff options
Diffstat (limited to '')
-rw-r--r-- | src/video_core/renderer_vulkan/blit_image.cpp | 212 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/blit_image.h | 16 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/renderer_vulkan.cpp | 21 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_fsr.cpp | 144 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 26 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 41 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 1 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_scheduler.cpp | 5 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_scheduler.h | 7 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_smaa.cpp | 14 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 5 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 9 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_turbo_mode.cpp | 21 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_turbo_mode.h | 9 |
14 files changed, 312 insertions, 219 deletions
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 3f2b139e0..cf2964a3f 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -4,14 +4,16 @@ #include <algorithm> #include "common/settings.h" +#include "video_core/host_shaders/blit_color_float_frag_spv.h" #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" #include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" -#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" +#include "video_core/host_shaders/vulkan_color_clear_frag_spv.h" +#include "video_core/host_shaders/vulkan_color_clear_vert_spv.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -69,10 +71,11 @@ constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CRE .bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()), .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(), }; -constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, +template <VkShaderStageFlags stageFlags, size_t size> +inline constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ + .stageFlags = stageFlags, .offset = 0, - .size = sizeof(PushConstants), + .size = static_cast<u32>(size), }; constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -125,10 +128,8 @@ constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE .alphaToCoverageEnable = VK_FALSE, .alphaToOneEnable = VK_FALSE, }; -constexpr std::array DYNAMIC_STATES{ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, -}; +constexpr std::array DYNAMIC_STATES{VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_BLEND_CONSTANTS}; constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .pNext = nullptr, @@ -205,15 +206,15 @@ inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{ }; constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo( - const VkDescriptorSetLayout* set_layout) { + const VkDescriptorSetLayout* set_layout, vk::Span<VkPushConstantRange> push_constants) { return VkPipelineLayoutCreateInfo{ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, - .setLayoutCount = 1, + .setLayoutCount = (set_layout != nullptr ? 1u : 0u), .pSetLayouts = set_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &PUSH_CONSTANT_RANGE, + .pushConstantRangeCount = push_constants.size(), + .pPushConstantRanges = push_constants.data(), }; } @@ -302,8 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr); } -void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, - const Region2D& src_region) { +void BindBlitState(vk::CommandBuffer cmdbuf, const Region2D& dst_region) { const VkOffset2D offset{ .x = std::min(dst_region.start.x, dst_region.end.x), .y = std::min(dst_region.start.y, dst_region.end.y), @@ -325,15 +325,23 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi .offset = offset, .extent = extent, }; - const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x); - const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); +} + +void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, + const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) { + BindBlitState(cmdbuf, dst_region); + const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) / + static_cast<float>(src_size.width); + const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) / + static_cast<float>(src_size.height); const PushConstants push_constants{ .tex_scale = {scale_x, scale_y}, - .tex_offset = {static_cast<float>(src_region.start.x), - static_cast<float>(src_region.start.y)}, + .tex_offset = {static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width), + static_cast<float>(src_region.start.y) / + static_cast<float>(src_size.height)}, }; - cmdbuf.SetViewport(0, viewport); - cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } @@ -347,6 +355,51 @@ VkExtent2D GetConversionExtent(const ImageView& src_image_view) { .height = is_rescaled ? resolution.ScaleUp(height) : height, }; } + +void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, + VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL) { + constexpr VkFlags flags{VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT}; + const VkImageMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = flags, + .dstAccessMask = flags, + .oldLayout = source_layout, + .newLayout = target_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, barrier); +} + +void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) { + const VkRenderPass render_pass = framebuffer->RenderPass(); + const VkFramebuffer framebuffer_handle = framebuffer->Handle(); + const VkExtent2D render_area = framebuffer->RenderArea(); + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = render_pass, + .framebuffer = framebuffer_handle, + .renderArea{ + .offset{}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); +} } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, @@ -360,13 +413,20 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)}, two_textures_descriptor_allocator{ descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)}, - one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( - PipelineLayoutCreateInfo(one_texture_set_layout.address()))), - two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( - PipelineLayoutCreateInfo(two_textures_set_layout.address()))), + one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo( + one_texture_set_layout.address(), + PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstants)>))), + two_textures_pipeline_layout( + device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo( + two_textures_set_layout.address(), + PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstants)>))), + clear_color_pipeline_layout(device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo( + nullptr, PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(float) * 4>))), full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), - blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), + blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)), blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)), + clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)), + clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)), convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), @@ -404,6 +464,32 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView scheduler.InvalidateState(); } +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, + VkImage src_image, VkSampler src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size) { + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = Tegra::Engines::Fermi2D::Operation::SrcCopy, + }; + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([this, dst_framebuffer, src_image_view, src_image, src_sampler, dst_region, + src_region, src_size, pipeline, layout](vk::CommandBuffer cmdbuf) { + TransitionImageLayout(cmdbuf, src_image, VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL); + BeginRenderPass(cmdbuf, dst_framebuffer); + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + BindBlitState(cmdbuf, layout, dst_region, src_region, src_size); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + }); +} + void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, VkImageView src_stencil_view, const Region2D& dst_region, const Region2D& src_region, @@ -479,6 +565,30 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view); } +void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask, + const std::array<f32, 4>& clear_color, + const Region2D& dst_region) { + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = Tegra::Engines::Fermi2D::Operation::BlendPremult, + }; + const VkPipeline pipeline = FindOrEmplaceClearColorPipeline(key); + const VkPipelineLayout layout = *clear_color_pipeline_layout; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record( + [pipeline, layout, color_mask, clear_color, dst_region](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + const std::array blend_color = { + (color_mask & 0x1) ? 1.0f : 0.0f, (color_mask & 0x2) ? 1.0f : 0.0f, + (color_mask & 0x4) ? 1.0f : 0.0f, (color_mask & 0x8) ? 1.0f : 0.0f}; + cmdbuf.SetBlendConstants(blend_color.data()); + BindBlitState(cmdbuf, dst_region); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_color); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; @@ -654,6 +764,58 @@ VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePip return *blit_depth_stencil_pipelines.back(); } +VkPipeline BlitImageHelper::FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key) { + const auto it = std::ranges::find(clear_color_keys, key); + if (it != clear_color_keys.end()) { + return *clear_color_pipelines[std::distance(clear_color_keys.begin(), it)]; + } + clear_color_keys.push_back(key); + const std::array stages = MakeStages(*clear_color_vert, *clear_color_frag); + const VkPipelineColorBlendAttachmentState color_blend_attachment_state{ + .blendEnable = VK_TRUE, + .srcColorBlendFactor = VK_BLEND_FACTOR_CONSTANT_COLOR, + .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_CONSTANT_ALPHA, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + const VkPipelineColorBlendStateCreateInfo color_blend_state_generic_create_info{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 1, + .pAttachments = &color_blend_attachment_state, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, + }; + clear_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &color_blend_state_generic_create_info, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *clear_color_pipeline_layout, + .renderPass = key.renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + })); + return *clear_color_pipelines.back(); +} + void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth) { if (pipeline) { diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 5df679fb4..2976a7d91 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -10,6 +10,8 @@ namespace Vulkan { +using VideoCommon::Extent3D; +using VideoCommon::Offset2D; using VideoCommon::Region2D; class Device; @@ -36,6 +38,10 @@ public: Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); + void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, + VkImage src_image, VkSampler src_sampler, const Region2D& dst_region, + const Region2D& src_region, const Extent3D& src_size); + void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, VkImageView src_stencil_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, @@ -55,6 +61,9 @@ public: void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); + void ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask, + const std::array<f32, 4>& clear_color, const Region2D& dst_region); + private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view); @@ -66,6 +75,8 @@ private: [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); + [[nodiscard]] VkPipeline FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key); + void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth); void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); @@ -91,9 +102,12 @@ private: DescriptorAllocator two_textures_descriptor_allocator; vk::PipelineLayout one_texture_pipeline_layout; vk::PipelineLayout two_textures_pipeline_layout; + vk::PipelineLayout clear_color_pipeline_layout; vk::ShaderModule full_screen_vert; vk::ShaderModule blit_color_to_color_frag; vk::ShaderModule blit_depth_stencil_frag; + vk::ShaderModule clear_color_vert; + vk::ShaderModule clear_color_frag; vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; @@ -106,6 +120,8 @@ private: std::vector<vk::Pipeline> blit_color_pipelines; std::vector<BlitImagePipelineKey> blit_depth_stencil_keys; std::vector<vk::Pipeline> blit_depth_stencil_pipelines; + std::vector<BlitImagePipelineKey> clear_color_keys; + std::vector<vk::Pipeline> clear_color_pipelines; vk::Pipeline convert_d32_to_r32_pipeline; vk::Pipeline convert_r32_to_d32_pipeline; vk::Pipeline convert_d16_to_r16_pipeline; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 52855120c..2a8d9e377 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -60,22 +60,9 @@ std::string GetDriverVersion(const Device& device) { return GetReadableVersion(version); } -std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) { - std::sort(std::begin(available_extensions), std::end(available_extensions)); - - static constexpr std::size_t AverageExtensionSize = 64; - std::string separated_extensions; - separated_extensions.reserve(available_extensions.size() * AverageExtensionSize); - - const auto end = std::end(available_extensions); - for (auto extension = std::begin(available_extensions); extension != end; ++extension) { - if (const bool is_last = extension + 1 == end; is_last) { - separated_extensions += *extension; - } else { - separated_extensions += fmt::format("{},", *extension); - } - } - return separated_extensions; +std::string BuildCommaSeparatedExtensions( + const std::set<std::string, std::less<>>& available_extensions) { + return fmt::format("{}", fmt::join(available_extensions, ",")); } } // Anonymous namespace @@ -112,6 +99,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, state_tracker, scheduler) { if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { turbo_mode.emplace(instance, dld); + scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); }); } Report(); } catch (const vk::Exception& exception) { @@ -120,6 +108,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, } RendererVulkan::~RendererVulkan() { + scheduler.RegisterOnSubmit([] {}); void(device.GetLogical().WaitIdle()); } diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 33daa8c1c..df972cd54 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -1,12 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include <cmath> -#include "common/bit_cast.h" #include "common/common_types.h" #include "common/div_ceil.h" #include "common/settings.h" +#include "video_core/fsr.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" @@ -17,146 +16,7 @@ #include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { -namespace { -// Reimplementations of the constant generating functions in ffx_fsr1.h -// GCC generated a lot of warnings when using the official header. -u32 AU1_AH1_AF1(f32 f) { - static constexpr u32 base[512]{ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, - 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, - 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, - 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, - 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, - 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, - 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, - 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - }; - static constexpr s8 shift[512]{ - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, - 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, - 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, - }; - const u32 u = Common::BitCast<u32>(f); - const u32 i = u >> 23; - return base[i] + ((u & 0x7fffff) >> shift[i]); -} - -u32 AU1_AH2_AF2(f32 a[2]) { - return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16); -} - -void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX, - f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, - f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) { - con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX); - con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY); - con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); - con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); - con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); - con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY); - con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); - con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY); - con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX); - con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); - con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); - con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); - con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX); - con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY); - con3[2] = con3[3] = 0; -} - -void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], - f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, - f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, - f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { - FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, - inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); - con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + - inputOffsetInPixelsX); - con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + - inputOffsetInPixelsY); -} - -void FsrRcasCon(u32* con, f32 sharpness) { - sharpness = std::exp2f(-sharpness); - f32 hSharp[2]{sharpness, sharpness}; - con[0] = Common::BitCast<u32>(sharpness); - con[1] = AU1_AH2_AF2(hSharp); - con[2] = 0; - con[3] = 0; -} -} // Anonymous namespace +using namespace FSR; FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, VkExtent2D output_size_) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f91bb5a1d..baedc4424 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -548,31 +548,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector<VkVertexInputBindingDescription, 32> vertex_bindings; static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes; - if (key.state.dynamic_vertex_input) { - const size_t num_vertex_arrays = std::min( - key.state.attributes.size(), static_cast<size_t>(device.GetMaxVertexInputBindings())); - for (size_t index = 0; index < num_vertex_arrays; ++index) { - const u32 type = key.state.DynamicAttributeType(index); - if (!stage_infos[0].loads.Generic(index) || type == 0) { - continue; - } - vertex_attributes.push_back({ - .location = static_cast<u32>(index), - .binding = 0, - .format = type == 1 ? VK_FORMAT_R32_SFLOAT - : type == 2 ? VK_FORMAT_R32_SINT - : VK_FORMAT_R32_UINT, - .offset = 0, - }); - } - if (!vertex_attributes.empty()) { - vertex_bindings.push_back({ - .binding = 0, - .stride = 4, - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, - }); - } - } else { + if (!key.state.dynamic_vertex_input) { const size_t num_vertex_arrays = std::min( Maxwell::NumVertexArrays, static_cast<size_t>(device.GetMaxVertexInputBindings())); for (size_t index = 0; index < num_vertex_arrays; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ed4a72166..719edbcfb 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -266,10 +266,40 @@ void RasterizerVulkan::DrawIndirect() { buffer_cache.SetDrawIndirect(nullptr); } +void RasterizerVulkan::DrawTexture() { + MICROPROFILE_SCOPE(Vulkan_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + FlushWork(); + + query_cache.UpdateCounters(); + + texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.UpdateRenderTargets(false); + + UpdateDynamicStates(); + + const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); + const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); + Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0), + .y = static_cast<s32>(draw_texture_state.dst_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1), + .y = static_cast<s32>(draw_texture_state.dst_y1)}}; + Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0), + .y = static_cast<s32>(draw_texture_state.src_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1), + .y = static_cast<s32>(draw_texture_state.src_y1)}}; + blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(), + texture.ImageHandle(), sampler->Handle(), dst_region, src_region, + texture.size); +} + void RasterizerVulkan::Clear(u32 layer_count) { MICROPROFILE_SCOPE(Vulkan_Clearing); FlushWork(); + gpu_memory->FlushCaching(); query_cache.UpdateCounters(); @@ -364,7 +394,15 @@ void RasterizerVulkan::Clear(u32 layer_count) { cmdbuf.ClearAttachments(attachment, clear_rect); }); } else { - UNIMPLEMENTED_MSG("Unimplemented Clear only the specified channel"); + u8 color_mask = static_cast<u8>(regs.clear_surface.R | regs.clear_surface.G << 1 | + regs.clear_surface.B << 2 | regs.clear_surface.A << 3); + Region2D dst_region = { + Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, + Offset2D{.x = clear_rect.rect.offset.x + + static_cast<s32>(clear_rect.rect.extent.width), + .y = clear_rect.rect.offset.y + + static_cast<s32>(clear_rect.rect.extent.height)}}; + blit_image.ClearColor(framebuffer, color_mask, regs.clear_color, dst_region); } } @@ -628,6 +666,7 @@ void RasterizerVulkan::TickFrame() { } bool RasterizerVulkan::AccelerateConditionalRendering() { + gpu_memory->FlushCaching(); if (Settings::IsGPULevelHigh()) { // TODO(Blinkhawk): Reimplement Host conditional rendering. return false; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 472cc64d9..a0508b57c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -66,6 +66,7 @@ public: void Draw(bool is_indexed, u32 instance_count) override; void DrawIndirect() override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index c2e53a5d5..e03685af1 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -213,6 +213,11 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s .signalSemaphoreCount = num_signal_semaphores, .pSignalSemaphores = signal_semaphores.data(), }; + + if (on_submit) { + on_submit(); + } + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { case VK_SUCCESS: break; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3858c506c..bd4cb0f7e 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -5,6 +5,7 @@ #include <condition_variable> #include <cstddef> +#include <functional> #include <memory> #include <thread> #include <utility> @@ -66,6 +67,11 @@ public: query_cache = &query_cache_; } + // Registers a callback to perform on queue submission. + void RegisterOnSubmit(std::function<void()>&& func) { + on_submit = std::move(func); + } + /// Send work to a separate thread. template <typename T> void Record(T&& command) { @@ -216,6 +222,7 @@ private: vk::CommandBuffer current_cmdbuf; std::unique_ptr<CommandChunk> chunk; + std::function<void()> on_submit; State state; diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp index 8eb735489..f8735189d 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.cpp +++ b/src/video_core/renderer_vulkan/vk_smaa.cpp @@ -468,7 +468,7 @@ VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo> } void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image) { - constexpr std::array<VkImageSubresourceRange, 1> subresources{{{ + static constexpr std::array<VkImageSubresourceRange, 1> subresources{{{ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, .levelCount = 1, @@ -528,8 +528,8 @@ SMAA::SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count, } void SMAA::CreateImages() { - constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; - constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; + static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; + static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; std::tie(m_static_images[Area], m_static_buffer_commits[Area]) = CreateWrappedImage(m_device, m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); @@ -586,12 +586,12 @@ void SMAA::CreateSampler() { void SMAA::CreateShaders() { // These match the order of the SMAAStage enum - constexpr std::array vert_shader_sources{ + static constexpr std::array vert_shader_sources{ ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_VERT_SPV), ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_VERT_SPV), ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_VERT_SPV), }; - constexpr std::array frag_shader_sources{ + static constexpr std::array frag_shader_sources{ ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_FRAG_SPV), ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_FRAG_SPV), ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_FRAG_SPV), @@ -675,8 +675,8 @@ void SMAA::UploadImages(Scheduler& scheduler) { return; } - constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; - constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; + static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; + static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; UploadImage(m_device, m_allocator, scheduler, m_static_images[Area], area_extent, VK_FORMAT_R8G8_UNORM, ARRAY_TO_SPAN(areaTexBytes)); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index d39372ec4..9b85dfb5e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1230,6 +1230,11 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, }); } +void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, + std::span<const VideoCommon::ImageCopy> copies) { + UNIMPLEMENTED_MSG("Copying images with different samples is not implemented in Vulkan."); +} + u64 TextureCacheRuntime::GetDeviceLocalMemory() const { return device.GetDeviceLocalMemory(); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 1f27a3589..0ce39616f 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -70,6 +70,8 @@ public: void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + bool ShouldReinterpret(Image& dst, Image& src); void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); @@ -80,6 +82,11 @@ public: return false; } + bool CanUploadMSAA() const noexcept { + // TODO: Implement buffer to MSAA uploads + return false; + } + void AccelerateImageUpload(Image&, const StagingBufferRef&, std::span<const VideoCommon::SwizzleParameters>); @@ -106,7 +113,7 @@ public: std::optional<ASTCDecoderPass> astc_decoder_pass; const Settings::ResolutionScalingInfo& resolution; - constexpr static size_t indexing_slots = 8 * sizeof(size_t); + static constexpr size_t indexing_slots = 8 * sizeof(size_t); std::array<vk::Buffer, indexing_slots> buffers{}; std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{}; }; diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp index 852b86f84..db04943eb 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp @@ -14,11 +14,21 @@ using namespace Common::Literals; TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} { + { + std::scoped_lock lk{m_submission_lock}; + m_submission_time = std::chrono::steady_clock::now(); + } m_thread = std::jthread([&](auto stop_token) { Run(stop_token); }); } TurboMode::~TurboMode() = default; +void TurboMode::QueueSubmitted() { + std::scoped_lock lk{m_submission_lock}; + m_submission_time = std::chrono::steady_clock::now(); + m_submission_cv.notify_one(); +} + void TurboMode::Run(std::stop_token stop_token) { auto& dld = m_device.GetLogical(); @@ -38,7 +48,7 @@ void TurboMode::Run(std::stop_token stop_token) { auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal); // Create the descriptor pool to contain our descriptor. - constexpr VkDescriptorPoolSize pool_size{ + static constexpr VkDescriptorPoolSize pool_size{ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, }; @@ -53,7 +63,7 @@ void TurboMode::Run(std::stop_token stop_token) { }); // Create the descriptor set layout from the pool. - constexpr VkDescriptorSetLayoutBinding layout_binding{ + static constexpr VkDescriptorSetLayoutBinding layout_binding{ .binding = 0, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, @@ -199,6 +209,13 @@ void TurboMode::Run(std::stop_token stop_token) { // Wait for completion. fence.Wait(); + + // Wait for the next graphics queue submission if necessary. + std::unique_lock lk{m_submission_lock}; + Common::CondvarWait(m_submission_cv, lk, stop_token, [this] { + return (std::chrono::steady_clock::now() - m_submission_time) <= + std::chrono::milliseconds{100}; + }); } } diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h index 2060e2395..99b5ac50b 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.h +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h @@ -3,6 +3,9 @@ #pragma once +#include <chrono> +#include <mutex> + #include "common/polyfill_thread.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" @@ -15,11 +18,17 @@ public: explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld); ~TurboMode(); + void QueueSubmitted(); + private: void Run(std::stop_token stop_token); Device m_device; MemoryAllocator m_allocator; + std::mutex m_submission_lock; + std::condition_variable_any m_submission_cv; + std::chrono::time_point<std::chrono::steady_clock> m_submission_time{}; + std::jthread m_thread; }; |