diff options
Diffstat (limited to '')
24 files changed, 305 insertions, 189 deletions
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index bdb71dc53..e7104d377 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -184,7 +184,7 @@ struct FormatTuple { {VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // A4B4G4R4_UNORM - {VK_FORMAT_R4G4_UNORM_PACK8}, // R4G4_UNORM + {VK_FORMAT_R4G4_UNORM_PACK8}, // G4R4_UNORM {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB @@ -196,6 +196,8 @@ struct FormatTuple { {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB {VK_FORMAT_ASTC_10x6_UNORM_BLOCK}, // ASTC_2D_10X6_UNORM + {VK_FORMAT_ASTC_10x5_UNORM_BLOCK}, // ASTC_2D_10X5_UNORM + {VK_FORMAT_ASTC_10x5_SRGB_BLOCK}, // ASTC_2D_10X5_SRGB {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7c78d0299..d8131232a 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -102,13 +102,13 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), - state_tracker(gpu), scheduler(device, state_tracker), + state_tracker(), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, screen_info), - rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device, - memory_allocator, state_tracker, scheduler) { + rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, + state_tracker, scheduler) { Report(); } catch (const vk::Exception& exception) { LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); @@ -142,7 +142,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { const auto recreate_swapchain = [&] { if (!has_been_recreated) { has_been_recreated = true; - scheduler.WaitWorker(); + scheduler.Finish(); } const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); swapchain.Create(layout.width, layout.height, is_srgb); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 444c29f68..cb7fa2078 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -145,6 +145,11 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, // Finish any pending renderpass scheduler.RequestOutsideRenderPassOperationContext(); + if (const auto swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count) { + image_count = swapchain_images; + Recreate(); + } + const std::size_t image_index = swapchain.GetImageIndex(); scheduler.Wait(resource_ticks[image_index]); @@ -448,15 +453,15 @@ vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkE void BlitScreen::CreateStaticResources() { CreateShaders(); + CreateSampler(); +} + +void BlitScreen::CreateDynamicResources() { CreateSemaphores(); CreateDescriptorPool(); CreateDescriptorSetLayout(); CreateDescriptorSets(); CreatePipelineLayout(); - CreateSampler(); -} - -void BlitScreen::CreateDynamicResources() { CreateRenderPass(); CreateFramebuffers(); CreateGraphicsPipeline(); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index b8c67bef0..29e2ea925 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -109,7 +109,7 @@ private: MemoryAllocator& memory_allocator; Swapchain& swapchain; Scheduler& scheduler; - const std::size_t image_count; + std::size_t image_count; const ScreenInfo& screen_info; vk::ShaderModule vertex_shader; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index f17a5ccd6..241d7573e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -26,8 +26,6 @@ namespace Vulkan { -using Tegra::Texture::SWIZZLE_TABLE; - namespace { constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6447210e2..7906e11a8 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -126,8 +126,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + secondary_offset}; - const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; - const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; + const u32 lhs_raw{gpu_memory.Read<u32>(addr) << desc.shift_left}; + const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr) << desc.secondary_shift_left}; return TexturePair(lhs_raw | rhs_raw, via_header_index); } } diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index c249b34d4..0214b103a 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -11,11 +11,8 @@ namespace Vulkan { -InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_) - : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {} - -InnerFence::InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_) - : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {} +InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_) + : FenceBase{is_stubbed_}, scheduler{scheduler_} {} InnerFence::~InnerFence() = default; @@ -48,12 +45,8 @@ FenceManager::FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::G : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, scheduler{scheduler_} {} -Fence FenceManager::CreateFence(u32 value, bool is_stubbed) { - return std::make_shared<InnerFence>(scheduler, value, is_stubbed); -} - -Fence FenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) { - return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed); +Fence FenceManager::CreateFence(bool is_stubbed) { + return std::make_shared<InnerFence>(scheduler, is_stubbed); } void FenceManager::QueueFence(Fence& fence) { diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 7c0bbd80a..7fe2afcd9 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -25,8 +25,7 @@ class Scheduler; class InnerFence : public VideoCommon::FenceBase { public: - explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_); - explicit InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_); + explicit InnerFence(Scheduler& scheduler_, bool is_stubbed_); ~InnerFence(); void Queue(); @@ -50,8 +49,7 @@ public: QueryCache& query_cache, const Device& device, Scheduler& scheduler); protected: - Fence CreateFence(u32 value, bool is_stubbed) override; - Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override; + Fence CreateFence(bool is_stubbed) override; void QueueFence(Fence& fence) override; bool IsFenceSignaled(Fence& fence) const override; void WaitFence(Fence& fence) override; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5aca8f038..f47786f48 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -215,15 +215,14 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m } // Anonymous namespace GraphicsPipeline::GraphicsPipeline( - Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, Scheduler& scheduler_, - BufferCache& buffer_cache_, TextureCache& texture_cache_, + Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages, const std::array<const Shader::Info*, NUM_STAGES>& infos) - : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, - texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, + : key{key_}, device{device_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, scheduler{scheduler_}, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { if (shader_notify) { shader_notify->MarkShaderBuilding(); @@ -288,7 +287,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); - const auto& regs{maxwell3d.regs}; + const auto& regs{maxwell3d->regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; @@ -302,7 +301,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { ++ssbo_index; } } - const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(cbufs[desc.cbuf_index].enabled); const u32 index_offset{index << desc.size_shift}; @@ -315,13 +314,14 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + second_offset}; - const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; - const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; + const u32 lhs_raw{gpu_memory->Read<u32>(addr) << desc.shift_left}; + const u32 rhs_raw{gpu_memory->Read<u32>(separate_addr) + << desc.secondary_shift_left}; const u32 raw{lhs_raw | rhs_raw}; return TexturePair(raw, via_header_index); } } - return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); + return TexturePair(gpu_memory->Read<u32>(addr), via_header_index); }}; const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE { for (u32 index = 0; index < desc.count; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index e8949a9ab..85602592b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -69,15 +69,16 @@ class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; public: - explicit GraphicsPipeline( - Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, - Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, - VideoCore::ShaderNotify* shader_notify, const Device& device, - DescriptorPool& descriptor_pool, UpdateDescriptorQueue& update_descriptor_queue, - Common::ThreadWorker* worker_thread, PipelineStatistics* pipeline_statistics, - RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key, - std::array<vk::ShaderModule, NUM_STAGES> stages, - const std::array<const Shader::Info*, NUM_STAGES>& infos); + explicit GraphicsPipeline(Scheduler& scheduler, BufferCache& buffer_cache, + TextureCache& texture_cache, VideoCore::ShaderNotify* shader_notify, + const Device& device, DescriptorPool& descriptor_pool, + UpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* worker_thread, + PipelineStatistics* pipeline_statistics, + RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, + std::array<vk::ShaderModule, NUM_STAGES> stages, + const std::array<const Shader::Info*, NUM_STAGES>& infos); GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; @@ -109,6 +110,11 @@ public: return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); }; } + void SetEngine(Tegra::Engines::Maxwell3D* maxwell3d_, Tegra::MemoryManager* gpu_memory_) { + maxwell3d = maxwell3d_; + gpu_memory = gpu_memory_; + } + private: template <typename Spec> void ConfigureImpl(bool is_indexed); @@ -120,8 +126,8 @@ private: void Validate(); const GraphicsPipelineCacheKey key; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D* maxwell3d; + Tegra::MemoryManager* gpu_memory; const Device& device; TextureCache& texture_cache; BufferCache& buffer_cache; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9708dc45e..732e7b6f2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -259,20 +259,18 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c return std::memcmp(&rhs, this, Size()) == 0; } -PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, UpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) - : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, - device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, - update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, + : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_}, + descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, + render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, + texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, - workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), - serialization_thread(1, "yuzu:PipelineSerialization") { + workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), + serialization_thread(1, "VkPipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; profile = Shader::Profile{ @@ -337,7 +335,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { current_pipeline = nullptr; return nullptr; } - graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(), + graphics_key.state.Refresh(*maxwell3d, device.IsExtExtendedDynamicStateSupported(), device.IsExtVertexInputDynamicStateSupported()); if (current_pipeline) { @@ -357,7 +355,7 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { if (!shader) { return nullptr; } - const auto& qmd{kepler_compute.launch_description}; + const auto& qmd{kepler_compute->launch_description}; const ComputePipelineCacheKey key{ .unique_hash = shader->unique_hash, .shared_memory_size = qmd.shared_alloc, @@ -486,13 +484,13 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const } // If something is using depth, we can assume that games are not rendering anything which // will be used one time. - if (maxwell3d.regs.zeta_enable) { + if (maxwell3d->regs.zeta_enable) { return nullptr; } // If games are using a small index count, we can assume these are full screen quads. // Usually these shaders are only used once for building textures so we can assume they // can't be built async - if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) { return pipeline; } return nullptr; @@ -557,10 +555,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - return std::make_unique<GraphicsPipeline>( - maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, - descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key, - std::move(modules), infos); + return std::make_unique<GraphicsPipeline>(scheduler, buffer_cache, texture_cache, + &shader_notify, device, descriptor_pool, + update_descriptor_queue, thread_worker, statistics, + render_pass_cache, key, std::move(modules), infos); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); @@ -592,9 +590,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( const ComputePipelineCacheKey& key, const ShaderInfo* shader) { - const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; - const auto& qmd{kepler_compute.launch_description}; - ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()}; + const auto& qmd{kepler_compute->launch_description}; + ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start}; env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 127957dbf..61f9e9366 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -100,10 +100,8 @@ struct ShaderPools { class PipelineCache : public VideoCommon::ShaderCache { public: - explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const Device& device, - Scheduler& scheduler, DescriptorPool& descriptor_pool, + explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler, + DescriptorPool& descriptor_pool, UpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 2b859c6b8..7cb02631c 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -65,10 +65,9 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; } -QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, - const Device& device_, Scheduler& scheduler_) - : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_}, +QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_, + Scheduler& scheduler_) + : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_}, query_pools{ QueryPool{device_, scheduler_, QueryType::SamplesPassed}, } {} diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index b0d86c4f8..26762ee09 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -52,9 +52,8 @@ private: class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { public: - explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, - const Device& device_, Scheduler& scheduler_); + explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_, + Scheduler& scheduler_); ~QueryCache(); std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7e40c2df1..acfd5da7d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -11,6 +11,7 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/settings.h" +#include "video_core/control/channel_state.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/blit_image.h" @@ -148,14 +149,11 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan } // Anonymous namespace RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, - Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, const Device& device_, MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, Scheduler& scheduler_) - : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, - gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, - screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, - state_tracker{state_tracker_}, scheduler{scheduler_}, + : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_}, + memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), update_descriptor_queue(device, scheduler), blit_image(device, scheduler, state_tracker, descriptor_pool), @@ -165,14 +163,13 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra memory_allocator, staging_pool, blit_image, astc_decoder_pass, render_pass_cache}, - texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), + texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), - buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, - descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, - texture_cache, gpu.ShaderNotify()), - query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, + buffer_cache(*this, cpu_memory_, buffer_cache_runtime), + pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue, + render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), + query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); @@ -193,14 +190,16 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { return; } std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + // update engine as channel may be different. + pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); BeginTransformFeedback(); UpdateDynamicStates(); - const auto& regs{maxwell3d.regs}; - const u32 num_instances{maxwell3d.mme_draw.instance_count}; + const auto& regs{maxwell3d->regs}; + const u32 num_instances{maxwell3d->mme_draw.instance_count}; const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { if (draw_params.is_indexed) { @@ -218,14 +217,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); - if (!maxwell3d.ShouldExecute()) { + if (!maxwell3d->ShouldExecute()) { return; } FlushWork(); query_cache.UpdateCounters(); - auto& regs = maxwell3d.regs; + auto& regs = maxwell3d->regs; const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A; const bool use_depth = regs.clear_buffers.Z; @@ -248,8 +247,15 @@ void RasterizerVulkan::Clear() { } UpdateViewportsState(regs); + VkRect2D default_scissor; + default_scissor.offset.x = 0; + default_scissor.offset.y = 0; + default_scissor.extent.width = std::numeric_limits<s32>::max(); + default_scissor.extent.height = std::numeric_limits<s32>::max(); + VkClearRect clear_rect{ - .rect = GetScissorState(regs, 0, up_scale, down_shift), + .rect = regs.clear_flags.scissor ? GetScissorState(regs, 0, up_scale, down_shift) + : default_scissor, .baseArrayLayer = regs.clear_buffers.layer, .layerCount = 1, }; @@ -339,9 +345,9 @@ void RasterizerVulkan::DispatchCompute() { return; } std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; - pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache); + pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache); - const auto& qmd{kepler_compute.launch_description}; + const auto& qmd{kepler_compute->launch_description}; const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); @@ -422,7 +428,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { } } -void RasterizerVulkan::SyncGuestHost() { +void RasterizerVulkan::InvalidateGPUCache() { pipeline_cache.SyncGuestHost(); { std::scoped_lock lock{buffer_cache.mutex}; @@ -442,40 +448,30 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { pipeline_cache.OnCPUWrite(addr, size); } -void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) { +void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { { std::scoped_lock lock{texture_cache.mutex}; - texture_cache.UnmapGPUMemory(addr, size); + texture_cache.UnmapGPUMemory(as_id, addr, size); } } -void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { - if (!gpu.IsAsync()) { - gpu_memory.Write<u32>(addr, value); - return; - } - fence_manager.SignalSemaphore(addr, value); +void RasterizerVulkan::SignalFence(std::function<void()>&& func) { + fence_manager.SignalFence(std::move(func)); +} + +void RasterizerVulkan::SyncOperation(std::function<void()>&& func) { + fence_manager.SyncOperation(std::move(func)); } void RasterizerVulkan::SignalSyncPoint(u32 value) { - if (!gpu.IsAsync()) { - gpu.IncrementSyncPoint(value); - return; - } fence_manager.SignalSyncPoint(value); } void RasterizerVulkan::SignalReference() { - if (!gpu.IsAsync()) { - return; - } fence_manager.SignalOrdering(); } void RasterizerVulkan::ReleaseFences() { - if (!gpu.IsAsync()) { - return; - } fence_manager.WaitPendingFences(); } @@ -552,13 +548,13 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() } void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, - std::span<u8> memory) { - auto cpu_addr = gpu_memory.GpuToCpuAddress(address); + std::span<const u8> memory) { + auto cpu_addr = gpu_memory->GpuToCpuAddress(address); if (!cpu_addr) [[unlikely]] { - gpu_memory.WriteBlock(address, memory.data(), copy_size); + gpu_memory->WriteBlock(address, memory.data(), copy_size); return; } - gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size); + gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size); { std::unique_lock<std::mutex> lock{buffer_cache.mutex}; if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { @@ -627,7 +623,7 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 } void RasterizerVulkan::UpdateDynamicStates() { - auto& regs = maxwell3d.regs; + auto& regs = maxwell3d->regs; UpdateViewportsState(regs); UpdateScissorsState(regs); UpdateDepthBias(regs); @@ -651,7 +647,7 @@ void RasterizerVulkan::UpdateDynamicStates() { } void RasterizerVulkan::BeginTransformFeedback() { - const auto& regs = maxwell3d.regs; + const auto& regs = maxwell3d->regs; if (regs.tfb_enabled == 0) { return; } @@ -667,7 +663,7 @@ void RasterizerVulkan::BeginTransformFeedback() { } void RasterizerVulkan::EndTransformFeedback() { - const auto& regs = maxwell3d.regs; + const auto& regs = maxwell3d->regs; if (regs.tfb_enabled == 0) { return; } @@ -917,7 +913,7 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& } void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { - auto& dirty{maxwell3d.dirty.flags}; + auto& dirty{maxwell3d->dirty.flags}; if (!dirty[Dirty::VertexInput]) { return; } @@ -974,4 +970,41 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) }); } +void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) { + CreateChannel(channel); + { + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + texture_cache.CreateChannel(channel); + buffer_cache.CreateChannel(channel); + } + pipeline_cache.CreateChannel(channel); + query_cache.CreateChannel(channel); + state_tracker.SetupTables(channel); +} + +void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) { + const s32 channel_id = channel.bind_id; + BindToChannel(channel_id); + { + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + texture_cache.BindToChannel(channel_id); + buffer_cache.BindToChannel(channel_id); + } + pipeline_cache.BindToChannel(channel_id); + query_cache.BindToChannel(channel_id); + state_tracker.ChangeChannel(channel); + state_tracker.InvalidateState(); +} + +void RasterizerVulkan::ReleaseChannel(s32 channel_id) { + EraseChannel(channel_id); + { + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + texture_cache.EraseChannel(channel_id); + buffer_cache.EraseChannel(channel_id); + } + pipeline_cache.EraseChannel(channel_id); + query_cache.EraseChannel(channel_id); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 0370ea39b..4cde3c983 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -8,6 +8,7 @@ #include <boost/container/static_vector.hpp> #include "common/common_types.h" +#include "video_core/control/channel_state_cache.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" @@ -54,13 +55,13 @@ private: BufferCache& buffer_cache; }; -class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { +class RasterizerVulkan final : public VideoCore::RasterizerAccelerated, + protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { public: explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, - Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - ScreenInfo& screen_info_, const Device& device_, - MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, - Scheduler& scheduler_); + Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, + const Device& device_, MemoryAllocator& memory_allocator_, + StateTracker& state_tracker_, Scheduler& scheduler_); ~RasterizerVulkan() override; void Draw(bool is_indexed, bool is_instanced) override; @@ -75,10 +76,11 @@ public: bool MustFlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override; - void SyncGuestHost() override; + void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; - void ModifyGPUMemory(GPUVAddr addr, u64 size) override; - void SignalSemaphore(GPUVAddr addr, u32 value) override; + void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; + void SignalFence(std::function<void()>&& func) override; + void SyncOperation(std::function<void()>&& func) override; void SignalSyncPoint(u32 value) override; void SignalReference() override; void ReleaseFences() override; @@ -93,12 +95,18 @@ public: const Tegra::Engines::Fermi2D::Config& copy_config) override; Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, - std::span<u8> memory) override; + std::span<const u8> memory) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; + void InitializeChannel(Tegra::Control::ChannelState& channel) override; + + void BindChannel(Tegra::Control::ChannelState& channel) override; + + void ReleaseChannel(s32 channel_id) override; + private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; @@ -134,9 +142,6 @@ private: void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); Tegra::GPU& gpu; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; ScreenInfo& screen_info; const Device& device; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index a331ff37e..d96720b80 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -136,7 +136,7 @@ bool Scheduler::UpdateRescaling(bool is_rescaling) { } void Scheduler::WorkerThread(std::stop_token stop_token) { - Common::SetCurrentThreadName("yuzu:VulkanWorker"); + Common::SetCurrentThreadName("VulkanWorker"); do { std::unique_ptr<CommandChunk> work; { diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 06f68d09a..7fb256953 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -26,20 +26,39 @@ using namespace Common::Literals; constexpr VkDeviceSize MAX_ALIGNMENT = 256; // Maximum size to put elements in the stream buffer constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; -// Stream buffer size in bytes -constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; -constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; constexpr VkMemoryPropertyFlags HOST_FLAGS = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; -bool IsStreamHeap(VkMemoryHeap heap) noexcept { - return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; +static bool IsStreamHeap(VkMemoryHeap heap, size_t staging_buffer_size) noexcept { + return staging_buffer_size < (heap.size * 2) / 3; +} + +static bool HasLargeDeviceLocalHostVisibleMemory(const VkPhysicalDeviceMemoryProperties& props) { + const auto flags{VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT}; + + for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { + const auto& memory_type{props.memoryTypes[type_index]}; + + if ((memory_type.propertyFlags & flags) != flags) { + // Memory must be device local and host visible + continue; + } + + const auto& heap{props.memoryHeaps[memory_type.heapIndex]}; + if (heap.size >= 7168_MiB) { + // This is the right type of memory + return true; + } + } + + return false; } std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, - VkMemoryPropertyFlags flags) noexcept { + VkMemoryPropertyFlags flags, + size_t staging_buffer_size) noexcept { for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { if (((type_mask >> type_index) & 1) == 0) { // Memory type is incompatible @@ -50,7 +69,7 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p // Memory type doesn't have the flags we want continue; } - if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { + if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex], staging_buffer_size)) { // Memory heap is not suitable for streaming continue; } @@ -61,17 +80,17 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p } u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, - bool try_device_local) { + bool try_device_local, size_t staging_buffer_size) { std::optional<u32> type; if (try_device_local) { // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this - type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); + type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS, staging_buffer_size); if (type) { return *type; } } // Otherwise try without the DEVICE_LOCAL_BIT - type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); + type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS, staging_buffer_size); if (type) { return *type; } @@ -79,20 +98,32 @@ u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_ throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); } -size_t Region(size_t iterator) noexcept { - return iterator / REGION_SIZE; +size_t Region(size_t iterator, size_t region_size) noexcept { + return iterator / region_size; } } // Anonymous namespace StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { + + const auto memory_properties{device.GetPhysical().GetMemoryProperties().memoryProperties}; + if (HasLargeDeviceLocalHostVisibleMemory(memory_properties)) { + // Possible on many integrated and newer discrete cards + staging_buffer_size = 1_GiB; + } else { + // Well-supported default size used by most Vulkan PC games + staging_buffer_size = 256_MiB; + } + + region_size = staging_buffer_size / StagingBufferPool::NUM_SYNCS; + const vk::Device& dev = device.GetLogical(); stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .size = STREAM_BUFFER_SIZE, + .size = staging_buffer_size, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, @@ -117,19 +148,18 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .image = nullptr, .buffer = *stream_buffer, }; - const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties; VkMemoryAllocateInfo stream_memory_info{ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = make_dedicated ? &dedicated_info : nullptr, .allocationSize = requirements.size, - .memoryTypeIndex = - FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true), + .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true, + staging_buffer_size), }; stream_memory = dev.TryAllocateMemory(stream_memory_info); if (!stream_memory) { LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory"); - stream_memory_info.memoryTypeIndex = - FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false); + stream_memory_info.memoryTypeIndex = FindMemoryTypeIndex( + memory_properties, requirements.memoryTypeBits, false, staging_buffer_size); stream_memory = dev.AllocateMemory(stream_memory_info); } @@ -137,7 +167,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem stream_memory.SetObjectNameEXT("Stream Buffer Memory"); } stream_buffer.BindMemory(*stream_memory, 0); - stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); + stream_pointer = stream_memory.Map(0, staging_buffer_size); } StagingBufferPool::~StagingBufferPool() = default; @@ -158,25 +188,25 @@ void StagingBufferPool::TickFrame() { } StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { - if (AreRegionsActive(Region(free_iterator) + 1, - std::min(Region(iterator + size) + 1, NUM_SYNCS))) { + if (AreRegionsActive(Region(free_iterator, region_size) + 1, + std::min(Region(iterator + size, region_size) + 1, NUM_SYNCS))) { // Avoid waiting for the previous usages to be free return GetStagingBuffer(size, MemoryUsage::Upload); } const u64 current_tick = scheduler.CurrentTick(); - std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator), - current_tick); + std::fill(sync_ticks.begin() + Region(used_iterator, region_size), + sync_ticks.begin() + Region(iterator, region_size), current_tick); used_iterator = iterator; free_iterator = std::max(free_iterator, iterator + size); - if (iterator + size >= STREAM_BUFFER_SIZE) { - std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, - current_tick); + if (iterator + size >= staging_buffer_size) { + std::fill(sync_ticks.begin() + Region(used_iterator, region_size), + sync_ticks.begin() + NUM_SYNCS, current_tick); used_iterator = 0; iterator = 0; free_iterator = size; - if (AreRegionsActive(0, Region(size) + 1)) { + if (AreRegionsActive(0, Region(size, region_size) + 1)) { // Avoid waiting for the previous usages to be free return GetStagingBuffer(size, MemoryUsage::Upload); } diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 91dc84da8..90c67177f 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -93,6 +93,9 @@ private: size_t free_iterator = 0; std::array<u64, NUM_SYNCS> sync_ticks{}; + size_t staging_buffer_size = 0; + size_t region_size = 0; + StagingBuffersCache device_local_cache; StagingBuffersCache upload_cache; StagingBuffersCache download_cache; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 9ad096431..f234e1a31 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -7,9 +7,9 @@ #include "common/common_types.h" #include "core/core.h" +#include "video_core/control/channel_state.h" #include "video_core/dirty_flags.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/gpu.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) @@ -174,9 +174,8 @@ void SetupDirtyVertexBindings(Tables& tables) { } } // Anonymous namespace -StateTracker::StateTracker(Tegra::GPU& gpu) - : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { - auto& tables{gpu.Maxwell3D().dirty.tables}; +void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) { + auto& tables{channel_state.maxwell_3d->dirty.tables}; SetupDirtyFlags(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables); @@ -199,4 +198,15 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyVertexBindings(tables); } +void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) { + flags = &channel_state.maxwell_3d->dirty.flags; +} + +void StateTracker::InvalidateState() { + flags->set(); +} + +StateTracker::StateTracker() + : flags{&default_flags}, default_flags{}, invalidation_flags{MakeInvalidationFlags()} {} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index a85bc1c10..2296dea60 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -10,6 +10,12 @@ #include "video_core/dirty_flags.h" #include "video_core/engines/maxwell_3d.h" +namespace Tegra { +namespace Control { +struct ChannelState; +} +} // namespace Tegra + namespace Vulkan { namespace Dirty { @@ -53,19 +59,19 @@ class StateTracker { using Maxwell = Tegra::Engines::Maxwell3D::Regs; public: - explicit StateTracker(Tegra::GPU& gpu); + explicit StateTracker(); void InvalidateCommandBufferState() { - flags |= invalidation_flags; + (*flags) |= invalidation_flags; current_topology = INVALID_TOPOLOGY; } void InvalidateViewports() { - flags[Dirty::Viewports] = true; + (*flags)[Dirty::Viewports] = true; } void InvalidateScissors() { - flags[Dirty::Scissors] = true; + (*flags)[Dirty::Scissors] = true; } bool TouchViewports() { @@ -139,16 +145,23 @@ public: return has_changed; } + void SetupTables(Tegra::Control::ChannelState& channel_state); + + void ChangeChannel(Tegra::Control::ChannelState& channel_state); + + void InvalidateState(); + private: static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u); bool Exchange(std::size_t id, bool new_value) const noexcept { - const bool is_dirty = flags[id]; - flags[id] = new_value; + const bool is_dirty = (*flags)[id]; + (*flags)[id] = new_value; return is_dirty; } - Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; + Tegra::Engines::Maxwell3D::DirtyState::Flags* flags; + Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags; Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY; }; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index a69ae7725..706d9ba74 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -36,7 +36,8 @@ VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) { // Mailbox (triple buffering) doesn't lock the application like fifo (vsync), // prefer it if vsync option is not selected const auto found_mailbox = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR); - if (found_mailbox != modes.end() && !Settings::values.use_vsync.GetValue()) { + if (Settings::values.fullscreen_mode.GetValue() == Settings::FullscreenMode::Borderless && + found_mailbox != modes.end() && !Settings::values.use_vsync.GetValue()) { return VK_PRESENT_MODE_MAILBOX_KHR; } if (!Settings::values.use_speed_limit.GetValue()) { @@ -156,8 +157,16 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u3 present_mode = ChooseSwapPresentMode(present_modes); u32 requested_image_count{capabilities.minImageCount + 1}; - if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { - requested_image_count = capabilities.maxImageCount; + // Ensure Tripple buffering if possible. + if (capabilities.maxImageCount > 0) { + if (requested_image_count > capabilities.maxImageCount) { + requested_image_count = capabilities.maxImageCount; + } else { + requested_image_count = + std::max(requested_image_count, std::min(3U, capabilities.maxImageCount)); + } + } else { + requested_image_count = std::max(requested_image_count, 3U); } VkSwapchainCreateInfoKHR swapchain_ci{ .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index caca79d79..305ad8aee 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -592,7 +592,7 @@ void TryTransformSwizzleIfNeeded(PixelFormat format, std::array<SwizzleSource, 4 case PixelFormat::A5B5G5R1_UNORM: std::ranges::transform(swizzle, swizzle.begin(), SwapSpecial); break; - case PixelFormat::R4G4_UNORM: + case PixelFormat::G4R4_UNORM: std::ranges::transform(swizzle, swizzle.begin(), SwapGreenRed); break; default: @@ -1474,13 +1474,14 @@ bool Image::BlitScaleHelper(bool scale_up) { }; const VkExtent2D extent{ .width = std::max(scaled_width, info.size.width), - .height = std::max(scaled_height, info.size.width), + .height = std::max(scaled_height, info.size.height), }; auto* view_ptr = blit_view.get(); if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { if (!blit_framebuffer) { - blit_framebuffer = std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent); + blit_framebuffer = + std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent, scale_up); } const auto color_view = blit_view->Handle(Shader::TextureType::Color2D); @@ -1488,7 +1489,8 @@ bool Image::BlitScaleHelper(bool scale_up) { src_region, operation, BLIT_OPERATION); } else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { if (!blit_framebuffer) { - blit_framebuffer = std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent); + blit_framebuffer = + std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent, scale_up); } runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(), blit_view->StencilView(), dst_region, @@ -1756,34 +1758,42 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM .width = key.size.width, .height = key.size.height, }} { - CreateFramebuffer(runtime, color_buffers, depth_buffer); + CreateFramebuffer(runtime, color_buffers, depth_buffer, key.is_rescaled); if (runtime.device.HasDebuggingToolAttached()) { framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); } } Framebuffer::Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, - ImageView* depth_buffer, VkExtent2D extent) + ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled) : render_area{extent} { std::array<ImageView*, NUM_RT> color_buffers{color_buffer}; - CreateFramebuffer(runtime, color_buffers, depth_buffer); + CreateFramebuffer(runtime, color_buffers, depth_buffer, is_rescaled); } Framebuffer::~Framebuffer() = default; void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, - ImageView* depth_buffer) { + ImageView* depth_buffer, bool is_rescaled) { std::vector<VkImageView> attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; + const auto& resolution = runtime.resolution; + + u32 width = 0; + u32 height = 0; for (size_t index = 0; index < NUM_RT; ++index) { const ImageView* const color_buffer = color_buffers[index]; if (!color_buffer) { renderpass_key.color_formats[index] = PixelFormat::Invalid; continue; } + width = std::max(width, is_rescaled ? resolution.ScaleUp(color_buffer->size.width) + : color_buffer->size.width); + height = std::max(height, is_rescaled ? resolution.ScaleUp(color_buffer->size.height) + : color_buffer->size.height); attachments.push_back(color_buffer->RenderTarget()); renderpass_key.color_formats[index] = color_buffer->format; num_layers = std::max(num_layers, color_buffer->range.extent.layers); @@ -1794,6 +1804,10 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, } const size_t num_colors = attachments.size(); if (depth_buffer) { + width = std::max(width, is_rescaled ? resolution.ScaleUp(depth_buffer->size.width) + : depth_buffer->size.width); + height = std::max(height, is_rescaled ? resolution.ScaleUp(depth_buffer->size.height) + : depth_buffer->size.height); attachments.push_back(depth_buffer->RenderTarget()); renderpass_key.depth_format = depth_buffer->format; num_layers = std::max(num_layers, depth_buffer->range.extent.layers); @@ -1810,6 +1824,8 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, renderpass_key.samples = samples; renderpass = runtime.render_pass_cache.Get(renderpass_key); + render_area.width = std::min(render_area.width, width); + render_area.height = std::min(render_area.height, height); num_color_buffers = static_cast<u32>(num_colors); framebuffer = runtime.device.GetLogical().CreateFramebuffer({ diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 69f06ee7b..0b7ac0df1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -268,7 +268,7 @@ public: ImageView* depth_buffer, const VideoCommon::RenderTargets& key); explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, - ImageView* depth_buffer, VkExtent2D extent); + ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled); ~Framebuffer(); @@ -279,7 +279,8 @@ public: Framebuffer& operator=(Framebuffer&&) = default; void CreateFramebuffer(TextureCacheRuntime& runtime, - std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer); + std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer, + bool is_rescaled = false); [[nodiscard]] VkFramebuffer Handle() const noexcept { return *framebuffer; |