diff options
Diffstat (limited to '')
22 files changed, 371 insertions, 244 deletions
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 2a532b883..04d0f3a2f 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -32,7 +32,7 @@ constexpr std::array PREFERRED_GPU_DECODERS = { #ifdef _WIN32 AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_DXVA2, -#elif defined(__linux__) +#elif defined(__unix__) AV_HWDEVICE_TYPE_VAAPI, AV_HWDEVICE_TYPE_VDPAU, #endif @@ -130,6 +130,12 @@ bool Codec::CreateGpuAvDevice() { } if (config->methods & HW_CONFIG_METHOD && config->device_type == type) { av_codec_ctx->pix_fmt = config->pix_fmt; + if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) { + // skip zero-copy decoders, we don't currently support them + LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.", + av_hwdevice_get_type_name(type), config->methods); + continue; + } LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); return true; } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 8788f5148..705765c99 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -17,7 +17,6 @@ #include "core/frontend/emu_window.h" #include "core/hardware_interrupt_manager.h" #include "core/hle/service/nvdrv/nvdata.h" -#include "core/hle/service/nvflinger/buffer_queue.h" #include "core/perf_stats.h" #include "video_core/cdma_pusher.h" #include "video_core/dma_pusher.h" @@ -312,6 +311,12 @@ struct GPU::Impl { cpu_context->MakeCurrent(); } + void NotifyShutdown() { + std::unique_lock lk{sync_mutex}; + shutting_down.store(true, std::memory_order::relaxed); + sync_cv.notify_all(); + } + /// Obtain the CPU Context void ObtainContext() { cpu_context->MakeCurrent(); @@ -859,6 +864,10 @@ void GPU::Start() { impl->Start(); } +void GPU::NotifyShutdown() { + impl->NotifyShutdown(); +} + void GPU::ObtainContext() { impl->ObtainContext(); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 500411176..3188b83ed 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -232,6 +232,9 @@ public: /// core timing events. void Start(); + /// Performs any additional necessary steps to shutdown GPU emulation. + void NotifyShutdown(); + /// Obtain the CPU Context void ObtainContext(); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 0764ea6e0..e62912a22 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -182,17 +182,13 @@ Device::Device() { shader_backend = Settings::ShaderBackend::GLSL; } - if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia && - !Settings::values.renderer_debug) { + if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) { const std::string_view driver_version = version.substr(13); const int version_major = std::atoi(driver_version.substr(0, driver_version.find(".")).data()); - if (version_major >= 495) { - LOG_WARNING(Render_OpenGL, "NVIDIA drivers 495 and later causes significant problems " - "with yuzu. Forcing GLASM as a mitigation."); - shader_backend = Settings::ShaderBackend::GLASM; - use_assembly_shaders = true; + has_cbuf_ftou_bug = true; + has_bool_ref_bug = true; } } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index de9e41659..95c2e8d38 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -152,6 +152,14 @@ public: return need_fastmath_off; } + bool HasCbufFtouBug() const { + return has_cbuf_ftou_bug; + } + + bool HasBoolRefBug() const { + return has_bool_ref_bug; + } + Settings::ShaderBackend GetShaderBackend() const { return shader_backend; } @@ -200,6 +208,8 @@ private: bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; + bool has_cbuf_ftou_bug{}; + bool has_bool_ref_bug{}; std::string vendor_name; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 42ef67628..f71e01a34 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -42,6 +42,7 @@ namespace { using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::GLSL::EmitGLSL; using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::ConvertLegacyToGeneric; using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; @@ -213,6 +214,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_broken_fp16_float_controls = false, .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), + .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(), + .has_gl_bool_ref_bug = device.HasBoolRefBug(), .ignore_nan_fp_comparisons = true, .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), }, @@ -422,6 +425,11 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + + if (Settings::values.dump_shaders) { + env.Dump(key.unique_hashes[index]); + } + if (!uses_vertex_a || index != 1) { // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); @@ -462,12 +470,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; switch (device.GetShaderBackend()) { case Settings::ShaderBackend::GLSL: + ConvertLegacyToGeneric(program, runtime_info); sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); break; case Settings::ShaderBackend::GLASM: sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); break; case Settings::ShaderBackend::SPIRV: + ConvertLegacyToGeneric(program, runtime_info); sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding); break; } @@ -506,8 +516,12 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + if (Settings::values.dump_shaders) { + env.Dump(key.Hash()); + } + + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)}; Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 14e6522f2..3c1f79a27 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1047,7 +1047,7 @@ bool Image::ScaleDown(bool ignore) { } ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, - ImageId image_id_, Image& image) + ImageId image_id_, Image& image, const SlotVector<Image>&) : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { const Device& device = runtime.device; if (True(image.flags & ImageFlagBits::Converted)) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 37d5e6a6b..7f425631f 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -36,6 +36,7 @@ using VideoCommon::ImageViewType; using VideoCommon::NUM_RT; using VideoCommon::Region2D; using VideoCommon::RenderTargets; +using VideoCommon::SlotVector; struct ImageBufferMap { ~ImageBufferMap(); @@ -92,7 +93,7 @@ public: void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); - void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { UNIMPLEMENTED(); } @@ -234,7 +235,8 @@ class ImageView : public VideoCommon::ImageViewBase { friend Image; public: - explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&, + const SlotVector<Image>&); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 28daacd82..f81c1b233 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -437,39 +437,29 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glBindTextureUnit(0, fxaa_texture.handle); } - - // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); - GLuint fragment_handle; - const auto filter = Settings::values.scaling_filter.GetValue(); - switch (filter) { - case Settings::ScalingFilter::NearestNeighbor: - fragment_handle = present_bilinear_fragment.handle; - break; - case Settings::ScalingFilter::Bilinear: - fragment_handle = present_bilinear_fragment.handle; - break; - case Settings::ScalingFilter::Bicubic: - fragment_handle = present_bicubic_fragment.handle; - break; - case Settings::ScalingFilter::Gaussian: - fragment_handle = present_gaussian_fragment.handle; - break; - case Settings::ScalingFilter::ScaleForce: - fragment_handle = present_scaleforce_fragment.handle; - break; - case Settings::ScalingFilter::Fsr: - LOG_WARNING( - Render_OpenGL, - "FidelityFX FSR Super Sampling is not supported in OpenGL, changing to ScaleForce"); - fragment_handle = present_scaleforce_fragment.handle; - break; - default: - fragment_handle = present_bilinear_fragment.handle; - break; - } + const auto fragment_handle = [this]() { + switch (Settings::values.scaling_filter.GetValue()) { + case Settings::ScalingFilter::NearestNeighbor: + case Settings::ScalingFilter::Bilinear: + return present_bilinear_fragment.handle; + case Settings::ScalingFilter::Bicubic: + return present_bicubic_fragment.handle; + case Settings::ScalingFilter::Gaussian: + return present_gaussian_fragment.handle; + case Settings::ScalingFilter::ScaleForce: + return present_scaleforce_fragment.handle; + case Settings::ScalingFilter::Fsr: + LOG_WARNING( + Render_OpenGL, + "FidelityFX Super Resolution is not supported in OpenGL, changing to ScaleForce"); + return present_scaleforce_fragment.handle; + default: + return present_bilinear_fragment.handle; + } + }(); program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle); glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 9a38b6b34..2c3914459 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -4,6 +4,7 @@ #include <algorithm> +#include "common/settings.h" #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" @@ -335,6 +336,17 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } + +VkExtent2D GetConversionExtent(const ImageView& src_image_view) { + const auto& resolution = Settings::values.resolution_info; + const bool is_rescaled = src_image_view.IsRescaled(); + u32 width = src_image_view.size.width; + u32 height = src_image_view.size.height; + return VkExtent2D{ + .width = is_rescaled ? resolution.ScaleUp(width) : width, + .height = is_rescaled ? resolution.ScaleUp(height) : height, + }; +} } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, @@ -425,108 +437,52 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, } void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { + const ImageView& src_image_view) { ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); + Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view); } void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { + const ImageView& src_image_view) { ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); + Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); } void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { + const ImageView& src_image_view) { ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); + Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view); } void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, - u32 down_shift) { + const ImageView& src_image_view) { ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); + Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view); } void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, u32 down_shift) { + const ImageView& src_image_view) { ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(), - convert_abgr8_to_d24s8_frag, true); - ConvertColor(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, - down_shift); + convert_abgr8_to_d24s8_frag); + Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view); } void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, u32 down_shift) { + ImageView& src_image_view) { ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), - convert_d24s8_to_abgr8_frag, false); - ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale, - down_shift); + convert_d24s8_to_abgr8_frag); + ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view); } void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, u32 down_shift) { + const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; - const VkExtent2D extent{ - .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), - .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), - }; - scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift, - this](vk::CommandBuffer cmdbuf) { - const VkOffset2D offset{ - .x = 0, - .y = 0, - }; - const VkViewport viewport{ - .x = 0.0f, - .y = 0.0f, - .width = static_cast<float>(extent.width), - .height = static_cast<float>(extent.height), - .minDepth = 0.0f, - .maxDepth = 0.0f, - }; - const VkRect2D scissor{ - .offset = offset, - .extent = extent, - }; - const PushConstants push_constants{ - .tex_scale = {viewport.width, viewport.height}, - .tex_offset = {0.0f, 0.0f}, - }; - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); - UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); - - // TODO: Barriers - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, - nullptr); - cmdbuf.SetViewport(0, viewport); - cmdbuf.SetScissor(0, scissor); - cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); - cmdbuf.Draw(3, 1, 0, 0); - }); - scheduler.InvalidateState(); -} + const VkExtent2D extent = GetConversionExtent(src_image_view); -void BlitImageHelper::ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, u32 down_shift) { - const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.ColorView(); - const VkSampler sampler = *nearest_sampler; - const VkExtent2D extent{ - .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), - .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), - }; scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift, - this](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -563,18 +519,16 @@ void BlitImageHelper::ConvertColor(VkPipeline pipeline, const Framebuffer* dst_f } void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, u32 down_shift) { + ImageView& src_image_view) { const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkImageView src_depth_view = src_image_view.DepthView(); const VkImageView src_stencil_view = src_image_view.StencilView(); const VkSampler sampler = *nearest_sampler; - const VkExtent2D extent{ - .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), - .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), - }; + const VkExtent2D extent = GetConversionExtent(src_image_view); + scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, up_scale, - down_shift, this](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, + this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -695,11 +649,14 @@ VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePip return *blit_depth_stencil_pipelines.back(); } -void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { +void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, + bool is_target_depth) { if (pipeline) { return; } - const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag); + VkShaderModule frag_shader = + is_target_depth ? *convert_float_to_depth_frag : *convert_depth_to_float_frag; + const std::array stages = MakeStages(*full_screen_vert, frag_shader); pipeline = device.GetLogical().CreateGraphicsPipeline({ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, @@ -712,8 +669,9 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pDepthStencilState = nullptr, - .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr, + .pColorBlendState = is_target_depth ? &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO + : &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, .layout = *one_texture_pipeline_layout, .renderPass = renderpass, @@ -723,37 +681,17 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend }); } +void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { + ConvertPipeline(pipeline, renderpass, false); +} + void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { - if (pipeline) { - return; - } - const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag); - pipeline = device.GetLogical().CreateGraphicsPipeline({ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast<u32>(stages.size()), - .pStages = stages.data(), - .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pTessellationState = nullptr, - .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, - .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .layout = *one_texture_pipeline_layout, - .renderPass = renderpass, - .subpass = 0, - .basePipelineHandle = VK_NULL_HANDLE, - .basePipelineIndex = 0, - }); + ConvertPipeline(pipeline, renderpass, true); } void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool is_target_depth, - bool single_texture) { + vk::ShaderModule& module, bool single_texture, + bool is_target_depth) { if (pipeline) { return; } @@ -782,13 +720,13 @@ void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass ren } void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture) { - ConvertPipelineEx(pipeline, renderpass, module, false, single_texture); + vk::ShaderModule& module) { + ConvertPipelineEx(pipeline, renderpass, module, false, false); } void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture) { - ConvertPipelineEx(pipeline, renderpass, module, true, single_texture); + vk::ShaderModule& module) { + ConvertPipelineEx(pipeline, renderpass, module, true, true); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index b1a717090..85e7dca5b 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -44,50 +44,43 @@ public: const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); - void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, - u32 up_scale, u32 down_shift); + void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, - u32 up_scale, u32 down_shift); + void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, - u32 up_scale, u32 down_shift); + void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, - u32 up_scale, u32 down_shift); + void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, - u32 up_scale, u32 down_shift); + void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, - u32 up_scale, u32 down_shift); + void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - const ImageView& src_image_view, u32 up_scale, u32 down_shift); - - void ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, u32 down_shift); + const ImageView& src_image_view); void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - ImageView& src_image_view, u32 up_scale, u32 down_shift); + ImageView& src_image_view); [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key); [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); + void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth); + void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool is_target_depth, bool single_texture); + vk::ShaderModule& module, bool single_texture, bool is_target_depth); void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture); + vk::ShaderModule& module); void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture); + vk::ShaderModule& module); const Device& device; VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 1e447e621..c71a1f44d 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -391,28 +391,23 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, .offset = {0, 0}, .extent = size, }; - const auto filter = Settings::values.scaling_filter.GetValue(); cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - switch (filter) { - case Settings::ScalingFilter::NearestNeighbor: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); - break; - case Settings::ScalingFilter::Bilinear: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); - break; - case Settings::ScalingFilter::Bicubic: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bicubic_pipeline); - break; - case Settings::ScalingFilter::Gaussian: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *gaussian_pipeline); - break; - case Settings::ScalingFilter::ScaleForce: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *scaleforce_pipeline); - break; - default: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); - break; - } + auto graphics_pipeline = [this]() { + switch (Settings::values.scaling_filter.GetValue()) { + case Settings::ScalingFilter::NearestNeighbor: + case Settings::ScalingFilter::Bilinear: + return *bilinear_pipeline; + case Settings::ScalingFilter::Bicubic: + return *bicubic_pipeline; + case Settings::ScalingFilter::Gaussian: + return *gaussian_pipeline; + case Settings::ScalingFilter::ScaleForce: + return *scaleforce_pipeline; + default: + return *bilinear_pipeline; + } + }(); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 616a7b457..d514b71d0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -605,7 +605,11 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .flags = 0, .topology = input_assembly_topology, .primitiveRestartEnable = key.state.primitive_restart_enable != 0 && - SupportsPrimitiveRestart(input_assembly_topology), + ((input_assembly_topology != VK_PRIMITIVE_TOPOLOGY_PATCH_LIST && + device.IsTopologyListPrimitiveRestartSupported()) || + SupportsPrimitiveRestart(input_assembly_topology) || + (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST && + device.IsPatchListPrimitiveRestartSupported())), }; const VkPipelineTessellationStateCreateInfo tessellation_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, @@ -613,7 +617,6 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .flags = 0, .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; - std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ @@ -748,8 +751,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .logicOpEnable = VK_FALSE, - .logicOp = VK_LOGIC_OP_COPY, + .logicOpEnable = key.state.logic_op_enable != 0, + .logicOp = static_cast<VkLogicOp>(key.state.logic_op.Value()), .attachmentCount = static_cast<u32>(cb_attachments.size()), .pAttachments = cb_attachments.data(), .blendConstants = {}, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index eb8b4e08b..a633b73e5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -48,6 +48,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); namespace { using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::ConvertLegacyToGeneric; using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; @@ -516,6 +517,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (Settings::values.dump_shaders) { + env.Dump(key.unique_hashes[index]); + } if (!uses_vertex_a || index != 1) { // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); @@ -543,6 +547,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( infos[stage_index] = &program.info; const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; + ConvertLegacyToGeneric(program, runtime_info); const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); @@ -611,6 +616,12 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + + // Dump it before error. + if (Settings::values.dump_shaders) { + env.Dump(key.Hash()); + } + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const std::vector<u32> code{EmitSPIRV(profile, program)}; device.SaveShader(code); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 197cba8e3..0ba56ff1e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1057,37 +1057,37 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst }); } -void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, - bool rescaled) { - const u32 up_scale = rescaled ? resolution.up_scale : 1; - const u32 down_shift = rescaled ? resolution.down_shift : 0; +void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { switch (dst_view.format) { case PixelFormat::R16_UNORM: if (src_view.format == PixelFormat::D16_UNORM) { - return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift); + return blit_image_helper.ConvertD16ToR16(dst, src_view); } break; case PixelFormat::A8B8G8R8_UNORM: if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { - return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift); + return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); } break; case PixelFormat::R32_FLOAT: if (src_view.format == PixelFormat::D32_FLOAT) { - return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); + return blit_image_helper.ConvertD32ToR32(dst, src_view); } break; case PixelFormat::D16_UNORM: if (src_view.format == PixelFormat::R16_UNORM) { - return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift); + return blit_image_helper.ConvertR16ToD16(dst, src_view); } break; case PixelFormat::S8_UINT_D24_UNORM: - return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift); + if (src_view.format == PixelFormat::A8B8G8R8_UNORM || + src_view.format == PixelFormat::B8G8R8A8_UNORM) { + return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view); + } break; case PixelFormat::D32_FLOAT: if (src_view.format == PixelFormat::R32_FLOAT) { - return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift); + return blit_image_helper.ConvertR32ToD32(dst, src_view); } break; default: @@ -1329,6 +1329,10 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm } } +bool Image::IsRescaled() const noexcept { + return True(flags & ImageFlagBits::Rescaled); +} + bool Image::ScaleUp(bool ignore) { if (True(flags & ImageFlagBits::Rescaled)) { return false; @@ -1340,7 +1344,6 @@ bool Image::ScaleUp(bool ignore) { return false; } has_scaled = true; - const auto& device = runtime->device; if (!scaled_image) { const bool is_2d = info.type == ImageType::e2D; const u32 scaled_width = resolution.ScaleUp(info.size.width); @@ -1348,7 +1351,7 @@ bool Image::ScaleUp(bool ignore) { auto scaled_info = info; scaled_info.size.width = scaled_width; scaled_info.size.height = scaled_height; - scaled_image = MakeImage(device, scaled_info); + scaled_image = MakeImage(runtime->device, scaled_info); auto& allocator = runtime->memory_allocator; scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal)); ignore = false; @@ -1357,18 +1360,13 @@ bool Image::ScaleUp(bool ignore) { if (ignore) { return true; } - if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } - static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; - const PixelFormat format = StorageFormat(info.format); - const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; - const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { - BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution); - } else { + if (NeedsScaleHelper()) { return BlitScaleHelper(true); + } else { + BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution); } return true; } @@ -1390,15 +1388,10 @@ bool Image::ScaleDown(bool ignore) { if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } - static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; - const PixelFormat format = StorageFormat(info.format); - const auto& device = runtime->device; - const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; - const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { - BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false); - } else { + if (NeedsScaleHelper()) { return BlitScaleHelper(false); + } else { + BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false); } return true; } @@ -1466,10 +1459,24 @@ bool Image::BlitScaleHelper(bool scale_up) { return true; } +bool Image::NeedsScaleHelper() const { + const auto& device = runtime->device; + const bool needs_msaa_helper = info.num_samples > 1 && device.CantBlitMSAA(); + if (needs_msaa_helper) { + return true; + } + static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; + const PixelFormat format = StorageFormat(info.format); + const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; + const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + const bool needs_blit_helper = !device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT); + return needs_blit_helper; +} + ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, ImageId image_id_, Image& image) : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, - image_handle{image.Handle()}, samples{ConvertSampleCount(image.info.num_samples)} { + image_handle{image.Handle()}, samples(ConvertSampleCount(image.info.num_samples)) { using Shader::TextureType; const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); @@ -1552,6 +1559,12 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI } } +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, + ImageId image_id_, Image& image, const SlotVector<Image>& slot_imgs) + : ImageView{runtime, info, image_id_, image} { + slot_images = &slot_imgs; +} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, @@ -1607,6 +1620,15 @@ VkImageView ImageView::StorageView(Shader::TextureType texture_type, return *view; } +bool ImageView::IsRescaled() const noexcept { + if (!slot_images) { + return false; + } + const auto& slots = *slot_images; + const auto& src_image = slots[image_id]; + return src_image.IsRescaled(); +} + vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) { return device->GetLogical().CreateImageView({ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 753e3e8a1..c81130dd2 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -23,6 +23,7 @@ using VideoCommon::ImageId; using VideoCommon::NUM_RT; using VideoCommon::Region2D; using VideoCommon::RenderTargets; +using VideoCommon::SlotVector; using VideoCore::Surface::PixelFormat; class ASTCDecoderPass; @@ -65,7 +66,7 @@ public: void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); - void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); bool CanAccelerateImageUpload(Image&) const noexcept { return false; @@ -139,6 +140,8 @@ public: return std::exchange(initialized, true); } + bool IsRescaled() const noexcept; + bool ScaleUp(bool ignore = false); bool ScaleDown(bool ignore = false); @@ -146,6 +149,8 @@ public: private: bool BlitScaleHelper(bool scale_up); + bool NeedsScaleHelper() const; + VKScheduler* scheduler{}; TextureCacheRuntime* runtime{}; @@ -168,6 +173,8 @@ private: class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&, + const SlotVector<Image>&); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&); @@ -189,6 +196,8 @@ public: [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format); + [[nodiscard]] bool IsRescaled() const noexcept; + [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept { return *image_views[static_cast<size_t>(texture_type)]; } @@ -222,6 +231,8 @@ private: [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask); const Device* device = nullptr; + const SlotVector<Image>* slot_images = nullptr; + std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views; std::unique_ptr<StorageViews> storage_views; vk::ImageView depth_view; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 05850afd0..7d3ae0de4 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <bit> #include <filesystem> #include <fstream> #include <memory> @@ -14,6 +15,7 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/logging/log.h" #include "shader_recompiler/environment.h" #include "video_core/engines/kepler_compute.h" @@ -57,6 +59,47 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { } } +static std::string_view StageToPrefix(Shader::Stage stage) { + switch (stage) { + case Shader::Stage::VertexB: + return "VB"; + case Shader::Stage::TessellationControl: + return "TC"; + case Shader::Stage::TessellationEval: + return "TE"; + case Shader::Stage::Geometry: + return "GS"; + case Shader::Stage::Fragment: + return "FS"; + case Shader::Stage::Compute: + return "CS"; + case Shader::Stage::VertexA: + return "VA"; + default: + return "UK"; + } +} + +static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowest, + u32 initial_offset, Shader::Stage stage) { + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)}; + const auto base_dir{shader_dir / "shaders"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create shader dump directories"); + return; + } + const auto prefix = StageToPrefix(stage); + const auto name{base_dir / fmt::format("{}{:016x}.ash", prefix, hash)}; + const size_t real_size = read_highest - read_lowest + initial_offset; + const size_t padding_needed = ((32 - (real_size % 32)) % 32); + std::fstream shader_file(name, std::ios::out | std::ios::binary); + const size_t jump_index = initial_offset / sizeof(u64); + shader_file.write(reinterpret_cast<const char*>(code + jump_index), real_size); + for (size_t i = 0; i < padding_needed; i++) { + shader_file.put(0); + } +} + GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, u32 start_address_) : gpu_memory{&gpu_memory_}, program_base{program_base_} { @@ -128,6 +171,10 @@ u64 GenericEnvironment::CalculateHash() const { return Common::CityHash64(data.get(), size); } +void GenericEnvironment::Dump(u64 hash) { + DumpImpl(hash, code.data(), read_highest, read_lowest, initial_offset, stage); +} + void GenericEnvironment::Serialize(std::ofstream& file) const { const u64 code_size{static_cast<u64>(CachedSize())}; const u64 num_texture_types{static_cast<u64>(texture_types.size())}; @@ -207,6 +254,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + initial_offset = sizeof(sph); gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -323,14 +371,20 @@ void FileEnvironment::Deserialize(std::ifstream& file) { if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); + initial_offset = 0; } else { file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); + initial_offset = sizeof(sph); if (stage == Shader::Stage::Geometry) { file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); } } } +void FileEnvironment::Dump(u64 [[maybe_unused]] hash) { + DumpImpl(hash, code.get(), read_highest, read_lowest, initial_offset, stage); +} + u64 FileEnvironment::ReadInstruction(u32 address) { if (address < read_lowest || address > read_highest) { throw Shader::LogicError("Out of bounds address {}", address); diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 6640e53d0..aae762b27 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -57,6 +57,8 @@ public: [[nodiscard]] u64 CalculateHash() const; + void Dump(u64 hash) override; + void Serialize(std::ofstream& file) const; protected: @@ -82,6 +84,7 @@ protected: u32 cached_lowest = std::numeric_limits<u32>::max(); u32 cached_highest = 0; + u32 initial_offset = 0; bool has_unbound_instructions = false; }; @@ -149,6 +152,8 @@ public: [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; + void Dump(u64 hash) override; + private: std::unique_ptr<u64[]> code; std::unordered_map<u32, Shader::TextureType> texture_types; @@ -159,6 +164,7 @@ private: u32 texture_bound{}; u32 read_lowest{}; u32 read_highest{}; + u32 initial_offset{}; }; void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 5aaeb16ca..b494152b8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1397,7 +1397,8 @@ ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const Imag if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { return image_view_id; } - const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); + const ImageViewId image_view_id = + slot_image_views.insert(runtime, info, image_id, image, slot_images); image.InsertView(info, image_view_id); return image_view_id; } @@ -1855,9 +1856,20 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag .height = std::min(dst_view.size.height, src_view.size.height), .depth = std::min(dst_view.size.depth, src_view.size.depth), }; - UNIMPLEMENTED_IF(copy.extent != expected_size); + const Extent3D scaled_extent = [is_rescaled, expected_size]() { + if (!is_rescaled) { + return expected_size; + } + const auto& resolution = Settings::values.resolution_info; + return Extent3D{ + .width = resolution.ScaleUp(expected_size.width), + .height = resolution.ScaleUp(expected_size.height), + .depth = expected_size.depth, + }; + }(); + UNIMPLEMENTED_IF(copy.extent != scaled_extent); - runtime.ConvertImage(dst_framebuffer, dst_view, src_view, is_rescaled); + runtime.ConvertImage(dst_framebuffer, dst_view, src_view); } } diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 7bd31b211..d8e19cb2f 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -364,14 +364,14 @@ template <u32 GOB_EXTENT> [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D( const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { - const u32 layer_stride = new_info.layer_stride; - const s32 new_size = layer_stride * new_info.resources.layers; - const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr); + const u64 layer_stride = new_info.layer_stride; + const u64 new_size = layer_stride * new_info.resources.layers; + const u64 diff = overlap.gpu_addr - gpu_addr; if (diff > new_size) { return std::nullopt; } - const s32 base_layer = diff / layer_stride; - const s32 mip_offset = diff % layer_stride; + const s32 base_layer = static_cast<s32>(diff / layer_stride); + const s32 mip_offset = static_cast<s32>(diff % layer_stride); const std::array offsets = CalculateMipLevelOffsets(new_info); const auto end = offsets.begin() + new_info.resources.levels; const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset)); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7bf5b6578..3d78efddc 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -271,7 +271,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .tessellationShader = true, .sampleRateShading = true, .dualSrcBlend = true, - .logicOp = false, + .logicOp = true, .multiDrawIndirect = false, .drawIndirectFirstInstance = false, .depthClamp = true, @@ -433,6 +433,19 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); } + VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart; + if (is_topology_list_restart_supported || is_patch_list_restart_supported) { + primitive_topology_list_restart = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT, + .pNext = nullptr, + .primitiveTopologyListRestart = is_topology_list_restart_supported, + .primitiveTopologyPatchListRestart = is_patch_list_restart_supported, + }; + SetNext(next, primitive_topology_list_restart); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support list topology primitive restart"); + } + VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; if (ext_transform_feedback) { transform_feedback = { @@ -625,15 +638,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } } - if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS; + if (ext_vertex_input_dynamic_state && is_intel_windows) { LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); ext_vertex_input_dynamic_state = false; } - if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + if (is_float16_supported && is_intel_windows) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); is_float16_supported = false; } + if (is_intel_windows) { + LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); + cant_blit_msaa = true; + } supports_d24_depth = IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, @@ -891,6 +909,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { bool has_ext_provoking_vertex{}; bool has_ext_vertex_input_dynamic_state{}; bool has_ext_line_rasterization{}; + bool has_ext_primitive_topology_list_restart{}; for (const std::string& extension : supported_extensions) { const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, bool push) { @@ -915,6 +934,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); + test(has_ext_primitive_topology_list_restart, + VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME, true); test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true); test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); @@ -1113,6 +1134,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { khr_pipeline_executable_properties = true; } } + if (has_ext_primitive_topology_list_restart) { + VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart{}; + primitive_topology_list_restart.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT; + primitive_topology_list_restart.pNext = nullptr; + features.pNext = &primitive_topology_list_restart; + physical.GetFeatures2KHR(features); + + is_topology_list_restart_supported = + primitive_topology_list_restart.primitiveTopologyListRestart; + is_patch_list_restart_supported = + primitive_topology_list_restart.primitiveTopologyPatchListRestart; + } if (has_khr_image_format_list && has_khr_swapchain_mutable_format) { extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 10653ac6b..37d140ebd 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -238,6 +238,16 @@ public: return khr_workgroup_memory_explicit_layout; } + /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. + bool IsTopologyListPrimitiveRestartSupported() const { + return is_topology_list_restart_supported; + } + + /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. + bool IsPatchListPrimitiveRestartSupported() const { + return is_patch_list_restart_supported; + } + /// Returns true if the device supports VK_EXT_index_type_uint8. bool IsExtIndexTypeUint8Supported() const { return ext_index_type_uint8; @@ -340,6 +350,10 @@ public: return supports_d24_depth; } + bool CantBlitMSAA() const { + return cant_blit_msaa; + } + private: /// Checks if the physical device is suitable. void CheckSuitability(bool requires_swapchain) const; @@ -401,6 +415,9 @@ private: bool is_shader_int16_supported{}; ///< Support for int16. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. + bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list + ///< topologies. + bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. @@ -430,6 +447,7 @@ private: bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached bool supports_d24_depth{}; ///< Supports D24 depth buffers. + bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. // Telemetry parameters std::string vendor_name; ///< Device's driver name. |