diff options
Diffstat (limited to 'src/video_core')
23 files changed, 157 insertions, 58 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index c4f6e8d12..eed267361 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -62,7 +62,11 @@ using BufferId = SlotId; using VideoCore::Surface::PixelFormat; using namespace Common::Literals; +#ifdef __APPLE__ +constexpr u32 NUM_VERTEX_BUFFERS = 16; +#else constexpr u32 NUM_VERTEX_BUFFERS = 32; +#endif constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 805a89900..c0e6471fe 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -86,7 +86,10 @@ public: uncommitted_operations.emplace_back(std::move(func)); } pending_operations.emplace_back(std::move(uncommitted_operations)); - QueueFence(new_fence); + { + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + QueueFence(new_fence); + } if (!delay_fence) { func(); } diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index 8d7da50fc..dbcf508e5 100644 --- a/src/video_core/host1x/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp @@ -137,16 +137,6 @@ bool Codec::CreateGpuAvDevice() { break; } if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) { -#if defined(__unix__) - // Some linux decoding backends are reported to crash with this config method - // TODO(ameerj): Properly support this method - if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) { - // skip zero-copy decoders, we don't currently support them - LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.", - av_hwdevice_get_type_name(type), config->methods); - continue; - } -#endif LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); av_codec_ctx->pix_fmt = config->pix_fmt; return true; diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 8bb429578..cd2549232 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -19,6 +19,7 @@ set(SHADER_FILES block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp convert_abgr8_to_d24s8.frag + convert_abgr8_to_d32f.frag convert_d32f_to_abgr8.frag convert_d24s8_to_abgr8.frag convert_depth_to_float.frag diff --git a/src/video_core/host_shaders/convert_abgr8_to_d32f.frag b/src/video_core/host_shaders/convert_abgr8_to_d32f.frag new file mode 100644 index 000000000..095b910c2 --- /dev/null +++ b/src/video_core/host_shaders/convert_abgr8_to_d32f.frag @@ -0,0 +1,15 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + vec4 color = texelFetch(color_texture, coord, 0).abgr; + + float value = color.a * (color.r + color.g + color.b) / 3.0f; + + gl_FragDepth = value; +} diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag index d33131d7c..b81a54056 100644 --- a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag +++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag @@ -3,16 +3,16 @@ #version 450 +precision mediump int; +precision highp float; + layout(binding = 0) uniform sampler2D depth_tex; -layout(binding = 1) uniform isampler2D stencil_tex; +layout(binding = 1) uniform usampler2D stencil_tex; layout(location = 0) out vec4 color; void main() { ivec2 coord = ivec2(gl_FragCoord.xy); - uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); - uint stencil = uint(textureLod(stencil_tex, coord, 0).r); - highp uint depth_val = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0)); lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r; diff --git a/src/video_core/host_shaders/convert_d32f_to_abgr8.frag b/src/video_core/host_shaders/convert_d32f_to_abgr8.frag index 04cfef8b5..4e5a9f955 100644 --- a/src/video_core/host_shaders/convert_d32f_to_abgr8.frag +++ b/src/video_core/host_shaders/convert_d32f_to_abgr8.frag @@ -9,6 +9,6 @@ layout(location = 0) out vec4 color; void main() { ivec2 coord = ivec2(gl_FragCoord.xy); - float depth = textureLod(depth_tex, coord, 0).r; + float depth = texelFetch(depth_tex, coord, 0).r; color = vec4(depth, depth, depth, 1.0); } diff --git a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag index 31db7d426..6a457981d 100644 --- a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag +++ b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag @@ -3,16 +3,16 @@ #version 450 +precision mediump int; +precision highp float; + layout(binding = 0) uniform sampler2D depth_tex; -layout(binding = 1) uniform isampler2D stencil_tex; +layout(binding = 1) uniform usampler2D stencil_tex; layout(location = 0) out vec4 color; void main() { ivec2 coord = ivec2(gl_FragCoord.xy); - uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); - uint stencil = uint(textureLod(stencil_tex, coord, 0).r); - highp uint depth_val = uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0)); lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index f01d2394e..c3db09424 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -8,6 +8,7 @@ #include "common/settings.h" #include "video_core/host_shaders/blit_color_float_frag_spv.h" #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" +#include "video_core/host_shaders/convert_abgr8_to_d32f_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_d32f_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" @@ -434,6 +435,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), + convert_abgr8_to_d32f_frag(BuildShader(device, CONVERT_ABGR8_TO_D32F_FRAG_SPV)), convert_d32f_to_abgr8_frag(BuildShader(device, CONVERT_D32F_TO_ABGR8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)), @@ -559,6 +561,13 @@ void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view); } +void BlitImageHelper::ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertPipelineDepthTargetEx(convert_abgr8_to_d32f_pipeline, dst_framebuffer->RenderPass(), + convert_abgr8_to_d32f_frag); + Convert(*convert_abgr8_to_d32f_pipeline, dst_framebuffer, src_image_view); +} + void BlitImageHelper::ConvertD32FToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view) { ConvertPipelineColorTargetEx(convert_d32f_to_abgr8_pipeline, dst_framebuffer->RenderPass(), diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index a032c71fb..b2104a59e 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -67,6 +67,8 @@ public: void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertD32FToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); @@ -130,6 +132,7 @@ private: vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; + vk::ShaderModule convert_abgr8_to_d32f_frag; vk::ShaderModule convert_d32f_to_abgr8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; vk::ShaderModule convert_s8d24_to_abgr8_frag; @@ -149,6 +152,7 @@ private: vk::Pipeline convert_d16_to_r16_pipeline; vk::Pipeline convert_r16_to_d16_pipeline; vk::Pipeline convert_abgr8_to_d24s8_pipeline; + vk::Pipeline convert_abgr8_to_d32f_pipeline; vk::Pipeline convert_d32f_to_abgr8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; vk::Pipeline convert_s8d24_to_abgr8_pipeline; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index c4c30d807..7e7a80740 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -132,12 +132,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { const bool use_accelerated = rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); const bool is_srgb = use_accelerated && screen_info.is_srgb; - RenderScreenshot(*framebuffer, use_accelerated); - Frame* frame = present_manager.GetRenderFrame(); - blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb); - scheduler.Flush(*frame->render_ready); - present_manager.Present(frame); + { + std::scoped_lock lock{rasterizer.LockCaches()}; + RenderScreenshot(*framebuffer, use_accelerated); + + Frame* frame = present_manager.GetRenderFrame(); + blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb); + scheduler.Flush(*frame->render_ready); + present_manager.Present(frame); + } gpu.RendererFrameEndNotify(); rasterizer.TickFrame(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a1ec1a100..804b95989 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -356,7 +356,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, .ignore_nan_fp_comparisons = false, .has_broken_spirv_subgroup_mask_vector_extract_dynamic = - driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; + driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, + .has_broken_robust = + device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Maxwell, + }; + host_info = Shader::HostTranslateInfo{ .support_float64 = device.IsFloat64Supported(), .support_float16 = device.IsFloat16Supported(), diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 2edaafa7e..66c03bf17 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -1436,6 +1436,7 @@ void QueryCacheRuntime::Barriers(bool is_prebarrier) { .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; + impl->scheduler.RequestOutsideRenderPassOperationContext(); if (is_prebarrier) { impl->scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 83f2b6045..465eac37e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -198,7 +198,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { if (!pipeline) { return; } - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + std::scoped_lock lock{LockCaches()}; // update engine as channel may be different. pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); @@ -708,6 +708,7 @@ void RasterizerVulkan::TiledCacheBarrier() { } void RasterizerVulkan::FlushCommands() { + std::scoped_lock lock{LockCaches()}; if (draw_counter == 0) { return; } @@ -805,6 +806,7 @@ void RasterizerVulkan::FlushWork() { if ((++draw_counter & 7) != 7) { return; } + std::scoped_lock lock{LockCaches()}; if (draw_counter < DRAWS_TO_DISPATCH) { // Send recorded tasks to the worker thread scheduler.DispatchWork(); @@ -975,6 +977,19 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs if (!state_tracker.TouchScissors()) { return; } + if (!regs.viewport_scale_offset_enabled) { + const auto x = static_cast<float>(regs.surface_clip.x); + const auto y = static_cast<float>(regs.surface_clip.y); + const auto width = static_cast<float>(regs.surface_clip.width); + const auto height = static_cast<float>(regs.surface_clip.height); + VkRect2D scissor; + scissor.offset.x = static_cast<u32>(x); + scissor.offset.y = static_cast<u32>(y); + scissor.extent.width = static_cast<u32>(width != 0.0f ? width : 1.0f); + scissor.extent.height = static_cast<u32>(height != 0.0f ? height : 1.0f); + scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissor); }); + return; + } u32 up_scale = 1; u32 down_shift = 0; if (texture_cache.IsRescaling()) { @@ -1486,7 +1501,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) { CreateChannel(channel); { - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + std::scoped_lock lock{LockCaches()}; texture_cache.CreateChannel(channel); buffer_cache.CreateChannel(channel); } @@ -1499,7 +1514,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) { const s32 channel_id = channel.bind_id; BindToChannel(channel_id); { - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + std::scoped_lock lock{LockCaches()}; texture_cache.BindToChannel(channel_id); buffer_cache.BindToChannel(channel_id); } @@ -1512,7 +1527,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) { void RasterizerVulkan::ReleaseChannel(s32 channel_id) { EraseChannel(channel_id); { - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + std::scoped_lock lock{LockCaches()}; texture_cache.EraseChannel(channel_id); buffer_cache.EraseChannel(channel_id); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ad069556c..ce3dfbaab 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -133,6 +133,10 @@ public: void ReleaseChannel(s32 channel_id) override; + std::scoped_lock<std::recursive_mutex, std::recursive_mutex> LockCaches() { + return std::scoped_lock{buffer_cache.mutex, texture_cache.mutex}; + } + private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index ae9f1de64..7746a88d3 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -19,7 +19,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat VkSampleCountFlagBits samples) { using MaxwellToVK::SurfaceFormat; return { - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .flags = {}, .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, .samples = samples, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 93773a69f..de34f6d49 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1194,6 +1194,11 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im return blit_image_helper.ConvertD16ToR16(dst, src_view); } break; + case PixelFormat::A8B8G8R8_SRGB: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32FToABGR8(dst, src_view); + } + break; case PixelFormat::A8B8G8R8_UNORM: if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); @@ -1205,6 +1210,16 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im return blit_image_helper.ConvertD32FToABGR8(dst, src_view); } break; + case PixelFormat::B8G8R8A8_SRGB: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32FToABGR8(dst, src_view); + } + break; + case PixelFormat::B8G8R8A8_UNORM: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32FToABGR8(dst, src_view); + } + break; case PixelFormat::R32_FLOAT: if (src_view.format == PixelFormat::D32_FLOAT) { return blit_image_helper.ConvertD32ToR32(dst, src_view); @@ -1222,6 +1237,12 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im } break; case PixelFormat::D32_FLOAT: + if (src_view.format == PixelFormat::A8B8G8R8_UNORM || + src_view.format == PixelFormat::B8G8R8A8_UNORM || + src_view.format == PixelFormat::A8B8G8R8_SRGB || + src_view.format == PixelFormat::B8G8R8A8_SRGB) { + return blit_image_helper.ConvertABGR8ToD32F(dst, src_view); + } if (src_view.format == PixelFormat::R32_FLOAT) { return blit_image_helper.ConvertR32ToD32(dst, src_view); } @@ -2034,7 +2055,7 @@ void TextureCacheRuntime::TransitionImageLayout(Image& image) { }, }; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([barrier = barrier](vk::CommandBuffer cmdbuf) { + scheduler.Record([barrier](vk::CommandBuffer cmdbuf) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier); }); diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index 6279d8e9e..2b7e0df72 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -10,19 +10,23 @@ #include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/render_targets.h" +#include "video_core/texture_cache/samples_helper.h" namespace VideoCommon { std::string Name(const ImageBase& image) { const GPUVAddr gpu_addr = image.gpu_addr; const ImageInfo& info = image.info; - const u32 width = info.size.width; - const u32 height = info.size.height; + u32 width = info.size.width; + u32 height = info.size.height; const u32 depth = info.size.depth; const u32 num_layers = image.info.resources.layers; const u32 num_levels = image.info.resources.levels; std::string resource; if (image.info.num_samples > 1) { + const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(image.info.num_samples); + width >>= samples_x; + height >>= samples_y; resource += fmt::format(":{}xMSAA", image.info.num_samples); } if (num_layers > 1) { diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h index 203ac1b11..2ee2f8312 100644 --- a/src/video_core/texture_cache/samples_helper.h +++ b/src/video_core/texture_cache/samples_helper.h @@ -24,7 +24,7 @@ namespace VideoCommon { return {2, 2}; } ASSERT_MSG(false, "Invalid number of samples={}", num_samples); - return {1, 1}; + return {0, 0}; } [[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) { diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 8151cabf0..15596c925 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -167,6 +167,13 @@ template <u32 GOB_EXTENT> } [[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) { + if (level == 0 && info.num_levels == 1) { + return Extent3D{ + .width = info.block.width, + .height = info.block.height, + .depth = info.block.depth, + }; + } const Extent3D blocks = NumLevelBlocks(info, level); return Extent3D{ .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width), @@ -1293,9 +1300,9 @@ u32 MapSizeBytes(const ImageBase& image) { static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0, 1}, 0) == 0x7f8000); -static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0, 1}, 0) == 0x4000); +static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0, 1}, 0) == 0x40000); -static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0, 1}, 0) == 0x4000); +static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0, 1}, 0) == 0x40000); static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) == 0x2afc00); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 876cec2e8..e518756d2 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -83,15 +83,6 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{ } // namespace Alternatives -enum class NvidiaArchitecture { - KeplerOrOlder, - Maxwell, - Pascal, - Volta, - Turing, - AmpereOrNewer, -}; - template <typename T> void SetNext(void**& next, T& data) { *next = &data; @@ -326,9 +317,9 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { // Only Ampere and newer support this feature // TODO: Find a way to differentiate Ampere and Ada - return NvidiaArchitecture::AmpereOrNewer; + return NvidiaArchitecture::Arch_AmpereOrNewer; } - return NvidiaArchitecture::Turing; + return NvidiaArchitecture::Arch_Turing; } if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) { @@ -340,7 +331,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, physical_properties.pNext = &advanced_blending_props; physical.GetProperties2(physical_properties); if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) { - return NvidiaArchitecture::Maxwell; + return NvidiaArchitecture::Arch_Maxwell; } if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) { @@ -350,13 +341,13 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, physical_properties.pNext = &conservative_raster_props; physical.GetProperties2(physical_properties); if (conservative_raster_props.degenerateLinesRasterized) { - return NvidiaArchitecture::Volta; + return NvidiaArchitecture::Arch_Volta; } - return NvidiaArchitecture::Pascal; + return NvidiaArchitecture::Arch_Pascal; } } - return NvidiaArchitecture::KeplerOrOlder; + return NvidiaArchitecture::Arch_KeplerOrOlder; } std::vector<const char*> ExtensionListForVulkan( @@ -436,6 +427,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); } + if (is_nvidia) { + nvidia_arch = GetNvidiaArchitecture(physical, supported_extensions); + } + SetupFamilies(surface); const auto queue_cis = GetDeviceQueueCreateInfos(); @@ -532,11 +527,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR if (is_nvidia) { const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; - const auto arch = GetNvidiaArchitecture(physical, supported_extensions); - if (arch >= NvidiaArchitecture::AmpereOrNewer) { + const auto arch = GetNvidiaArch(); + if (arch >= NvidiaArchitecture::Arch_AmpereOrNewer) { LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math"); features.shader_float16_int8.shaderFloat16 = false; - } else if (arch <= NvidiaArchitecture::Volta) { + } else if (arch <= NvidiaArchitecture::Arch_Volta) { if (nv_major_version < 527) { LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor"); RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); @@ -686,8 +681,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); } } else if (extensions.push_descriptor && is_nvidia) { - const auto arch = GetNvidiaArchitecture(physical, supported_extensions); - if (arch <= NvidiaArchitecture::Pascal) { + const auto arch = GetNvidiaArch(); + if (arch <= NvidiaArchitecture::Arch_Pascal) { LOG_WARNING(Render_Vulkan, "Pascal and older architectures have broken VK_KHR_push_descriptor"); RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 282a2925d..b213ed7dd 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -177,6 +177,15 @@ enum class FormatType { Linear, Optimal, Buffer }; /// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup). const u32 GuestWarpSize = 32; +enum class NvidiaArchitecture { + Arch_KeplerOrOlder, + Arch_Maxwell, + Arch_Pascal, + Arch_Volta, + Arch_Turing, + Arch_AmpereOrNewer, +}; + /// Handles data specific to a physical device. class Device { public: @@ -670,6 +679,14 @@ public: return false; } + bool IsNvidia() const noexcept { + return properties.driver.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY; + } + + NvidiaArchitecture GetNvidiaArch() const noexcept { + return nvidia_arch; + } + private: /// Checks if the physical device is suitable and configures the object state /// with all necessary info about its properties. @@ -788,6 +805,7 @@ private: bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. u64 device_access_memory{}; ///< Total size of device local memory in bytes. u32 sets_per_pool{}; ///< Sets per Description Pool + NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer}; // Telemetry parameters std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2f3254a97..70cf14afa 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -522,7 +522,7 @@ Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char .applicationVersion = VK_MAKE_VERSION(0, 1, 0), .pEngineName = "yuzu Emulator", .engineVersion = VK_MAKE_VERSION(0, 1, 0), - .apiVersion = version, + .apiVersion = VK_API_VERSION_1_3, }; const VkInstanceCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, |