summary refs log tree commit diff stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/buffer_cache/buffer_cache_base.h 4
-rw-r--r-- src/video_core/fence_manager.h 5
-rw-r--r-- src/video_core/host1x/codecs/codec.cpp 10
-rw-r--r-- src/video_core/host_shaders/CMakeLists.txt 1
-rw-r--r-- src/video_core/host_shaders/convert_abgr8_to_d32f.frag 15
-rw-r--r-- src/video_core/host_shaders/convert_d24s8_to_abgr8.frag 8
-rw-r--r-- src/video_core/host_shaders/convert_d32f_to_abgr8.frag 2
-rw-r--r-- src/video_core/host_shaders/convert_s8d24_to_abgr8.frag 8
-rw-r--r-- src/video_core/renderer_vulkan/blit_image.cpp 9
-rw-r--r-- src/video_core/renderer_vulkan/blit_image.h 4
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.cpp 14
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp 6
-rw-r--r-- src/video_core/renderer_vulkan/vk_query_cache.cpp 1
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp 23
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.h 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_render_pass_cache.cpp 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.cpp 23
-rw-r--r-- src/video_core/texture_cache/formatter.cpp 8
-rw-r--r-- src/video_core/texture_cache/samples_helper.h 2
-rw-r--r-- src/video_core/texture_cache/util.cpp 11
-rw-r--r-- src/video_core/vulkan_common/vulkan_device.cpp 35
-rw-r--r-- src/video_core/vulkan_common/vulkan_device.h 18
-rw-r--r-- src/video_core/vulkan_common/vulkan_wrapper.cpp 2
23 files changed, 157 insertions, 58 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index c4f6e8d12..eed267361 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -62,7 +62,11 @@ using BufferId = SlotId;
using VideoCore::Surface::PixelFormat;
using namespace Common::Literals;
+#ifdef __APPLE__
+constexpr u32 NUM_VERTEX_BUFFERS = 16;
+#else
constexpr u32 NUM_VERTEX_BUFFERS = 32;
+#endif
constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 805a89900..c0e6471fe 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -86,7 +86,10 @@ public:
uncommitted_operations.emplace_back(std::move(func));
}
pending_operations.emplace_back(std::move(uncommitted_operations));
- QueueFence(new_fence);
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ QueueFence(new_fence);
+ }
if (!delay_fence) {
func();
}
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index 8d7da50fc..dbcf508e5 100644
--- a/src/video_core/host1x/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -137,16 +137,6 @@ bool Codec::CreateGpuAvDevice() {
break;
}
if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
-#if defined(__unix__)
- // Some linux decoding backends are reported to crash with this config method
- // TODO(ameerj): Properly support this method
- if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) {
- // skip zero-copy decoders, we don't currently support them
- LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.",
- av_hwdevice_get_type_name(type), config->methods);
- continue;
- }
-#endif
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
av_codec_ctx->pix_fmt = config->pix_fmt;
return true;
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 8bb429578..cd2549232 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -19,6 +19,7 @@ set(SHADER_FILES
block_linear_unswizzle_2d.comp
block_linear_unswizzle_3d.comp
convert_abgr8_to_d24s8.frag
+ convert_abgr8_to_d32f.frag
convert_d32f_to_abgr8.frag
convert_d24s8_to_abgr8.frag
convert_depth_to_float.frag
diff --git a/src/video_core/host_shaders/convert_abgr8_to_d32f.frag b/src/video_core/host_shaders/convert_abgr8_to_d32f.frag
new file mode 100644
index 000000000..095b910c2
--- /dev/null
+++ b/src/video_core/host_shaders/convert_abgr8_to_d32f.frag
@@ -0,0 +1,15 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#version 450
+
+layout(binding = 0) uniform sampler2D color_texture;
+
+void main() {
+ ivec2 coord = ivec2(gl_FragCoord.xy);
+ vec4 color = texelFetch(color_texture, coord, 0).abgr;
+
+ float value = color.a * (color.r + color.g + color.b) / 3.0f;
+
+ gl_FragDepth = value;
+}
diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
index d33131d7c..b81a54056 100644
--- a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
+++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag
@@ -3,16 +3,16 @@
#version 450
+precision mediump int;
+precision highp float;
+
layout(binding = 0) uniform sampler2D depth_tex;
-layout(binding = 1) uniform isampler2D stencil_tex;
+layout(binding = 1) uniform usampler2D stencil_tex;
layout(location = 0) out vec4 color;
void main() {
ivec2 coord = ivec2(gl_FragCoord.xy);
- uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
- uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
-
highp uint depth_val =
uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r;
diff --git a/src/video_core/host_shaders/convert_d32f_to_abgr8.frag b/src/video_core/host_shaders/convert_d32f_to_abgr8.frag
index 04cfef8b5..4e5a9f955 100644
--- a/src/video_core/host_shaders/convert_d32f_to_abgr8.frag
+++ b/src/video_core/host_shaders/convert_d32f_to_abgr8.frag
@@ -9,6 +9,6 @@ layout(location = 0) out vec4 color;
void main() {
ivec2 coord = ivec2(gl_FragCoord.xy);
- float depth = textureLod(depth_tex, coord, 0).r;
+ float depth = texelFetch(depth_tex, coord, 0).r;
color = vec4(depth, depth, depth, 1.0);
}
diff --git a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
index 31db7d426..6a457981d 100644
--- a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
+++ b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
@@ -3,16 +3,16 @@
#version 450
+precision mediump int;
+precision highp float;
+
layout(binding = 0) uniform sampler2D depth_tex;
-layout(binding = 1) uniform isampler2D stencil_tex;
+layout(binding = 1) uniform usampler2D stencil_tex;
layout(location = 0) out vec4 color;
void main() {
ivec2 coord = ivec2(gl_FragCoord.xy);
- uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
- uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
-
highp uint depth_val =
uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r;
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index f01d2394e..c3db09424 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -8,6 +8,7 @@
#include "common/settings.h"
#include "video_core/host_shaders/blit_color_float_frag_spv.h"
#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
+#include "video_core/host_shaders/convert_abgr8_to_d32f_frag_spv.h"
#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/convert_d32f_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
@@ -434,6 +435,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
+ convert_abgr8_to_d32f_frag(BuildShader(device, CONVERT_ABGR8_TO_D32F_FRAG_SPV)),
convert_d32f_to_abgr8_frag(BuildShader(device, CONVERT_D32F_TO_ABGR8_FRAG_SPV)),
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
@@ -559,6 +561,13 @@ void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer,
Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view);
}
+void BlitImageHelper::ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer,
+ const ImageView& src_image_view) {
+ ConvertPipelineDepthTargetEx(convert_abgr8_to_d32f_pipeline, dst_framebuffer->RenderPass(),
+ convert_abgr8_to_d32f_frag);
+ Convert(*convert_abgr8_to_d32f_pipeline, dst_framebuffer, src_image_view);
+}
+
void BlitImageHelper::ConvertD32FToABGR8(const Framebuffer* dst_framebuffer,
ImageView& src_image_view) {
ConvertPipelineColorTargetEx(convert_d32f_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index a032c71fb..b2104a59e 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -67,6 +67,8 @@ public:
void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+ void ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
+
void ConvertD32FToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
@@ -130,6 +132,7 @@ private:
vk::ShaderModule convert_depth_to_float_frag;
vk::ShaderModule convert_float_to_depth_frag;
vk::ShaderModule convert_abgr8_to_d24s8_frag;
+ vk::ShaderModule convert_abgr8_to_d32f_frag;
vk::ShaderModule convert_d32f_to_abgr8_frag;
vk::ShaderModule convert_d24s8_to_abgr8_frag;
vk::ShaderModule convert_s8d24_to_abgr8_frag;
@@ -149,6 +152,7 @@ private:
vk::Pipeline convert_d16_to_r16_pipeline;
vk::Pipeline convert_r16_to_d16_pipeline;
vk::Pipeline convert_abgr8_to_d24s8_pipeline;
+ vk::Pipeline convert_abgr8_to_d32f_pipeline;
vk::Pipeline convert_d32f_to_abgr8_pipeline;
vk::Pipeline convert_d24s8_to_abgr8_pipeline;
vk::Pipeline convert_s8d24_to_abgr8_pipeline;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index c4c30d807..7e7a80740 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -132,12 +132,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
const bool use_accelerated =
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
- RenderScreenshot(*framebuffer, use_accelerated);
- Frame* frame = present_manager.GetRenderFrame();
- blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
- scheduler.Flush(*frame->render_ready);
- present_manager.Present(frame);
+ {
+ std::scoped_lock lock{rasterizer.LockCaches()};
+ RenderScreenshot(*framebuffer, use_accelerated);
+
+ Frame* frame = present_manager.GetRenderFrame();
+ blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
+ scheduler.Flush(*frame->render_ready);
+ present_manager.Present(frame);
+ }
gpu.RendererFrameEndNotify();
rasterizer.TickFrame();
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a1ec1a100..804b95989 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -356,7 +356,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
.ignore_nan_fp_comparisons = false,
.has_broken_spirv_subgroup_mask_vector_extract_dynamic =
- driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY};
+ driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
+ .has_broken_robust =
+ device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Maxwell,
+ };
+
host_info = Shader::HostTranslateInfo{
.support_float64 = device.IsFloat64Supported(),
.support_float16 = device.IsFloat16Supported(),
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 2edaafa7e..66c03bf17 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -1436,6 +1436,7 @@ void QueryCacheRuntime::Barriers(bool is_prebarrier) {
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
+ impl->scheduler.RequestOutsideRenderPassOperationContext();
if (is_prebarrier) {
impl->scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 83f2b6045..465eac37e 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -198,7 +198,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
if (!pipeline) {
return;
}
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ std::scoped_lock lock{LockCaches()};
// update engine as channel may be different.
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
@@ -708,6 +708,7 @@ void RasterizerVulkan::TiledCacheBarrier() {
}
void RasterizerVulkan::FlushCommands() {
+ std::scoped_lock lock{LockCaches()};
if (draw_counter == 0) {
return;
}
@@ -805,6 +806,7 @@ void RasterizerVulkan::FlushWork() {
if ((++draw_counter & 7) != 7) {
return;
}
+ std::scoped_lock lock{LockCaches()};
if (draw_counter < DRAWS_TO_DISPATCH) {
// Send recorded tasks to the worker thread
scheduler.DispatchWork();
@@ -975,6 +977,19 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
if (!state_tracker.TouchScissors()) {
return;
}
+ if (!regs.viewport_scale_offset_enabled) {
+ const auto x = static_cast<float>(regs.surface_clip.x);
+ const auto y = static_cast<float>(regs.surface_clip.y);
+ const auto width = static_cast<float>(regs.surface_clip.width);
+ const auto height = static_cast<float>(regs.surface_clip.height);
+ VkRect2D scissor;
+ scissor.offset.x = static_cast<u32>(x);
+ scissor.offset.y = static_cast<u32>(y);
+ scissor.extent.width = static_cast<u32>(width != 0.0f ? width : 1.0f);
+ scissor.extent.height = static_cast<u32>(height != 0.0f ? height : 1.0f);
+ scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissor); });
+ return;
+ }
u32 up_scale = 1;
u32 down_shift = 0;
if (texture_cache.IsRescaling()) {
@@ -1486,7 +1501,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
CreateChannel(channel);
{
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ std::scoped_lock lock{LockCaches()};
texture_cache.CreateChannel(channel);
buffer_cache.CreateChannel(channel);
}
@@ -1499,7 +1514,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
const s32 channel_id = channel.bind_id;
BindToChannel(channel_id);
{
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ std::scoped_lock lock{LockCaches()};
texture_cache.BindToChannel(channel_id);
buffer_cache.BindToChannel(channel_id);
}
@@ -1512,7 +1527,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
EraseChannel(channel_id);
{
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ std::scoped_lock lock{LockCaches()};
texture_cache.EraseChannel(channel_id);
buffer_cache.EraseChannel(channel_id);
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index ad069556c..ce3dfbaab 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -133,6 +133,10 @@ public:
void ReleaseChannel(s32 channel_id) override;
+ std::scoped_lock<std::recursive_mutex, std::recursive_mutex> LockCaches() {
+ return std::scoped_lock{buffer_cache.mutex, texture_cache.mutex};
+ }
+
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
index ae9f1de64..7746a88d3 100644
--- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -19,7 +19,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat
VkSampleCountFlagBits samples) {
using MaxwellToVK::SurfaceFormat;
return {
- .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
+ .flags = {},
.format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
.samples = samples,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 93773a69f..de34f6d49 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1194,6 +1194,11 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
return blit_image_helper.ConvertD16ToR16(dst, src_view);
}
break;
+ case PixelFormat::A8B8G8R8_SRGB:
+ if (src_view.format == PixelFormat::D32_FLOAT) {
+ return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
+ }
+ break;
case PixelFormat::A8B8G8R8_UNORM:
if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
@@ -1205,6 +1210,16 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
}
break;
+ case PixelFormat::B8G8R8A8_SRGB:
+ if (src_view.format == PixelFormat::D32_FLOAT) {
+ return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
+ }
+ break;
+ case PixelFormat::B8G8R8A8_UNORM:
+ if (src_view.format == PixelFormat::D32_FLOAT) {
+ return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
+ }
+ break;
case PixelFormat::R32_FLOAT:
if (src_view.format == PixelFormat::D32_FLOAT) {
return blit_image_helper.ConvertD32ToR32(dst, src_view);
@@ -1222,6 +1237,12 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
}
break;
case PixelFormat::D32_FLOAT:
+ if (src_view.format == PixelFormat::A8B8G8R8_UNORM ||
+ src_view.format == PixelFormat::B8G8R8A8_UNORM ||
+ src_view.format == PixelFormat::A8B8G8R8_SRGB ||
+ src_view.format == PixelFormat::B8G8R8A8_SRGB) {
+ return blit_image_helper.ConvertABGR8ToD32F(dst, src_view);
+ }
if (src_view.format == PixelFormat::R32_FLOAT) {
return blit_image_helper.ConvertR32ToD32(dst, src_view);
}
@@ -2034,7 +2055,7 @@ void TextureCacheRuntime::TransitionImageLayout(Image& image) {
},
};
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([barrier = barrier](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([barrier](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier);
});
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index 6279d8e9e..2b7e0df72 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -10,19 +10,23 @@
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/render_targets.h"
+#include "video_core/texture_cache/samples_helper.h"
namespace VideoCommon {
std::string Name(const ImageBase& image) {
const GPUVAddr gpu_addr = image.gpu_addr;
const ImageInfo& info = image.info;
- const u32 width = info.size.width;
- const u32 height = info.size.height;
+ u32 width = info.size.width;
+ u32 height = info.size.height;
const u32 depth = info.size.depth;
const u32 num_layers = image.info.resources.layers;
const u32 num_levels = image.info.resources.levels;
std::string resource;
if (image.info.num_samples > 1) {
+ const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(image.info.num_samples);
+ width >>= samples_x;
+ height >>= samples_y;
resource += fmt::format(":{}xMSAA", image.info.num_samples);
}
if (num_layers > 1) {
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h
index 203ac1b11..2ee2f8312 100644
--- a/src/video_core/texture_cache/samples_helper.h
+++ b/src/video_core/texture_cache/samples_helper.h
@@ -24,7 +24,7 @@ namespace VideoCommon {
return {2, 2};
}
ASSERT_MSG(false, "Invalid number of samples={}", num_samples);
- return {1, 1};
+ return {0, 0};
}
[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) {
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 8151cabf0..15596c925 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -167,6 +167,13 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
+ if (level == 0 && info.num_levels == 1) {
+ return Extent3D{
+ .width = info.block.width,
+ .height = info.block.height,
+ .depth = info.block.depth,
+ };
+ }
const Extent3D blocks = NumLevelBlocks(info, level);
return Extent3D{
.width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
@@ -1293,9 +1300,9 @@ u32 MapSizeBytes(const ImageBase& image) {
static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0, 1}, 0) ==
0x7f8000);
-static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0, 1}, 0) == 0x4000);
+static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0, 1}, 0) == 0x40000);
-static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0, 1}, 0) == 0x4000);
+static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0, 1}, 0) == 0x40000);
static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) ==
0x2afc00);
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 876cec2e8..e518756d2 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -83,15 +83,6 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
} // namespace Alternatives
-enum class NvidiaArchitecture {
- KeplerOrOlder,
- Maxwell,
- Pascal,
- Volta,
- Turing,
- AmpereOrNewer,
-};
-
template <typename T>
void SetNext(void**& next, T& data) {
*next = &data;
@@ -326,9 +317,9 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) {
// Only Ampere and newer support this feature
// TODO: Find a way to differentiate Ampere and Ada
- return NvidiaArchitecture::AmpereOrNewer;
+ return NvidiaArchitecture::Arch_AmpereOrNewer;
}
- return NvidiaArchitecture::Turing;
+ return NvidiaArchitecture::Arch_Turing;
}
if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) {
@@ -340,7 +331,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
physical_properties.pNext = &advanced_blending_props;
physical.GetProperties2(physical_properties);
if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) {
- return NvidiaArchitecture::Maxwell;
+ return NvidiaArchitecture::Arch_Maxwell;
}
if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) {
@@ -350,13 +341,13 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
physical_properties.pNext = &conservative_raster_props;
physical.GetProperties2(physical_properties);
if (conservative_raster_props.degenerateLinesRasterized) {
- return NvidiaArchitecture::Volta;
+ return NvidiaArchitecture::Arch_Volta;
}
- return NvidiaArchitecture::Pascal;
+ return NvidiaArchitecture::Arch_Pascal;
}
}
- return NvidiaArchitecture::KeplerOrOlder;
+ return NvidiaArchitecture::Arch_KeplerOrOlder;
}
std::vector<const char*> ExtensionListForVulkan(
@@ -436,6 +427,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
}
+ if (is_nvidia) {
+ nvidia_arch = GetNvidiaArchitecture(physical, supported_extensions);
+ }
+
SetupFamilies(surface);
const auto queue_cis = GetDeviceQueueCreateInfos();
@@ -532,11 +527,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
if (is_nvidia) {
const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff;
- const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
- if (arch >= NvidiaArchitecture::AmpereOrNewer) {
+ const auto arch = GetNvidiaArch();
+ if (arch >= NvidiaArchitecture::Arch_AmpereOrNewer) {
LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math");
features.shader_float16_int8.shaderFloat16 = false;
- } else if (arch <= NvidiaArchitecture::Volta) {
+ } else if (arch <= NvidiaArchitecture::Arch_Volta) {
if (nv_major_version < 527) {
LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor");
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
@@ -686,8 +681,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
}
} else if (extensions.push_descriptor && is_nvidia) {
- const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
- if (arch <= NvidiaArchitecture::Pascal) {
+ const auto arch = GetNvidiaArch();
+ if (arch <= NvidiaArchitecture::Arch_Pascal) {
LOG_WARNING(Render_Vulkan,
"Pascal and older architectures have broken VK_KHR_push_descriptor");
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 282a2925d..b213ed7dd 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -177,6 +177,15 @@ enum class FormatType { Linear, Optimal, Buffer };
/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
const u32 GuestWarpSize = 32;
+enum class NvidiaArchitecture {
+ Arch_KeplerOrOlder,
+ Arch_Maxwell,
+ Arch_Pascal,
+ Arch_Volta,
+ Arch_Turing,
+ Arch_AmpereOrNewer,
+};
+
/// Handles data specific to a physical device.
class Device {
public:
@@ -670,6 +679,14 @@ public:
return false;
}
+ bool IsNvidia() const noexcept {
+ return properties.driver.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY;
+ }
+
+ NvidiaArchitecture GetNvidiaArch() const noexcept {
+ return nvidia_arch;
+ }
+
private:
/// Checks if the physical device is suitable and configures the object state
/// with all necessary info about its properties.
@@ -788,6 +805,7 @@ private:
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool
+ NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};
// Telemetry parameters
std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions.
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 2f3254a97..70cf14afa 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -522,7 +522,7 @@ Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char
.applicationVersion = VK_MAKE_VERSION(0, 1, 0),
.pEngineName = "yuzu Emulator",
.engineVersion = VK_MAKE_VERSION(0, 1, 0),
- .apiVersion = version,
+ .apiVersion = VK_API_VERSION_1_3,
};
const VkInstanceCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,