summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/command_classes/codecs/codec.cpp8
-rw-r--r--src/video_core/gpu.cpp11
-rw-r--r--src/video_core/gpu.h3
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_device.h10
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp16
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h6
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp50
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp168
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h33
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp37
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp11
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp11
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp80
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h13
-rw-r--r--src/video_core/shader_environment.cpp54
-rw-r--r--src/video_core/shader_environment.h6
-rw-r--r--src/video_core/texture_cache/texture_cache.h18
-rw-r--r--src/video_core/texture_cache/util.cpp10
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp40
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h18
22 files changed, 371 insertions, 244 deletions
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 2a532b883..04d0f3a2f 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -32,7 +32,7 @@ constexpr std::array PREFERRED_GPU_DECODERS = {
#ifdef _WIN32
AV_HWDEVICE_TYPE_D3D11VA,
AV_HWDEVICE_TYPE_DXVA2,
-#elif defined(__linux__)
+#elif defined(__unix__)
AV_HWDEVICE_TYPE_VAAPI,
AV_HWDEVICE_TYPE_VDPAU,
#endif
@@ -130,6 +130,12 @@ bool Codec::CreateGpuAvDevice() {
}
if (config->methods & HW_CONFIG_METHOD && config->device_type == type) {
av_codec_ctx->pix_fmt = config->pix_fmt;
+ if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) {
+ // skip zero-copy decoders, we don't currently support them
+ LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.",
+ av_hwdevice_get_type_name(type), config->methods);
+ continue;
+ }
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
return true;
}
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8788f5148..705765c99 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -17,7 +17,6 @@
#include "core/frontend/emu_window.h"
#include "core/hardware_interrupt_manager.h"
#include "core/hle/service/nvdrv/nvdata.h"
-#include "core/hle/service/nvflinger/buffer_queue.h"
#include "core/perf_stats.h"
#include "video_core/cdma_pusher.h"
#include "video_core/dma_pusher.h"
@@ -312,6 +311,12 @@ struct GPU::Impl {
cpu_context->MakeCurrent();
}
+ void NotifyShutdown() {
+ std::unique_lock lk{sync_mutex};
+ shutting_down.store(true, std::memory_order::relaxed);
+ sync_cv.notify_all();
+ }
+
/// Obtain the CPU Context
void ObtainContext() {
cpu_context->MakeCurrent();
@@ -859,6 +864,10 @@ void GPU::Start() {
impl->Start();
}
+void GPU::NotifyShutdown() {
+ impl->NotifyShutdown();
+}
+
void GPU::ObtainContext() {
impl->ObtainContext();
}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 500411176..3188b83ed 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -232,6 +232,9 @@ public:
/// core timing events.
void Start();
+ /// Performs any additional necessary steps to shutdown GPU emulation.
+ void NotifyShutdown();
+
/// Obtain the CPU Context
void ObtainContext();
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 0764ea6e0..e62912a22 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -182,17 +182,13 @@ Device::Device() {
shader_backend = Settings::ShaderBackend::GLSL;
}
- if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia &&
- !Settings::values.renderer_debug) {
+ if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) {
const std::string_view driver_version = version.substr(13);
const int version_major =
std::atoi(driver_version.substr(0, driver_version.find(".")).data());
-
if (version_major >= 495) {
- LOG_WARNING(Render_OpenGL, "NVIDIA drivers 495 and later causes significant problems "
- "with yuzu. Forcing GLASM as a mitigation.");
- shader_backend = Settings::ShaderBackend::GLASM;
- use_assembly_shaders = true;
+ has_cbuf_ftou_bug = true;
+ has_bool_ref_bug = true;
}
}
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index de9e41659..95c2e8d38 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -152,6 +152,14 @@ public:
return need_fastmath_off;
}
+ bool HasCbufFtouBug() const {
+ return has_cbuf_ftou_bug;
+ }
+
+ bool HasBoolRefBug() const {
+ return has_bool_ref_bug;
+ }
+
Settings::ShaderBackend GetShaderBackend() const {
return shader_backend;
}
@@ -200,6 +208,8 @@ private:
bool has_sparse_texture_2{};
bool warp_size_potentially_larger_than_guest{};
bool need_fastmath_off{};
+ bool has_cbuf_ftou_bug{};
+ bool has_bool_ref_bug{};
std::string vendor_name;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 42ef67628..f71e01a34 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -42,6 +42,7 @@ namespace {
using Shader::Backend::GLASM::EmitGLASM;
using Shader::Backend::GLSL::EmitGLSL;
using Shader::Backend::SPIRV::EmitSPIRV;
+using Shader::Maxwell::ConvertLegacyToGeneric;
using Shader::Maxwell::MergeDualVertexPrograms;
using Shader::Maxwell::TranslateProgram;
using VideoCommon::ComputeEnvironment;
@@ -213,6 +214,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.has_broken_fp16_float_controls = false,
.has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
.has_gl_precise_bug = device.HasPreciseBug(),
+ .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(),
+ .has_gl_bool_ref_bug = device.HasBoolRefBug(),
.ignore_nan_fp_comparisons = true,
.gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
},
@@ -422,6 +425,11 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+
+ if (Settings::values.dump_shaders) {
+ env.Dump(key.unique_hashes[index]);
+ }
+
if (!uses_vertex_a || index != 1) {
// Normal path
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
@@ -462,12 +470,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)};
switch (device.GetShaderBackend()) {
case Settings::ShaderBackend::GLSL:
+ ConvertLegacyToGeneric(program, runtime_info);
sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding);
break;
case Settings::ShaderBackend::GLASM:
sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding);
break;
case Settings::ShaderBackend::SPIRV:
+ ConvertLegacyToGeneric(program, runtime_info);
sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding);
break;
}
@@ -506,8 +516,12 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
- auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+ if (Settings::values.dump_shaders) {
+ env.Dump(key.Hash());
+ }
+
+ auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)};
Shader::RuntimeInfo info;
info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 14e6522f2..3c1f79a27 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -1047,7 +1047,7 @@ bool Image::ScaleDown(bool ignore) {
}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
- ImageId image_id_, Image& image)
+ ImageId image_id_, Image& image, const SlotVector<Image>&)
: VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
const Device& device = runtime.device;
if (True(image.flags & ImageFlagBits::Converted)) {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 37d5e6a6b..7f425631f 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -36,6 +36,7 @@ using VideoCommon::ImageViewType;
using VideoCommon::NUM_RT;
using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
+using VideoCommon::SlotVector;
struct ImageBufferMap {
~ImageBufferMap();
@@ -92,7 +93,7 @@ public:
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
- void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) {
+ void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
UNIMPLEMENTED();
}
@@ -234,7 +235,8 @@ class ImageView : public VideoCommon::ImageViewBase {
friend Image;
public:
- explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&,
+ const SlotVector<Image>&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 28daacd82..f81c1b233 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -437,39 +437,29 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glBindTextureUnit(0, fxaa_texture.handle);
}
-
- // Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
- GLuint fragment_handle;
- const auto filter = Settings::values.scaling_filter.GetValue();
- switch (filter) {
- case Settings::ScalingFilter::NearestNeighbor:
- fragment_handle = present_bilinear_fragment.handle;
- break;
- case Settings::ScalingFilter::Bilinear:
- fragment_handle = present_bilinear_fragment.handle;
- break;
- case Settings::ScalingFilter::Bicubic:
- fragment_handle = present_bicubic_fragment.handle;
- break;
- case Settings::ScalingFilter::Gaussian:
- fragment_handle = present_gaussian_fragment.handle;
- break;
- case Settings::ScalingFilter::ScaleForce:
- fragment_handle = present_scaleforce_fragment.handle;
- break;
- case Settings::ScalingFilter::Fsr:
- LOG_WARNING(
- Render_OpenGL,
- "FidelityFX FSR Super Sampling is not supported in OpenGL, changing to ScaleForce");
- fragment_handle = present_scaleforce_fragment.handle;
- break;
- default:
- fragment_handle = present_bilinear_fragment.handle;
- break;
- }
+ const auto fragment_handle = [this]() {
+ switch (Settings::values.scaling_filter.GetValue()) {
+ case Settings::ScalingFilter::NearestNeighbor:
+ case Settings::ScalingFilter::Bilinear:
+ return present_bilinear_fragment.handle;
+ case Settings::ScalingFilter::Bicubic:
+ return present_bicubic_fragment.handle;
+ case Settings::ScalingFilter::Gaussian:
+ return present_gaussian_fragment.handle;
+ case Settings::ScalingFilter::ScaleForce:
+ return present_scaleforce_fragment.handle;
+ case Settings::ScalingFilter::Fsr:
+ LOG_WARNING(
+ Render_OpenGL,
+ "FidelityFX Super Resolution is not supported in OpenGL, changing to ScaleForce");
+ return present_scaleforce_fragment.handle;
+ default:
+ return present_bilinear_fragment.handle;
+ }
+ }();
program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle);
glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
ortho_matrix.data());
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 9a38b6b34..2c3914459 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -4,6 +4,7 @@
#include <algorithm>
+#include "common/settings.h"
#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
@@ -335,6 +336,17 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
cmdbuf.SetScissor(0, scissor);
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
}
+
+VkExtent2D GetConversionExtent(const ImageView& src_image_view) {
+ const auto& resolution = Settings::values.resolution_info;
+ const bool is_rescaled = src_image_view.IsRescaled();
+ u32 width = src_image_view.size.width;
+ u32 height = src_image_view.size.height;
+ return VkExtent2D{
+ .width = is_rescaled ? resolution.ScaleUp(width) : width,
+ .height = is_rescaled ? resolution.ScaleUp(height) : height,
+ };
+}
} // Anonymous namespace
BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
@@ -425,108 +437,52 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
}
void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
- const ImageView& src_image_view, u32 up_scale,
- u32 down_shift) {
+ const ImageView& src_image_view) {
ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
- Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
+ Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
- const ImageView& src_image_view, u32 up_scale,
- u32 down_shift) {
+ const ImageView& src_image_view) {
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
- Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
+ Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
- const ImageView& src_image_view, u32 up_scale,
- u32 down_shift) {
+ const ImageView& src_image_view) {
ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
- Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
+ Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
- const ImageView& src_image_view, u32 up_scale,
- u32 down_shift) {
+ const ImageView& src_image_view) {
ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
- Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift);
+ Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer,
- ImageView& src_image_view, u32 up_scale, u32 down_shift) {
+ const ImageView& src_image_view) {
ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(),
- convert_abgr8_to_d24s8_frag, true);
- ConvertColor(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale,
- down_shift);
+ convert_abgr8_to_d24s8_frag);
+ Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
- ImageView& src_image_view, u32 up_scale, u32 down_shift) {
+ ImageView& src_image_view) {
ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
- convert_d24s8_to_abgr8_frag, false);
- ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale,
- down_shift);
+ convert_d24s8_to_abgr8_frag);
+ ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
- const ImageView& src_image_view, u32 up_scale, u32 down_shift) {
+ const ImageView& src_image_view) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
const VkSampler sampler = *nearest_sampler;
- const VkExtent2D extent{
- .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
- .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
- };
- scheduler.RequestRenderpass(dst_framebuffer);
- scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift,
- this](vk::CommandBuffer cmdbuf) {
- const VkOffset2D offset{
- .x = 0,
- .y = 0,
- };
- const VkViewport viewport{
- .x = 0.0f,
- .y = 0.0f,
- .width = static_cast<float>(extent.width),
- .height = static_cast<float>(extent.height),
- .minDepth = 0.0f,
- .maxDepth = 0.0f,
- };
- const VkRect2D scissor{
- .offset = offset,
- .extent = extent,
- };
- const PushConstants push_constants{
- .tex_scale = {viewport.width, viewport.height},
- .tex_offset = {0.0f, 0.0f},
- };
- const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
- UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
-
- // TODO: Barriers
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
- nullptr);
- cmdbuf.SetViewport(0, viewport);
- cmdbuf.SetScissor(0, scissor);
- cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
- cmdbuf.Draw(3, 1, 0, 0);
- });
- scheduler.InvalidateState();
-}
+ const VkExtent2D extent = GetConversionExtent(src_image_view);
-void BlitImageHelper::ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
- ImageView& src_image_view, u32 up_scale, u32 down_shift) {
- const VkPipelineLayout layout = *one_texture_pipeline_layout;
- const VkImageView src_view = src_image_view.ColorView();
- const VkSampler sampler = *nearest_sampler;
- const VkExtent2D extent{
- .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
- .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
- };
scheduler.RequestRenderpass(dst_framebuffer);
- scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift,
- this](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
const VkOffset2D offset{
.x = 0,
.y = 0,
@@ -563,18 +519,16 @@ void BlitImageHelper::ConvertColor(VkPipeline pipeline, const Framebuffer* dst_f
}
void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
- ImageView& src_image_view, u32 up_scale, u32 down_shift) {
+ ImageView& src_image_view) {
const VkPipelineLayout layout = *two_textures_pipeline_layout;
const VkImageView src_depth_view = src_image_view.DepthView();
const VkImageView src_stencil_view = src_image_view.StencilView();
const VkSampler sampler = *nearest_sampler;
- const VkExtent2D extent{
- .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
- .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
- };
+ const VkExtent2D extent = GetConversionExtent(src_image_view);
+
scheduler.RequestRenderpass(dst_framebuffer);
- scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, up_scale,
- down_shift, this](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent,
+ this](vk::CommandBuffer cmdbuf) {
const VkOffset2D offset{
.x = 0,
.y = 0,
@@ -695,11 +649,14 @@ VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePip
return *blit_depth_stencil_pipelines.back();
}
-void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
+void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass,
+ bool is_target_depth) {
if (pipeline) {
return;
}
- const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag);
+ VkShaderModule frag_shader =
+ is_target_depth ? *convert_float_to_depth_frag : *convert_depth_to_float_frag;
+ const std::array stages = MakeStages(*full_screen_vert, frag_shader);
pipeline = device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
@@ -712,8 +669,9 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .pDepthStencilState = nullptr,
- .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
+ .pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr,
+ .pColorBlendState = is_target_depth ? &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO
+ : &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.layout = *one_texture_pipeline_layout,
.renderPass = renderpass,
@@ -723,37 +681,17 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend
});
}
+void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
+ ConvertPipeline(pipeline, renderpass, false);
+}
+
void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
- if (pipeline) {
- return;
- }
- const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag);
- pipeline = device.GetLogical().CreateGraphicsPipeline({
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .stageCount = static_cast<u32>(stages.size()),
- .pStages = stages.data(),
- .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .pTessellationState = nullptr,
- .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
- .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .layout = *one_texture_pipeline_layout,
- .renderPass = renderpass,
- .subpass = 0,
- .basePipelineHandle = VK_NULL_HANDLE,
- .basePipelineIndex = 0,
- });
+ ConvertPipeline(pipeline, renderpass, true);
}
void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
- vk::ShaderModule& module, bool is_target_depth,
- bool single_texture) {
+ vk::ShaderModule& module, bool single_texture,
+ bool is_target_depth) {
if (pipeline) {
return;
}
@@ -782,13 +720,13 @@ void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass ren
}
void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
- vk::ShaderModule& module, bool single_texture) {
- ConvertPipelineEx(pipeline, renderpass, module, false, single_texture);
+ vk::ShaderModule& module) {
+ ConvertPipelineEx(pipeline, renderpass, module, false, false);
}
void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
- vk::ShaderModule& module, bool single_texture) {
- ConvertPipelineEx(pipeline, renderpass, module, true, single_texture);
+ vk::ShaderModule& module) {
+ ConvertPipelineEx(pipeline, renderpass, module, true, true);
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index b1a717090..85e7dca5b 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -44,50 +44,43 @@ public:
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
- void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
- u32 up_scale, u32 down_shift);
+ void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
- void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
- u32 up_scale, u32 down_shift);
+ void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
- void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
- u32 up_scale, u32 down_shift);
+ void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
- void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
- u32 up_scale, u32 down_shift);
+ void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
- void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
- u32 up_scale, u32 down_shift);
+ void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
- void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
- u32 up_scale, u32 down_shift);
+ void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
- const ImageView& src_image_view, u32 up_scale, u32 down_shift);
-
- void ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
- ImageView& src_image_view, u32 up_scale, u32 down_shift);
+ const ImageView& src_image_view);
void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
- ImageView& src_image_view, u32 up_scale, u32 down_shift);
+ ImageView& src_image_view);
[[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key);
[[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
+ void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth);
+
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
- vk::ShaderModule& module, bool is_target_depth, bool single_texture);
+ vk::ShaderModule& module, bool single_texture, bool is_target_depth);
void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
- vk::ShaderModule& module, bool single_texture);
+ vk::ShaderModule& module);
void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
- vk::ShaderModule& module, bool single_texture);
+ vk::ShaderModule& module);
const Device& device;
VKScheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 1e447e621..c71a1f44d 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -391,28 +391,23 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
.offset = {0, 0},
.extent = size,
};
- const auto filter = Settings::values.scaling_filter.GetValue();
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
- switch (filter) {
- case Settings::ScalingFilter::NearestNeighbor:
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
- break;
- case Settings::ScalingFilter::Bilinear:
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
- break;
- case Settings::ScalingFilter::Bicubic:
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bicubic_pipeline);
- break;
- case Settings::ScalingFilter::Gaussian:
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *gaussian_pipeline);
- break;
- case Settings::ScalingFilter::ScaleForce:
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *scaleforce_pipeline);
- break;
- default:
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline);
- break;
- }
+ auto graphics_pipeline = [this]() {
+ switch (Settings::values.scaling_filter.GetValue()) {
+ case Settings::ScalingFilter::NearestNeighbor:
+ case Settings::ScalingFilter::Bilinear:
+ return *bilinear_pipeline;
+ case Settings::ScalingFilter::Bicubic:
+ return *bicubic_pipeline;
+ case Settings::ScalingFilter::Gaussian:
+ return *gaussian_pipeline;
+ case Settings::ScalingFilter::ScaleForce:
+ return *scaleforce_pipeline;
+ default:
+ return *bilinear_pipeline;
+ }
+ }();
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 616a7b457..d514b71d0 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -605,7 +605,11 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.flags = 0,
.topology = input_assembly_topology,
.primitiveRestartEnable = key.state.primitive_restart_enable != 0 &&
- SupportsPrimitiveRestart(input_assembly_topology),
+ ((input_assembly_topology != VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
+ device.IsTopologyListPrimitiveRestartSupported()) ||
+ SupportsPrimitiveRestart(input_assembly_topology) ||
+ (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
+ device.IsPatchListPrimitiveRestartSupported())),
};
const VkPipelineTessellationStateCreateInfo tessellation_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
@@ -613,7 +617,6 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.flags = 0,
.patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1,
};
-
std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
@@ -748,8 +751,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .logicOpEnable = VK_FALSE,
- .logicOp = VK_LOGIC_OP_COPY,
+ .logicOpEnable = key.state.logic_op_enable != 0,
+ .logicOp = static_cast<VkLogicOp>(key.state.logic_op.Value()),
.attachmentCount = static_cast<u32>(cb_attachments.size()),
.pAttachments = cb_attachments.data(),
.blendConstants = {},
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index eb8b4e08b..a633b73e5 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -48,6 +48,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache);
namespace {
using Shader::Backend::SPIRV::EmitSPIRV;
+using Shader::Maxwell::ConvertLegacyToGeneric;
using Shader::Maxwell::MergeDualVertexPrograms;
using Shader::Maxwell::TranslateProgram;
using VideoCommon::ComputeEnvironment;
@@ -516,6 +517,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+ if (Settings::values.dump_shaders) {
+ env.Dump(key.unique_hashes[index]);
+ }
if (!uses_vertex_a || index != 1) {
// Normal path
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
@@ -543,6 +547,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
infos[stage_index] = &program.info;
const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
+ ConvertLegacyToGeneric(program, runtime_info);
const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)};
device.SaveShader(code);
modules[stage_index] = BuildShader(device, code);
@@ -611,6 +616,12 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
+
+ // Dump it before error.
+ if (Settings::values.dump_shaders) {
+ env.Dump(key.Hash());
+ }
+
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
const std::vector<u32> code{EmitSPIRV(profile, program)};
device.SaveShader(code);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 197cba8e3..0ba56ff1e 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1057,37 +1057,37 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
});
}
-void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view,
- bool rescaled) {
- const u32 up_scale = rescaled ? resolution.up_scale : 1;
- const u32 down_shift = rescaled ? resolution.down_shift : 0;
+void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
switch (dst_view.format) {
case PixelFormat::R16_UNORM:
if (src_view.format == PixelFormat::D16_UNORM) {
- return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift);
+ return blit_image_helper.ConvertD16ToR16(dst, src_view);
}
break;
case PixelFormat::A8B8G8R8_UNORM:
if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
- return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift);
+ return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
}
break;
case PixelFormat::R32_FLOAT:
if (src_view.format == PixelFormat::D32_FLOAT) {
- return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift);
+ return blit_image_helper.ConvertD32ToR32(dst, src_view);
}
break;
case PixelFormat::D16_UNORM:
if (src_view.format == PixelFormat::R16_UNORM) {
- return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift);
+ return blit_image_helper.ConvertR16ToD16(dst, src_view);
}
break;
case PixelFormat::S8_UINT_D24_UNORM:
- return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift);
+ if (src_view.format == PixelFormat::A8B8G8R8_UNORM ||
+ src_view.format == PixelFormat::B8G8R8A8_UNORM) {
+ return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view);
+ }
break;
case PixelFormat::D32_FLOAT:
if (src_view.format == PixelFormat::R32_FLOAT) {
- return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift);
+ return blit_image_helper.ConvertR32ToD32(dst, src_view);
}
break;
default:
@@ -1329,6 +1329,10 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
}
}
+bool Image::IsRescaled() const noexcept {
+ return True(flags & ImageFlagBits::Rescaled);
+}
+
bool Image::ScaleUp(bool ignore) {
if (True(flags & ImageFlagBits::Rescaled)) {
return false;
@@ -1340,7 +1344,6 @@ bool Image::ScaleUp(bool ignore) {
return false;
}
has_scaled = true;
- const auto& device = runtime->device;
if (!scaled_image) {
const bool is_2d = info.type == ImageType::e2D;
const u32 scaled_width = resolution.ScaleUp(info.size.width);
@@ -1348,7 +1351,7 @@ bool Image::ScaleUp(bool ignore) {
auto scaled_info = info;
scaled_info.size.width = scaled_width;
scaled_info.size.height = scaled_height;
- scaled_image = MakeImage(device, scaled_info);
+ scaled_image = MakeImage(runtime->device, scaled_info);
auto& allocator = runtime->memory_allocator;
scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
ignore = false;
@@ -1357,18 +1360,13 @@ bool Image::ScaleUp(bool ignore) {
if (ignore) {
return true;
}
-
if (aspect_mask == 0) {
aspect_mask = ImageAspectMask(info.format);
}
- static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
- const PixelFormat format = StorageFormat(info.format);
- const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
- const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
- if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
- BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution);
- } else {
+ if (NeedsScaleHelper()) {
return BlitScaleHelper(true);
+ } else {
+ BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution);
}
return true;
}
@@ -1390,15 +1388,10 @@ bool Image::ScaleDown(bool ignore) {
if (aspect_mask == 0) {
aspect_mask = ImageAspectMask(info.format);
}
- static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
- const PixelFormat format = StorageFormat(info.format);
- const auto& device = runtime->device;
- const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
- const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
- if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) {
- BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
- } else {
+ if (NeedsScaleHelper()) {
return BlitScaleHelper(false);
+ } else {
+ BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false);
}
return true;
}
@@ -1466,10 +1459,24 @@ bool Image::BlitScaleHelper(bool scale_up) {
return true;
}
+bool Image::NeedsScaleHelper() const {
+ const auto& device = runtime->device;
+ const bool needs_msaa_helper = info.num_samples > 1 && device.CantBlitMSAA();
+ if (needs_msaa_helper) {
+ return true;
+ }
+ static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal;
+ const PixelFormat format = StorageFormat(info.format);
+ const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format;
+ const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ const bool needs_blit_helper = !device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT);
+ return needs_blit_helper;
+}
+
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image)
: VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
- image_handle{image.Handle()}, samples{ConvertSampleCount(image.info.num_samples)} {
+ image_handle{image.Handle()}, samples(ConvertSampleCount(image.info.num_samples)) {
using Shader::TextureType;
const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
@@ -1552,6 +1559,12 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
}
}
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
+ ImageId image_id_, Image& image, const SlotVector<Image>& slot_imgs)
+ : ImageView{runtime, info, image_id_, image} {
+ slot_images = &slot_imgs;
+}
+
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
: VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
@@ -1607,6 +1620,15 @@ VkImageView ImageView::StorageView(Shader::TextureType texture_type,
return *view;
}
+bool ImageView::IsRescaled() const noexcept {
+ if (!slot_images) {
+ return false;
+ }
+ const auto& slots = *slot_images;
+ const auto& src_image = slots[image_id];
+ return src_image.IsRescaled();
+}
+
vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) {
return device->GetLogical().CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 753e3e8a1..c81130dd2 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -23,6 +23,7 @@ using VideoCommon::ImageId;
using VideoCommon::NUM_RT;
using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
+using VideoCommon::SlotVector;
using VideoCore::Surface::PixelFormat;
class ASTCDecoderPass;
@@ -65,7 +66,7 @@ public:
void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
- void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled);
+ void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
bool CanAccelerateImageUpload(Image&) const noexcept {
return false;
@@ -139,6 +140,8 @@ public:
return std::exchange(initialized, true);
}
+ bool IsRescaled() const noexcept;
+
bool ScaleUp(bool ignore = false);
bool ScaleDown(bool ignore = false);
@@ -146,6 +149,8 @@ public:
private:
bool BlitScaleHelper(bool scale_up);
+ bool NeedsScaleHelper() const;
+
VKScheduler* scheduler{};
TextureCacheRuntime* runtime{};
@@ -168,6 +173,8 @@ private:
class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&,
+ const SlotVector<Image>&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
@@ -189,6 +196,8 @@ public:
[[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
Shader::ImageFormat image_format);
+ [[nodiscard]] bool IsRescaled() const noexcept;
+
[[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
return *image_views[static_cast<size_t>(texture_type)];
}
@@ -222,6 +231,8 @@ private:
[[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
const Device* device = nullptr;
+ const SlotVector<Image>* slot_images = nullptr;
+
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
std::unique_ptr<StorageViews> storage_views;
vk::ImageView depth_view;
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 05850afd0..7d3ae0de4 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <bit>
#include <filesystem>
#include <fstream>
#include <memory>
@@ -14,6 +15,7 @@
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
#include "common/logging/log.h"
#include "shader_recompiler/environment.h"
#include "video_core/engines/kepler_compute.h"
@@ -57,6 +59,47 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
}
}
+static std::string_view StageToPrefix(Shader::Stage stage) {
+ switch (stage) {
+ case Shader::Stage::VertexB:
+ return "VB";
+ case Shader::Stage::TessellationControl:
+ return "TC";
+ case Shader::Stage::TessellationEval:
+ return "TE";
+ case Shader::Stage::Geometry:
+ return "GS";
+ case Shader::Stage::Fragment:
+ return "FS";
+ case Shader::Stage::Compute:
+ return "CS";
+ case Shader::Stage::VertexA:
+ return "VA";
+ default:
+ return "UK";
+ }
+}
+
+static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowest,
+ u32 initial_offset, Shader::Stage stage) {
+ const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
+ const auto base_dir{shader_dir / "shaders"};
+ if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
+ LOG_ERROR(Common_Filesystem, "Failed to create shader dump directories");
+ return;
+ }
+ const auto prefix = StageToPrefix(stage);
+ const auto name{base_dir / fmt::format("{}{:016x}.ash", prefix, hash)};
+ const size_t real_size = read_highest - read_lowest + initial_offset;
+ const size_t padding_needed = ((32 - (real_size % 32)) % 32);
+ std::fstream shader_file(name, std::ios::out | std::ios::binary);
+ const size_t jump_index = initial_offset / sizeof(u64);
+ shader_file.write(reinterpret_cast<const char*>(code + jump_index), real_size);
+ for (size_t i = 0; i < padding_needed; i++) {
+ shader_file.put(0);
+ }
+}
+
GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
u32 start_address_)
: gpu_memory{&gpu_memory_}, program_base{program_base_} {
@@ -128,6 +171,10 @@ u64 GenericEnvironment::CalculateHash() const {
return Common::CityHash64(data.get(), size);
}
+void GenericEnvironment::Dump(u64 hash) {
+ DumpImpl(hash, code.data(), read_highest, read_lowest, initial_offset, stage);
+}
+
void GenericEnvironment::Serialize(std::ofstream& file) const {
const u64 code_size{static_cast<u64>(CachedSize())};
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
@@ -207,6 +254,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
u32 start_address_)
: GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
+ initial_offset = sizeof(sph);
gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask;
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@@ -323,14 +371,20 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
if (stage == Shader::Stage::Compute) {
file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
.read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
+ initial_offset = 0;
} else {
file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
+ initial_offset = sizeof(sph);
if (stage == Shader::Stage::Geometry) {
file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
}
}
}
+void FileEnvironment::Dump(u64 [[maybe_unused]] hash) {
+ DumpImpl(hash, code.get(), read_highest, read_lowest, initial_offset, stage);
+}
+
u64 FileEnvironment::ReadInstruction(u32 address) {
if (address < read_lowest || address > read_highest) {
throw Shader::LogicError("Out of bounds address {}", address);
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index 6640e53d0..aae762b27 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -57,6 +57,8 @@ public:
[[nodiscard]] u64 CalculateHash() const;
+ void Dump(u64 hash) override;
+
void Serialize(std::ofstream& file) const;
protected:
@@ -82,6 +84,7 @@ protected:
u32 cached_lowest = std::numeric_limits<u32>::max();
u32 cached_highest = 0;
+ u32 initial_offset = 0;
bool has_unbound_instructions = false;
};
@@ -149,6 +152,8 @@ public:
[[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
+ void Dump(u64 hash) override;
+
private:
std::unique_ptr<u64[]> code;
std::unordered_map<u32, Shader::TextureType> texture_types;
@@ -159,6 +164,7 @@ private:
u32 texture_bound{};
u32 read_lowest{};
u32 read_highest{};
+ u32 initial_offset{};
};
void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 5aaeb16ca..b494152b8 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1397,7 +1397,8 @@ ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const Imag
if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
return image_view_id;
}
- const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
+ const ImageViewId image_view_id =
+ slot_image_views.insert(runtime, info, image_id, image, slot_images);
image.InsertView(info, image_view_id);
return image_view_id;
}
@@ -1855,9 +1856,20 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
.height = std::min(dst_view.size.height, src_view.size.height),
.depth = std::min(dst_view.size.depth, src_view.size.depth),
};
- UNIMPLEMENTED_IF(copy.extent != expected_size);
+ const Extent3D scaled_extent = [is_rescaled, expected_size]() {
+ if (!is_rescaled) {
+ return expected_size;
+ }
+ const auto& resolution = Settings::values.resolution_info;
+ return Extent3D{
+ .width = resolution.ScaleUp(expected_size.width),
+ .height = resolution.ScaleUp(expected_size.height),
+ .depth = expected_size.depth,
+ };
+ }();
+ UNIMPLEMENTED_IF(copy.extent != scaled_extent);
- runtime.ConvertImage(dst_framebuffer, dst_view, src_view, is_rescaled);
+ runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
}
}
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 7bd31b211..d8e19cb2f 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -364,14 +364,14 @@ template <u32 GOB_EXTENT>
[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
- const u32 layer_stride = new_info.layer_stride;
- const s32 new_size = layer_stride * new_info.resources.layers;
- const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr);
+ const u64 layer_stride = new_info.layer_stride;
+ const u64 new_size = layer_stride * new_info.resources.layers;
+ const u64 diff = overlap.gpu_addr - gpu_addr;
if (diff > new_size) {
return std::nullopt;
}
- const s32 base_layer = diff / layer_stride;
- const s32 mip_offset = diff % layer_stride;
+ const s32 base_layer = static_cast<s32>(diff / layer_stride);
+ const s32 mip_offset = static_cast<s32>(diff % layer_stride);
const std::array offsets = CalculateMipLevelOffsets(new_info);
const auto end = offsets.begin() + new_info.resources.levels;
const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset));
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 7bf5b6578..3d78efddc 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -271,7 +271,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
.tessellationShader = true,
.sampleRateShading = true,
.dualSrcBlend = true,
- .logicOp = false,
+ .logicOp = true,
.multiDrawIndirect = false,
.drawIndirectFirstInstance = false,
.depthClamp = true,
@@ -433,6 +433,19 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
}
+ VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart;
+ if (is_topology_list_restart_supported || is_patch_list_restart_supported) {
+ primitive_topology_list_restart = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT,
+ .pNext = nullptr,
+ .primitiveTopologyListRestart = is_topology_list_restart_supported,
+ .primitiveTopologyPatchListRestart = is_patch_list_restart_supported,
+ };
+ SetNext(next, primitive_topology_list_restart);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support list topology primitive restart");
+ }
+
VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
if (ext_transform_feedback) {
transform_feedback = {
@@ -625,15 +638,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
}
- if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+ const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
+ if (ext_vertex_input_dynamic_state && is_intel_windows) {
LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
ext_vertex_input_dynamic_state = false;
}
- if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+ if (is_float16_supported && is_intel_windows) {
// Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math");
is_float16_supported = false;
}
+ if (is_intel_windows) {
+ LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
+ cant_blit_msaa = true;
+ }
supports_d24_depth =
IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
@@ -891,6 +909,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
bool has_ext_provoking_vertex{};
bool has_ext_vertex_input_dynamic_state{};
bool has_ext_line_rasterization{};
+ bool has_ext_primitive_topology_list_restart{};
for (const std::string& extension : supported_extensions) {
const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
bool push) {
@@ -915,6 +934,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
+ test(has_ext_primitive_topology_list_restart,
+ VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME, true);
test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true);
test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME,
true);
@@ -1113,6 +1134,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
khr_pipeline_executable_properties = true;
}
}
+ if (has_ext_primitive_topology_list_restart) {
+ VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart{};
+ primitive_topology_list_restart.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT;
+ primitive_topology_list_restart.pNext = nullptr;
+ features.pNext = &primitive_topology_list_restart;
+ physical.GetFeatures2KHR(features);
+
+ is_topology_list_restart_supported =
+ primitive_topology_list_restart.primitiveTopologyListRestart;
+ is_patch_list_restart_supported =
+ primitive_topology_list_restart.primitiveTopologyPatchListRestart;
+ }
if (has_khr_image_format_list && has_khr_swapchain_mutable_format) {
extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME);
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 10653ac6b..37d140ebd 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -238,6 +238,16 @@ public:
return khr_workgroup_memory_explicit_layout;
}
+ /// Returns true if the device supports VK_EXT_primitive_topology_list_restart.
+ bool IsTopologyListPrimitiveRestartSupported() const {
+ return is_topology_list_restart_supported;
+ }
+
+ /// Returns true if the device supports VK_EXT_primitive_topology_list_restart.
+ bool IsPatchListPrimitiveRestartSupported() const {
+ return is_patch_list_restart_supported;
+ }
+
/// Returns true if the device supports VK_EXT_index_type_uint8.
bool IsExtIndexTypeUint8Supported() const {
return ext_index_type_uint8;
@@ -340,6 +350,10 @@ public:
return supports_d24_depth;
}
+ bool CantBlitMSAA() const {
+ return cant_blit_msaa;
+ }
+
private:
/// Checks if the physical device is suitable.
void CheckSuitability(bool requires_swapchain) const;
@@ -401,6 +415,9 @@ private:
bool is_shader_int16_supported{}; ///< Support for int16.
bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
+ bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list
+ ///< topologies.
+ bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch.
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2.
bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough.
@@ -430,6 +447,7 @@ private:
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
+ bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.