From 22f4b290b6f0894d29302102f539dd8753961f04 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 18 Jul 2021 18:40:14 +0200 Subject: VideoCore: Initial Setup for the Resolution Scaler. --- src/common/settings.cpp | 1 + src/common/settings.h | 18 ++++ src/video_core/dirty_flags.h | 1 + .../renderer_opengl/gl_texture_cache.cpp | 16 +++ src/video_core/renderer_opengl/gl_texture_cache.h | 8 ++ .../renderer_vulkan/vk_texture_cache.cpp | 81 ++++++++++++--- src/video_core/renderer_vulkan/vk_texture_cache.h | 19 ++++ src/video_core/texture_cache/image_base.h | 4 + src/video_core/texture_cache/image_info.h | 2 +- src/video_core/texture_cache/texture_cache.h | 113 ++++++++++++++++++++- src/video_core/texture_cache/texture_cache_base.h | 10 ++ 11 files changed, 255 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 9dd5e3efb..8c6be2c84 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -47,6 +47,7 @@ void LogSettings() { log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue()); log_setting("Core_UseMultiCore", values.use_multi_core.GetValue()); log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue()); + log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue()); log_setting("Renderer_UseResolutionFactor", values.resolution_factor.GetValue()); log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue()); log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue()); diff --git a/src/common/settings.h b/src/common/settings.h index 9ff4cf85d..08f3da055 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -52,6 +52,22 @@ enum class NvdecEmulation : u32 { GPU = 2, }; +enum class ResolutionSetup : u32 { + Res1_2X = 0, + Res3_4X = 1, + Res1X = 2, + Res3_2K = 3, + Res2X = 4, + Res3X = 5, +}; + +struct ResolutionScalingInfo { + u32 up_scale{2}; + u32 down_shift{0}; + f32 up_factor{2.0f}; + f32 down_factor{0.5f}; +}; + /** 
The BasicSetting class is a simple resource manager. It defines a label and default value * alongside the actual value of the setting for simpler and less-error prone use with frontend * configurations. Setting a default value and label is required, though subclasses may deviate from @@ -451,6 +467,8 @@ struct Values { "disable_shader_loop_safety_checks"}; Setting vulkan_device{0, "vulkan_device"}; + ResolutionScalingInfo resolution_info{}; + Setting resolution_setup{ResolutionSetup::Res1X, "resolution_setup"}; Setting resolution_factor{1, "resolution_factor"}; // *nix platforms may have issues with the borderless windowed fullscreen mode. // Default to exclusive fullscreen on these platforms for now. diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index f0d545f90..f11ff5d94 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -29,6 +29,7 @@ enum : u8 { ColorBuffer6, ColorBuffer7, ZetaBuffer, + Rescale, VertexBuffers, VertexBuffer0, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 8c3ca3d82..1e594838f 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -849,6 +849,22 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } } +void Image::ScaleUp() { + if (True(flags & ImageFlagBits::Rescaled)) { + return; + } + flags |= ImageFlagBits::Rescaled; + UNIMPLEMENTED(); +} + +void Image::ScaleDown() { + if (False(flags & ImageFlagBits::Rescaled)) { + return; + } + flags &= ~ImageFlagBits::Rescaled; + UNIMPLEMENTED(); +} + ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, ImageId image_id_, Image& image) : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h 
b/src/video_core/renderer_opengl/gl_texture_cache.h index 1ca2c90be..58b36494b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -72,6 +72,8 @@ public: StateTracker& state_tracker); ~TextureCacheRuntime(); + void Init() {} + void Finish(); ImageBufferMap UploadStagingBuffer(size_t size); @@ -110,6 +112,8 @@ public: bool HasNativeASTC() const noexcept; + void TickFrame() {} + private: struct StagingBuffers { explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); @@ -185,6 +189,10 @@ public: return gl_type; } + bool ScaleUp(); + + bool ScaleDown(); + private: void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 06c5fb867..be5b1d84d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -32,6 +32,7 @@ using Tegra::Engines::Fermi2D; using Tegra::Texture::SwizzleSource; using Tegra::Texture::TextureMipmapFilter; using VideoCommon::BufferImageCopy; +using VideoCommon::ImageFlagBits; using VideoCommon::ImageInfo; using VideoCommon::ImageType; using VideoCommon::SubresourceRange; @@ -123,7 +124,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { } } -[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const Device& device, const ImageInfo& info) { +[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const Device& device, const ImageInfo& info, + u32 up, u32 down) { const PixelFormat format = StorageFormat(info.format); const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, format); VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; @@ -142,9 +144,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { .imageType = ConvertImageType(info.type), .format = format_info.format, 
.extent{ - .width = info.size.width >> samples_x, - .height = info.size.height >> samples_y, - .depth = info.size.depth, + .width = ((info.size.width << up) >> down) >> samples_x, + .height = ((info.size.height << up) >> down) >> samples_y, + .depth = (info.size.depth << up) >> down, }, .mipLevels = static_cast(info.resources.levels), .arrayLayers = static_cast(info.resources.layers), @@ -158,11 +160,12 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } -[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) { +[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info, u32 up = 0, + u32 down = 0) { if (info.type == ImageType::Buffer) { return vk::Image{}; } - return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); + return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info, up, down)); } [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { @@ -590,6 +593,11 @@ struct RangedBarrierRange { } } // Anonymous namespace +void TextureCacheRuntime::Init() { + resolution = Settings::values.resolution_info; + is_rescaling_on = resolution.up_scale != 1 || resolution.down_shift != 0; +} + void TextureCacheRuntime::Finish() { scheduler.Finish(); } @@ -840,20 +848,26 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { return device.GetDeviceLocalMemory(); } -Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, +void TextureCacheRuntime::TickFrame() { + prescaled_images.Tick(); + prescaled_commits.Tick(); + prescaled_views.Tick(); +} + +Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) - : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, - image(MakeImage(runtime.device, info)), - commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), - aspect_mask(ImageAspectMask(info.format)) { - if 
(IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { + : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler}, + image(MakeImage(runtime_.device, info)), + commit(runtime_.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), + aspect_mask(ImageAspectMask(info.format)), runtime{&runtime_} { + if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { if (Settings::values.accelerate_astc.GetValue()) { flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; } else { flags |= VideoCommon::ImageFlagBits::Converted; } } - if (runtime.device.HasDebuggingToolAttached()) { + if (runtime->device.HasDebuggingToolAttached()) { image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ @@ -861,8 +875,8 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ .pNext = nullptr, .usage = VK_IMAGE_USAGE_STORAGE_BIT, }; - if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { - const auto& device = runtime.device.GetLogical(); + if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { + const auto& device = runtime->device.GetLogical(); storage_image_views.reserve(info.resources.levels); for (s32 level = 0; level < info.resources.levels; ++level) { storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{ @@ -907,6 +921,10 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { + const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ScaleDown(); + } std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, @@ -959,6 +977,39 @@ void Image::DownloadMemory(const StagingBufferRef& map, 
std::spanis_rescaling_on) { + flags |= ImageFlagBits::Rescaled; + return; + } + flags |= ImageFlagBits::Rescaled; + scaling_count++; + ASSERT(scaling_count < 10); + return; +} + +void Image::ScaleDown() { + if (False(flags & ImageFlagBits::Rescaled)) { + return; + } + ASSERT(info.type != ImageType::Linear); + if (!runtime->is_rescaling_on) { + flags &= ~ImageFlagBits::Rescaled; + return; + } + flags &= ~ImageFlagBits::Rescaled; + scaling_count++; + ASSERT(scaling_count < 10); + return; } ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index b09c468e4..f7e782c44 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -6,7 +6,9 @@ #include +#include "common/settings.h" #include "shader_recompiler/shader_info.h" +#include "video_core/delayed_destruction_ring.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -15,6 +17,7 @@ namespace Vulkan { +using VideoCommon::DelayedDestructionRing; using VideoCommon::ImageId; using VideoCommon::NUM_RT; using VideoCommon::Region2D; @@ -39,6 +42,14 @@ struct TextureCacheRuntime { BlitImageHelper& blit_image_helper; ASTCDecoderPass& astc_decoder_pass; RenderPassCache& render_pass_cache; + static constexpr size_t TICKS_TO_DESTROY = 6; + DelayedDestructionRing prescaled_images; + DelayedDestructionRing prescaled_commits; + DelayedDestructionRing prescaled_views; + Settings::ResolutionScalingInfo resolution; + bool is_rescaling_on{}; + + void Init(); void Finish(); @@ -74,6 +85,8 @@ struct TextureCacheRuntime { return true; } + void TickFrame(); + u64 GetDeviceLocalMemory() const; }; @@ -113,6 +126,10 @@ public: return std::exchange(initialized, true); } + void ScaleUp(); + + void 
ScaleDown(); + private: VKScheduler* scheduler; vk::Image image; @@ -121,6 +138,8 @@ private: std::vector storage_image_views; VkImageAspectFlags aspect_mask = 0; bool initialized = false; + TextureCacheRuntime* runtime; + u32 scaling_count{}; }; class ImageView : public VideoCommon::ImageViewBase { diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 0c17a791b..1cd30fd37 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -33,6 +33,10 @@ enum class ImageFlagBits : u32 { ///< garbage collection priority Alias = 1 << 11, ///< This image has aliases and has priority on garbage ///< collection + + // Rescaler + Rescaled = 1 << 12, + RescaleChecked = 1 << 13, }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 5049fc36e..16d4cee37 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -15,7 +15,7 @@ using Tegra::Texture::TICEntry; using VideoCore::Surface::PixelFormat; struct ImageInfo { - explicit ImageInfo() = default; + ImageInfo() = default; explicit ImageInfo(const TICEntry& config) noexcept; explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f70c1f764..560da4f16 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -35,6 +35,7 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& Tegra::MemoryManager& gpu_memory_) : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { + runtime.Init(); // Configure null sampler TSCEntry sampler_descriptor{}; sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); @@ -103,6 +104,7 @@ void TextureCache

::TickFrame() { sentenced_images.Tick(); sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); + runtime.TickFrame(); ++frame_tick; } @@ -208,18 +210,63 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { const bool force = flags[Dirty::RenderTargetControl]; flags[Dirty::RenderTargetControl] = false; + bool can_rescale = true; + std::array tmp_color_images{}; + ImageId tmp_depth_image{}; + const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) { + if (view_id) { + const auto& view = slot_image_views[view_id]; + const auto image_id = view.image_id; + id_save = image_id; + auto& image = slot_images[image_id]; + can_rescale &= ImageCanRescale(image); + } else { + id_save = CORRUPT_ID; + } + }; for (size_t index = 0; index < NUM_RT; ++index) { ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; if (flags[Dirty::ColorBuffer0 + index] || force) { flags[Dirty::ColorBuffer0 + index] = false; BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); } - PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + check_rescale(color_buffer_id, tmp_color_images[index]); } if (flags[Dirty::ZetaBuffer] || force) { flags[Dirty::ZetaBuffer] = false; BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); } + check_rescale(render_targets.depth_buffer_id, tmp_depth_image); + + if (can_rescale) { + const auto scale_up = [this](ImageId image_id) { + if (image_id != CORRUPT_ID) { + Image& image = slot_images[image_id]; + image.ScaleUp(); + } + }; + for (size_t index = 0; index < NUM_RT; ++index) { + scale_up(tmp_color_images[index]); + } + scale_up(tmp_depth_image); + } else { + const auto scale_down = [this](ImageId image_id) { + if (image_id != CORRUPT_ID) { + Image& image = slot_images[image_id]; + image.ScaleDown(); + } + }; + for (size_t index = 0; index < NUM_RT; ++index) { + scale_down(tmp_color_images[index]); + } + scale_down(tmp_depth_image); + } + // Rescale End + + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + PrepareImageView(color_buffer_id, true, is_clear && 
IsFullClear(color_buffer_id)); + } const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); @@ -623,6 +670,31 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, return image_id; } +template +bool TextureCache

::ImageCanRescale(Image& image) { + if (True(image.flags & ImageFlagBits::Rescaled) || + True(image.flags & ImageFlagBits::RescaleChecked)) { + return true; + } + const auto& info = image.info; + const bool can_this_rescale = + (info.type == ImageType::e1D || info.type == ImageType::e2D) && info.block.depth == 0; + if (!can_this_rescale) { + image.flags &= ~ImageFlagBits::RescaleChecked; + return false; + } + image.flags |= ImageFlagBits::RescaleChecked; + for (const auto& alias : image.aliased_images) { + Image& other_image = slot_images[alias.id]; + if (!ImageCanRescale(other_image)) { + image.flags &= ~ImageFlagBits::RescaleChecked; + return false; + } + } + image.flags &= ~ImageFlagBits::RescaleChecked; + return true; +} + template ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { @@ -660,12 +732,18 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA std::vector right_aliased_ids; std::unordered_set ignore_textures; std::vector bad_overlap_ids; + std::vector all_siblings; + const bool this_is_linear = info.type == ImageType::Linear; const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { if (True(overlap.flags & ImageFlagBits::Remapped)) { ignore_textures.insert(overlap_id); return; } - if (info.type == ImageType::Linear) { + const bool overlap_is_linear = overlap.info.type == ImageType::Linear; + if (this_is_linear != overlap_is_linear) { + return; + } + if (this_is_linear && overlap_is_linear) { if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { // Alias linear images with the same pitch left_aliased_ids.push_back(overlap_id); @@ -681,6 +759,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA cpu_addr = solution->cpu_addr; new_info.resources = solution->resources; overlap_ids.push_back(overlap_id); + all_siblings.push_back(overlap_id); return; } static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; @@ -688,10 +767,12 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { left_aliased_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::Alias; + all_siblings.push_back(overlap_id); } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, broken_views, native_bgr)) { right_aliased_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::Alias; + all_siblings.push_back(overlap_id); } else { bad_overlap_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::BadOverlap; @@ -709,8 +790,36 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } }; ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); + + bool can_rescale = + (info.type == ImageType::e1D || info.type == ImageType::e2D) && info.block.depth == 0; + for (const ImageId sibling_id : all_siblings) { + if (!can_rescale) { + break; + } + Image& sibling = slot_images[sibling_id]; + can_rescale &= ImageCanRescale(sibling); + } + + if (can_rescale) { + for (const ImageId sibling_id : all_siblings) { + Image& sibling = slot_images[sibling_id]; + sibling.ScaleUp(); + } + } else { + for (const ImageId sibling_id : all_siblings) { + Image& sibling = slot_images[sibling_id]; + sibling.ScaleDown(); + } + } + const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; + if (can_rescale) { + new_image.ScaleUp(); + } else { + new_image.ScaleDown(); + } if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { new_image.flags |= ImageFlagBits::Sparse; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 2d1893c1c..a4a2c0832 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -142,6 +142,14 @@ public: const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy); + /// Invalidate the contents of the color buffer index + /// These contents become unspecified, the cache can assume aggressive optimizations. + void InvalidateColorBuffer(size_t index); + + /// Invalidate the contents of the depth buffer + /// These contents become unspecified, the cache can assume aggressive optimizations. 
+ void InvalidateDepthBuffer(); + /// Try to find a cached image view in the given CPU address [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); @@ -318,6 +326,8 @@ private: /// Returns true if the current clear parameters clear the whole image of a given image view [[nodiscard]] bool IsFullClear(ImageViewId id); + bool ImageCanRescale(Image& image); + Runtime& runtime; VideoCore::RasterizerInterface& rasterizer; Tegra::Engines::Maxwell3D& maxwell3d; -- cgit v1.2.3 From 37ef9c913028e234509bcf70bad049b0210e4592 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 18 Jul 2021 20:33:20 +0200 Subject: Settings: Add resolution scaling to settings. --- src/common/settings.cpp | 51 +++++++++++++++++++++ src/common/settings.h | 13 ++++-- src/yuzu/configuration/config.cpp | 5 +++ src/yuzu/configuration/config.h | 1 + src/yuzu/configuration/configure_graphics.cpp | 26 ++++++++++- src/yuzu/configuration/configure_graphics.ui | 64 +++++++++++++++++++++++++++ 6 files changed, 155 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 8c6be2c84..dd3a3d456 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -106,6 +106,57 @@ float Volume() { return values.volume.GetValue() / 100.0f; } +void UpdateRescalingInfo() { + auto setup = values.resolution_setup.GetValue(); + auto& info = values.resolution_info; + switch (setup) { + case ResolutionSetup::Res1_2X: { + info.up_scale = 1; + info.down_shift = 1; + break; + } + case ResolutionSetup::Res3_4X: { + info.up_scale = 3; + info.down_shift = 2; + break; + } + case ResolutionSetup::Res1X: { + info.up_scale = 1; + info.down_shift = 0; + break; + } + case ResolutionSetup::Res3_2X: { + info.up_scale = 3; + info.down_shift = 1; + break; + } + case ResolutionSetup::Res2X: { + info.up_scale = 2; + info.down_shift = 0; + break; + } + case ResolutionSetup::Res3X: { + info.up_scale = 3; + info.down_shift = 0; + break; + } + case 
ResolutionSetup::Res4X: { + info.up_scale = 4; + info.down_shift = 0; + break; + } + default: { + UNREACHABLE(); + info.up_scale = 1; + info.down_shift = 0; + } + } + info.up_factor = static_cast(info.up_scale) / (1U << info.down_shift); + info.down_factor = static_cast(1U << info.down_shift) / info.up_scale; + info.size_up = info.up_scale * info.up_scale; + info.size_shift = info.down_shift * 2; +} + void RestoreGlobalState(bool is_powered_on) { // If a game is running, DO NOT restore the global settings state if (is_powered_on) { diff --git a/src/common/settings.h b/src/common/settings.h index 08f3da055..f4df2fc95 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -56,16 +56,19 @@ enum class ResolutionSetup : u32 { Res1_2X = 0, Res3_4X = 1, Res1X = 2, - Res3_2K = 3, + Res3_2X = 3, Res2X = 4, Res3X = 5, + Res4X = 6, }; struct ResolutionScalingInfo { - u32 up_scale{2}; + u32 up_scale{1}; u32 down_shift{0}; - f32 up_factor{2.0f}; - f32 down_factor{0.5f}; + f32 up_factor{1.0f}; + f32 down_factor{1.0f}; + u32 size_up{1}; + u32 size_shift{0}; }; /** The BasicSetting class is a simple resource manager. 
It defines a label and default value @@ -613,6 +616,8 @@ std::string GetTimeZoneString(); void LogSettings(); +void UpdateRescalingInfo(); + // Restore the global state of all applicable settings in the Values struct void RestoreGlobalState(bool is_powered_on); diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index faea5dda1..7ddc40b00 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -824,6 +824,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.vulkan_device); ReadGlobalSetting(Settings::values.fullscreen_mode); ReadGlobalSetting(Settings::values.aspect_ratio); + ReadGlobalSetting(Settings::values.resolution_setup); ReadGlobalSetting(Settings::values.max_anisotropy); ReadGlobalSetting(Settings::values.use_speed_limit); ReadGlobalSetting(Settings::values.speed_limit); @@ -1364,6 +1365,10 @@ void Config::SaveRendererValues() { static_cast(Settings::values.fullscreen_mode.GetDefault()), Settings::values.fullscreen_mode.UsingGlobal()); WriteGlobalSetting(Settings::values.aspect_ratio); + WriteSetting(QString::fromStdString(Settings::values.resolution_setup.GetLabel()), + static_cast(Settings::values.resolution_setup.GetValue(global)), + static_cast(Settings::values.resolution_setup.GetDefault()), + Settings::values.resolution_setup.UsingGlobal()); WriteGlobalSetting(Settings::values.max_anisotropy); WriteGlobalSetting(Settings::values.use_speed_limit); WriteGlobalSetting(Settings::values.speed_limit); diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index a7f4a6720..fbb91d312 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -189,5 +189,6 @@ Q_DECLARE_METATYPE(Settings::CPUAccuracy); Q_DECLARE_METATYPE(Settings::GPUAccuracy); Q_DECLARE_METATYPE(Settings::FullscreenMode); Q_DECLARE_METATYPE(Settings::NvdecEmulation); +Q_DECLARE_METATYPE(Settings::ResolutionSetup); 
Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 8e20cc6f3..4f08ae3e0 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -102,6 +102,8 @@ void ConfigureGraphics::SetConfiguration() { ui->nvdec_emulation->setCurrentIndex( static_cast(Settings::values.nvdec_emulation.GetValue())); ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); + ui->resolution_combobox->setCurrentIndex( + static_cast(Settings::values.resolution_setup.GetValue())); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); ConfigurationShared::SetHighlight(ui->api_widget, @@ -122,6 +124,11 @@ void ConfigureGraphics::SetConfiguration() { ConfigurationShared::SetHighlight(ui->ar_label, !Settings::values.aspect_ratio.UsingGlobal()); + ConfigurationShared::SetPerGameSetting(ui->resolution_combobox, + &Settings::values.resolution_setup); + ConfigurationShared::SetHighlight(ui->resolution_label, + !Settings::values.resolution_setup.UsingGlobal()); + ui->bg_combobox->setCurrentIndex(Settings::values.bg_red.UsingGlobal() ? 0 : 1); ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); @@ -133,11 +140,14 @@ void ConfigureGraphics::SetConfiguration() { } void ConfigureGraphics::ApplyConfiguration() { + const auto resolution_setup = static_cast( + ui->resolution_combobox->currentIndex() - + ((Settings::IsConfiguringGlobal()) ? 
0 : ConfigurationShared::USE_GLOBAL_OFFSET)); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.fullscreen_mode, ui->fullscreen_mode_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.aspect_ratio, ui->aspect_ratio_combobox); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_disk_shader_cache, ui->use_disk_shader_cache, use_disk_shader_cache); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation, @@ -165,7 +175,16 @@ void ConfigureGraphics::ApplyConfiguration() { Settings::values.bg_green.SetValue(static_cast(bg_color.green())); Settings::values.bg_blue.SetValue(static_cast(bg_color.blue())); } + if (Settings::values.resolution_setup.UsingGlobal()) { + Settings::values.resolution_setup.SetValue(resolution_setup); + } } else { + if (ui->resolution_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.resolution_setup.SetGlobal(true); + } else { + Settings::values.resolution_setup.SetGlobal(false); + Settings::values.resolution_setup.SetValue(resolution_setup); + } if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.renderer_backend.SetGlobal(true); Settings::values.shader_backend.SetGlobal(true); @@ -207,6 +226,7 @@ void ConfigureGraphics::ApplyConfiguration() { Settings::values.bg_blue.SetValue(static_cast(bg_color.blue())); } } + Settings::UpdateRescalingInfo(); } void ConfigureGraphics::changeEvent(QEvent* event) { @@ -312,6 +332,7 @@ void ConfigureGraphics::SetupPerGameUI() { ui->device->setEnabled(Settings::values.renderer_backend.UsingGlobal()); ui->fullscreen_mode_combobox->setEnabled(Settings::values.fullscreen_mode.UsingGlobal()); ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); + ui->resolution_combobox->setEnabled(Settings::values.resolution_setup.UsingGlobal()); ui->use_asynchronous_gpu_emulation->setEnabled( 
Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal()); @@ -340,6 +361,9 @@ void ConfigureGraphics::SetupPerGameUI() { ConfigurationShared::SetColoredComboBox( ui->fullscreen_mode_combobox, ui->fullscreen_mode_label, static_cast(Settings::values.fullscreen_mode.GetValue(true))); + ConfigurationShared::SetColoredComboBox( + ui->resolution_combobox, ui->resolution_label, + static_cast(Settings::values.resolution_setup.GetValue(true))); ConfigurationShared::InsertGlobalItem( ui->api, static_cast(Settings::values.renderer_backend.GetValue(true))); ConfigurationShared::InsertGlobalItem( diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index beae74344..1b6ac3cbb 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -309,6 +309,70 @@ + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Resolution: + + + + + + + + 0.5X (360p/540p) + + + + + 0.75X (540p/810p) + + + + + 1X (720p/1080p) + + + + + 1.5X (1080p/1620p) + + + + + 2X (1440p/2160[4K]p) + + + + + 3X (2160p[4K]/3240p[6K]) + + + + + 4X (2880p/4320p[8K]) + + + + + + + -- cgit v1.2.3 From 360e897ccd53bf863cea1ad6184d35e9b6ffbf40 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 18 Jul 2021 23:06:12 +0200 Subject: ShaderDecompiler: Add initial support for rescaling. 
--- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 72 +++++++++++++++++++++++++ src/shader_recompiler/shader_info.h | 1 + 2 files changed, 73 insertions(+) create mode 100644 src/shader_recompiler/ir_opt/rescaling_pass.cpp (limited to 'src') diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp new file mode 100644 index 000000000..d3ae3f159 --- /dev/null +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -0,0 +1,72 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/alignment.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { +namespace { + +void PatchFragCoord(IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::F32 inv_resolution_factor = IR::F32{Settings::values.resolution_info.down_factor}; + const IR::F32 new_get_attribute = ir.GetAttribute(inst.Arg(0).Attribute()); + const IR::F32 mul = ir.FMul(new_get_attribute, inv_resolution_factor); + const IR::U1 should_rescale = IR::U1{true}; + const IR::F32 selection = ir.Select(should_rescale, mul, new_get_attribute); + inst.ReplaceUsesWith(selection); +} + +void Visit(Info& info, IR::Inst& inst) { + info.requires_rescaling_uniform = false; + switch (inst.GetOpcode()) { + case IR::Opcode::GetAttribute: { + conast auto attrib = inst.Arg(0).Attribute(); + const bool is_frag = + attrib == IR::Attribute::PositionX || attrib == IR::Attribute::PositionY; + const bool must_path = is_frag && program.stage == Stage::Fragment; + if (must_path) { + PatchFragCoord(inst); + info.requires_rescaling_uniform = true; + } + break; + } + case 
IR::Opcode::ImageQueryDimensions: { + info.requires_rescaling_uniform |= true; + break; + } + case IR::Opcode::ImageFetch: { + info.requires_rescaling_uniform |= true; + break; + } + case IR::Opcode::ImageRead: { + info.requires_rescaling_uniform |= true; + break; + } + case IR::Opcode::ImageWrite: { + info.requires_rescaling_uniform |= true; + break; + } + default: + break; + } +} + +} // namespace + +void RescalingPass(Environment& env, IR::Program& program) { + Info& info{program.info}; + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + Visit(info, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 4ef4dbd40..e7981a08c 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -172,6 +172,7 @@ struct Info { bool uses_global_memory{}; bool uses_atomic_image_u32{}; bool uses_shadow_lod{}; + bool requires_rescaling_uniform{}; IR::Type used_constant_buffer_types{}; IR::Type used_storage_buffer_types{}; -- cgit v1.2.3 From ba18047e8d06584de0ce18cdbb303a6d9a8742aa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 19 Jul 2021 04:32:03 +0200 Subject: Texture Cache: Implement Vulkan UpScaling & DownScaling --- .../renderer_opengl/gl_texture_cache.cpp | 10 +- .../renderer_vulkan/vk_texture_cache.cpp | 259 +++++++++++++++++++-- src/video_core/renderer_vulkan/vk_texture_cache.h | 10 +- src/video_core/texture_cache/image_info.cpp | 3 + src/video_core/texture_cache/texture_cache.h | 84 +++++-- src/video_core/texture_cache/texture_cache_base.h | 3 + 6 files changed, 327 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 1e594838f..cdd352aef 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ 
b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -849,20 +849,22 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } } -void Image::ScaleUp() { +bool Image::ScaleUp() { if (True(flags & ImageFlagBits::Rescaled)) { - return; + return false; } flags |= ImageFlagBits::Rescaled; UNIMPLEMENTED(); + return true; } -void Image::ScaleDown() { +bool Image::ScaleDown() { if (False(flags & ImageFlagBits::Rescaled)) { - return; + return false; } flags &= ~ImageFlagBits::Rescaled; UNIMPLEMENTED(); + return true; } ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index be5b1d84d..668554d1e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -137,6 +137,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; } const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples); + const bool is_2d = info.type == ImageType::e2D; return VkImageCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, @@ -144,9 +145,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { .imageType = ConvertImageType(info.type), .format = format_info.format, .extent{ - .width = ((info.size.width << up) >> down) >> samples_x, - .height = ((info.size.height << up) >> down) >> samples_y, - .depth = (info.size.depth << up) >> down, + .width = ((info.size.width * up) >> down) >> samples_x, + .height = (is_2d ? 
((info.size.height * up) >> down) : info.size.height) >> samples_y, + .depth = info.size.depth, }, .mipLevels = static_cast(info.resources.levels), .arrayLayers = static_cast(info.resources.layers), @@ -160,7 +161,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } -[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info, u32 up = 0, +[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info, u32 up = 1, u32 down = 0) { if (info.type == ImageType::Buffer) { return vk::Image{}; @@ -851,7 +852,6 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { void TextureCacheRuntime::TickFrame() { prescaled_images.Tick(); prescaled_commits.Tick(); - prescaled_views.Tick(); } Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, @@ -923,7 +923,7 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { - ScaleDown(); + ScaleDown(true); } std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); @@ -978,38 +978,253 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span& blit_regions, + VkImageAspectFlags aspect_mask) { + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([dst_image, src_image, aspect_mask, + regions = std::move(blit_regions)](vk::CommandBuffer cmdbuf) { + const std::array read_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image 
= src_image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + VkImageMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, nullptr, nullptr, read_barriers); + const VkFilter vk_filter = VK_FILTER_NEAREST; + cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, vk_filter); + 
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, write_barrier); + }); +} + +bool Image::ScaleUp(bool save_as_backup) { if (True(flags & ImageFlagBits::Rescaled)) { - return; + return false; } ASSERT(info.type != ImageType::Linear); - if (!runtime->is_rescaling_on) { - flags |= ImageFlagBits::Rescaled; - return; - } - flags |= ImageFlagBits::Rescaled; scaling_count++; ASSERT(scaling_count < 10); - return; + flags |= ImageFlagBits::Rescaled; + /*if (!runtime->is_rescaling_on) { + return; + }*/ + const auto& resolution = runtime->resolution; + vk::Image rescaled_image = + MakeImage(runtime->device, info, resolution.up_scale, resolution.down_shift); + MemoryCommit new_commit( + runtime->memory_allocator.Commit(rescaled_image, MemoryUsage::DeviceLocal)); + + const auto scale_up = [&](u32 value) { + return (value * resolution.up_scale) >> resolution.down_shift; + }; + + const bool is_2d = info.type == ImageType::e2D; + boost::container::small_vector vkRegions(info.resources.levels); + for (s32 level = 0; level < info.resources.levels; level++) { + VkImageBlit blit{ + .srcSubresource{ + .aspectMask = aspect_mask, + .mipLevel = u32(level), + .baseArrayLayer = 0, + .layerCount = u32(info.resources.layers), + }, + .srcOffsets{ + { + .x = 0, + .y = 0, + .z = 0, + }, + { + .x = s32(info.size.width), + .y = s32(info.size.height), + .z = 1, + }, + }, + .dstSubresource{ + .aspectMask = aspect_mask, + .mipLevel = u32(level), + .baseArrayLayer = 0, + .layerCount = u32(info.resources.layers), + }, + .dstOffsets{ + { + .x = 0, + .y = 0, + .z = 0, + }, + { + .x = s32(scale_up(info.size.width)), + .y = is_2d ? 
s32(scale_up(info.size.height)) : s32(info.size.height), + .z = 1, + }, + }, + }; + vkRegions.push_back(blit); + } + BlitScale(*scheduler, *image, *rescaled_image, vkRegions, aspect_mask); + if (save_as_backup) { + backup_image = std::move(image); + backup_commit = std::move(commit); + has_backup = true; + } else { + runtime->prescaled_images.Push(std::move(image)); + runtime->prescaled_commits.Push(std::move(commit)); + } + image = std::move(rescaled_image); + commit = std::move(new_commit); + return true; } -void Image::ScaleDown() { +void Image::SwapBackup() { + ASSERT(has_backup); + runtime->prescaled_images.Push(std::move(image)); + runtime->prescaled_commits.Push(std::move(commit)); + image = std::move(backup_image); + commit = std::move(backup_commit); + has_backup = false; +} + +bool Image::ScaleDown(bool save_as_backup) { if (False(flags & ImageFlagBits::Rescaled)) { - return; + return false; } ASSERT(info.type != ImageType::Linear); - if (!runtime->is_rescaling_on) { - flags &= ~ImageFlagBits::Rescaled; - return; - } flags &= ~ImageFlagBits::Rescaled; scaling_count++; ASSERT(scaling_count < 10); - return; + /*if (!runtime->is_rescaling_on) { + return false; + }*/ + + const auto& resolution = runtime->resolution; + vk::Image downscaled_image = + MakeImage(runtime->device, info, resolution.up_scale, resolution.down_shift); + MemoryCommit new_commit( + runtime->memory_allocator.Commit(downscaled_image, MemoryUsage::DeviceLocal)); + + const auto scale_up = [&](u32 value) { + return (value * resolution.up_scale) >> resolution.down_shift; + }; + + const bool is_2d = info.type == ImageType::e2D; + boost::container::small_vector vkRegions(info.resources.levels); + for (s32 level = 0; level < info.resources.levels; level++) { + VkImageBlit blit{ + .srcSubresource{ + .aspectMask = aspect_mask, + .mipLevel = u32(level), + .baseArrayLayer = 0, + .layerCount = u32(info.resources.layers), + }, + .srcOffsets{ + { + .x = 0, + .y = 0, + .z = 0, + }, + { + .x = 
s32(scale_up(info.size.width)), + .y = is_2d ? s32(scale_up(info.size.height)) : s32(info.size.height), + .z = 1, + }, + }, + .dstSubresource{ + .aspectMask = aspect_mask, + .mipLevel = u32(level), + .baseArrayLayer = 0, + .layerCount = u32(info.resources.layers), + }, + .dstOffsets{ + { + .x = 0, + .y = 0, + .z = 0, + }, + { + .x = s32(info.size.width), + .y = s32(info.size.height), + .z = 1, + }, + }, + }; + vkRegions.push_back(blit); + } + BlitScale(*scheduler, *image, *downscaled_image, vkRegions, aspect_mask); + if (save_as_backup) { + backup_image = std::move(image); + backup_commit = std::move(commit); + has_backup = true; + } else { + runtime->prescaled_images.Push(std::move(image)); + runtime->prescaled_commits.Push(std::move(commit)); + } + image = std::move(downscaled_image); + commit = std::move(new_commit); + return true; } ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f7e782c44..958a64651 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -45,7 +45,6 @@ struct TextureCacheRuntime { static constexpr size_t TICKS_TO_DESTROY = 6; DelayedDestructionRing prescaled_images; DelayedDestructionRing prescaled_commits; - DelayedDestructionRing prescaled_views; Settings::ResolutionScalingInfo resolution; bool is_rescaling_on{}; @@ -126,9 +125,11 @@ public: return std::exchange(initialized, true); } - void ScaleUp(); + bool ScaleUp(bool save_as_backup = false); - void ScaleDown(); + bool ScaleDown(bool save_as_backup = false); + + void SwapBackup(); private: VKScheduler* scheduler; @@ -140,6 +141,9 @@ private: bool initialized = false; TextureCacheRuntime* runtime; u32 scaling_count{}; + vk::Image backup_image{}; + MemoryCommit backup_commit{}; + bool has_backup{}; }; class ImageView : public VideoCommon::ImageViewBase { diff --git 
a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 64fd7010a..022ca9033 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -41,6 +41,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { ASSERT(config.BaseLayer() == 0); type = ImageType::e1D; size.width = config.Width(); + resources.layers = 1; break; case TextureType::Texture1DArray: UNIMPLEMENTED_IF(config.BaseLayer() != 0); @@ -82,10 +83,12 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { size.width = config.Width(); size.height = config.Height(); size.depth = config.Depth(); + resources.layers = 1; break; case TextureType::Texture1DBuffer: type = ImageType::Buffer; size.width = config.Width(); + resources.layers = 1; break; default: UNREACHABLE_MSG("Invalid texture_type={}", static_cast(config.texture_type.Value())); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 560da4f16..95a9e8fe9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -242,24 +242,36 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { const auto scale_up = [this](ImageId image_id) { if (image_id != CORRUPT_ID) { Image& image = slot_images[image_id]; - image.ScaleUp(); + return ScaleUp(image); } + return false; }; for (size_t index = 0; index < NUM_RT; ++index) { - scale_up(tmp_color_images[index]); + if (scale_up(tmp_color_images[index])) { + BindRenderTarget(&render_targets.color_buffer_ids[index], + FindColorBuffer(index, is_clear)); + } + } + if (scale_up(tmp_depth_image)) { + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); } - scale_up(tmp_depth_image); } else { const auto scale_down = [this](ImageId image_id) { if (image_id != CORRUPT_ID) { Image& image = slot_images[image_id]; - image.ScaleDown(); + return ScaleDown(image); } + return false; }; for (size_t index = 0; index < NUM_RT; ++index) { - scale_down(tmp_color_images[index]); + if (scale_down(tmp_color_images[index])) { + BindRenderTarget(&render_targets.color_buffer_ids[index], + FindColorBuffer(index, is_clear)); + } + } + if (scale_down(tmp_depth_image)) { + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); } - scale_down(tmp_depth_image); } // Rescale End @@ -695,6 +707,47 @@ bool TextureCache

<P>::ImageCanRescale(Image& image) { return true; } +template <class P> +void TextureCache<P>::InvalidateScale(Image& image, bool invalidate_rt) { + const std::span<const ImageViewId> image_view_ids = image.image_view_ids; + if (invalidate_rt) { + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; + } + } + } + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); +} + +template <class P> +bool TextureCache<P>::ScaleUp(Image& image, bool invalidate_rt) { + const bool rescaled = image.ScaleUp(); + if (!rescaled) { + return false; + } + InvalidateScale(image, invalidate_rt); + return true; +} + +template <class P> +bool TextureCache<P>::ScaleDown(Image& image, bool invalidate_rt) { + const bool rescaled = image.ScaleDown(); + if (!rescaled) { + return false; + } + InvalidateScale(image, invalidate_rt); + return true; +} + template <class P> ImageId TextureCache<P>

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { @@ -793,33 +846,32 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA bool can_rescale = (info.type == ImageType::e1D || info.type == ImageType::e2D) && info.block.depth == 0; + bool any_rescaled = false; for (const ImageId sibling_id : all_siblings) { if (!can_rescale) { break; } Image& sibling = slot_images[sibling_id]; can_rescale &= ImageCanRescale(sibling); + any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); } + can_rescale &= any_rescaled; + if (can_rescale) { for (const ImageId sibling_id : all_siblings) { Image& sibling = slot_images[sibling_id]; - sibling.ScaleUp(); + ScaleUp(sibling, true); } } else { for (const ImageId sibling_id : all_siblings) { Image& sibling = slot_images[sibling_id]; - sibling.ScaleDown(); + ScaleDown(sibling, true); } } const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; - if (can_rescale) { - new_image.ScaleUp(); - } else { - new_image.ScaleDown(); - } if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { new_image.flags |= ImageFlagBits::Sparse; @@ -840,6 +892,12 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA // TODO: Only upload what we need RefreshContents(new_image, new_image_id); + if (can_rescale) { + new_image.ScaleUp(); + } else { + new_image.ScaleDown(); + } + for (const ImageId overlap_id : overlap_ids) { Image& overlap = slot_images[overlap_id]; if (overlap.info.num_samples != new_image.info.num_samples) { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index a4a2c0832..042678786 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -327,6 +327,9 @@ private: [[nodiscard]] bool IsFullClear(ImageViewId id); bool ImageCanRescale(Image& image); + void InvalidateScale(Image& image, bool invalidate_rt = false); + bool ScaleUp(Image& image, bool invalidate_rt = false); + bool ScaleDown(Image& image, bool invalidate_rt = false); Runtime& runtime; VideoCore::RasterizerInterface& rasterizer; -- cgit v1.2.3 From 10e5065a5c72e9a704bac1500d92f41e5adb5751 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 18 Jul 2021 23:59:31 -0400 Subject: gl_texture_cache: WIP texture rescale --- .../renderer_opengl/gl_texture_cache.cpp | 70 +++++++++++++++++++++- src/video_core/renderer_opengl/gl_texture_cache.h | 2 + 2 files changed, 69 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index cdd352aef..9b2a09007 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -698,7 +698,10 @@ void Image::UploadMemory(const ImageBufferMap& map, void Image::DownloadMemory(ImageBufferMap& map, std::span copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API - + const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + 
ScaleDown(); + } glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); glPixelStorei(GL_PACK_ALIGNMENT, 1); @@ -716,6 +719,9 @@ void Image::DownloadMemory(ImageBufferMap& map, } CopyImageToBuffer(copy, map.offset); } + if (is_rescaled) { + ScaleUp(); + } } GLuint Image::StorageHandle() noexcept { @@ -849,12 +855,70 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } } +void Image::Scale() { + // TODO: Pass scaling factor? + if (gl_format == 0 || gl_type == 0) { + // compressed textures + return; + } + if (info.type == ImageType::Linear) { + UNIMPLEMENTED(); + return; + } + GLint prev_draw_fbo; + GLint prev_read_fbo; + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &prev_draw_fbo); + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &prev_read_fbo); + const GLenum attachment = [this] { + switch (GetFormatType(info.format)) { + case SurfaceType::ColorTexture: + return GL_COLOR_ATTACHMENT0; + case SurfaceType::Depth: + return GL_DEPTH_ATTACHMENT; + case SurfaceType::DepthStencil: + return GL_DEPTH_STENCIL_ATTACHMENT; + default: + UNREACHABLE(); + return GL_COLOR_ATTACHMENT0; + } + }(); + const GLenum mask = [this] { + switch (GetFormatType(info.format)) { + case SurfaceType::ColorTexture: + return GL_COLOR_BUFFER_BIT; + case SurfaceType::Depth: + return GL_DEPTH_BUFFER_BIT; + case SurfaceType::DepthStencil: + return GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT; + default: + UNREACHABLE(); + return GL_COLOR_BUFFER_BIT; + } + }(); + const GLenum filter = (mask & GL_COLOR_BUFFER_BIT) != 0 ? 
GL_LINEAR : GL_NEAREST; + GLuint fbo_handle; + glGenFramebuffers(1, &fbo_handle); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_handle); + glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo_handle); + glNamedFramebufferTexture(fbo_handle, attachment, texture.handle, 0); + + const size_t scaled_width = info.size.width; + const size_t scaled_height = info.size.height * 2; + glBlitNamedFramebuffer(fbo_handle, fbo_handle, 0, 0, info.size.width, info.size.height, 0, 0, + scaled_width, scaled_height, mask, filter); + // TODO: resize texture? + glCopyTextureSubImage3D(texture.handle, 0, 0, 0, 0, 0, 0, scaled_width, scaled_height / 2); + // Restore previous framebuffers + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, prev_draw_fbo); + glBindFramebuffer(GL_READ_FRAMEBUFFER, prev_read_fbo); +} + bool Image::ScaleUp() { if (True(flags & ImageFlagBits::Rescaled)) { return false; } flags |= ImageFlagBits::Rescaled; - UNIMPLEMENTED(); + Scale(); return true; } @@ -863,7 +927,7 @@ bool Image::ScaleDown() { return false; } flags &= ~ImageFlagBits::Rescaled; - UNIMPLEMENTED(); + Scale(); return true; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 58b36494b..324a0f1cb 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -198,6 +198,8 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); + void Scale(); + OGLTexture texture; OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; -- cgit v1.2.3 From 84f2aea8962146be899131b032fcdf9b4e1f6ddf Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 20 Jul 2021 07:40:05 +0200 Subject: Texture Cache: More rescaling fixes. 
--- .../renderer_opengl/gl_texture_cache.cpp | 4 +- .../renderer_vulkan/vk_texture_cache.cpp | 8 + src/video_core/texture_cache/texture_cache.h | 162 +++++++++++---------- src/video_core/texture_cache/texture_cache_base.h | 6 +- 4 files changed, 96 insertions(+), 84 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 9b2a09007..2d9f770cd 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -918,7 +918,7 @@ bool Image::ScaleUp() { return false; } flags |= ImageFlagBits::Rescaled; - Scale(); + //Scale(); return true; } @@ -927,7 +927,7 @@ bool Image::ScaleDown() { return false; } flags &= ~ImageFlagBits::Rescaled; - Scale(); + //Scale(); return true; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 668554d1e..5fd190825 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1078,6 +1078,10 @@ bool Image::ScaleUp(bool save_as_backup) { MemoryCommit new_commit( runtime->memory_allocator.Commit(rescaled_image, MemoryUsage::DeviceLocal)); + if (aspect_mask == 0) { + aspect_mask = ImageAspectMask(info.format); + } + const auto scale_up = [&](u32 value) { return (value * resolution.up_scale) >> resolution.down_shift; }; @@ -1170,6 +1174,10 @@ bool Image::ScaleDown(bool save_as_backup) { return (value * resolution.up_scale) >> resolution.down_shift; }; + if (aspect_mask == 0) { + aspect_mask = ImageAspectMask(info.format); + } + const bool is_2d = info.type == ImageType::e2D; boost::container::small_vector vkRegions(info.resources.levels); for (s32 level = 0; level < info.resources.levels; level++) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 95a9e8fe9..b7d1ae92d 100644 --- 
a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -204,75 +204,68 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); return; } - flags[Dirty::RenderTargets] = false; - - // Render target control is used on all render targets, so force look ups when this one is up - const bool force = flags[Dirty::RenderTargetControl]; - flags[Dirty::RenderTargetControl] = false; - - bool can_rescale = true; - std::array tmp_color_images{}; - ImageId tmp_depth_image{}; - const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) { - if (view_id) { - const auto& view = slot_image_views[view_id]; - const auto image_id = view.image_id; - id_save = image_id; - auto& image = slot_images[image_id]; - can_rescale &= ImageCanRescale(image); - } else { - id_save = CORRUPT_ID; - } - }; - for (size_t index = 0; index < NUM_RT; ++index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - if (flags[Dirty::ColorBuffer0 + index] || force) { - flags[Dirty::ColorBuffer0 + index] = false; - BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); - } - check_rescale(color_buffer_id, tmp_color_images[index]); - } - if (flags[Dirty::ZetaBuffer] || force) { - flags[Dirty::ZetaBuffer] = false; - BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); - } - check_rescale(render_targets.depth_buffer_id, tmp_depth_image); - if (can_rescale) { - const auto scale_up = [this](ImageId image_id) { - if (image_id != CORRUPT_ID) { - Image& image = slot_images[image_id]; - return ScaleUp(image); + do { + flags[Dirty::RenderTargets] = false; + + has_deleted_images = false; + // Render target control is used on all render targets, so force look ups when this one is + // up + const bool force = flags[Dirty::RenderTargetControl]; + flags[Dirty::RenderTargetControl] = false; + + bool can_rescale = true; + std::array tmp_color_images{}; + ImageId tmp_depth_image{}; + const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) { + if 
(view_id) { + const auto& view = slot_image_views[view_id]; + const auto image_id = view.image_id; + id_save = image_id; + auto& image = slot_images[image_id]; + can_rescale &= ImageCanRescale(image); + } else { + id_save = CORRUPT_ID; } - return false; }; for (size_t index = 0; index < NUM_RT; ++index) { - if (scale_up(tmp_color_images[index])) { - BindRenderTarget(&render_targets.color_buffer_ids[index], - FindColorBuffer(index, is_clear)); + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + if (flags[Dirty::ColorBuffer0 + index] || force) { + flags[Dirty::ColorBuffer0 + index] = false; + BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); } + check_rescale(color_buffer_id, tmp_color_images[index]); } - if (scale_up(tmp_depth_image)) { + if (flags[Dirty::ZetaBuffer] || force) { + flags[Dirty::ZetaBuffer] = false; BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); } - } else { - const auto scale_down = [this](ImageId image_id) { - if (image_id != CORRUPT_ID) { - Image& image = slot_images[image_id]; - return ScaleDown(image); + check_rescale(render_targets.depth_buffer_id, tmp_depth_image); + + if (can_rescale) { + const auto scale_up = [this](ImageId image_id) { + if (image_id != CORRUPT_ID) { + Image& image = slot_images[image_id]; + ScaleUp(image); + } + }; + for (size_t index = 0; index < NUM_RT; ++index) { + scale_up(tmp_color_images[index]); } - return false; - }; - for (size_t index = 0; index < NUM_RT; ++index) { - if (scale_down(tmp_color_images[index])) { - BindRenderTarget(&render_targets.color_buffer_ids[index], - FindColorBuffer(index, is_clear)); + scale_up(tmp_depth_image); + } else { + const auto scale_down = [this](ImageId image_id) { + if (image_id != CORRUPT_ID) { + Image& image = slot_images[image_id]; + ScaleDown(image); + } + }; + for (size_t index = 0; index < NUM_RT; ++index) { + scale_down(tmp_color_images[index]); } + scale_down(tmp_depth_image); } - if 
(scale_down(tmp_depth_image)) { - BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); - } - } + } while (has_deleted_images); // Rescale End for (size_t index = 0; index < NUM_RT; ++index) { @@ -708,43 +701,54 @@ bool TextureCache

<P>::ImageCanRescale(Image& image) { } template <class P> -void TextureCache<P>::InvalidateScale(Image& image, bool invalidate_rt) { +void TextureCache<P>::InvalidateScale(Image& image) { const std::span<const ImageViewId> image_view_ids = image.image_view_ids; - if (invalidate_rt) { - auto& dirty = maxwell3d.dirty.flags; - dirty[Dirty::RenderTargets] = true; - dirty[Dirty::ZetaBuffer] = true; - for (size_t rt = 0; rt < NUM_RT; ++rt) { - dirty[Dirty::ColorBuffer0 + rt] = true; - } - for (const ImageViewId image_view_id : image_view_ids) { - std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); - if (render_targets.depth_buffer_id == image_view_id) { - render_targets.depth_buffer_id = ImageViewId{}; - } + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; } } RemoveImageViewReferences(image_view_ids); RemoveFramebuffers(image_view_ids); + for (const ImageViewId image_view_id : image_view_ids) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + slot_image_views.erase(image_view_id); + } + image.image_view_ids.clear(); + image.image_view_infos.clear(); + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + } + graphics_image_table.Invalidate(); + compute_image_table.Invalidate(); + has_deleted_images = true; } template <class P> -bool TextureCache<P>::ScaleUp(Image& image, bool invalidate_rt) { +bool TextureCache<P>::ScaleUp(Image& image) { const bool rescaled = image.ScaleUp(); if (!rescaled) { return false; } - InvalidateScale(image, invalidate_rt); + InvalidateScale(image); return true; } template <class P> -bool TextureCache<P>::ScaleDown(Image& image, bool invalidate_rt) { +bool TextureCache<P>::ScaleDown(Image& image) { const bool rescaled = image.ScaleDown(); if (!rescaled) { return false; } - InvalidateScale(image, invalidate_rt); + InvalidateScale(image); return true; } @@ -861,12 +865,12 @@ ImageId TextureCache<P>

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA if (can_rescale) { for (const ImageId sibling_id : all_siblings) { Image& sibling = slot_images[sibling_id]; - ScaleUp(sibling, true); + ScaleUp(sibling); } } else { for (const ImageId sibling_id : all_siblings) { Image& sibling = slot_images[sibling_id]; - ScaleDown(sibling, true); + ScaleDown(sibling); } } @@ -893,9 +897,9 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA RefreshContents(new_image, new_image_id); if (can_rescale) { - new_image.ScaleUp(); + ScaleUp(new_image); } else { - new_image.ScaleDown(); + ScaleDown(new_image); } for (const ImageId overlap_id : overlap_ids) { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 042678786..cdd99242b 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -327,9 +327,9 @@ private: [[nodiscard]] bool IsFullClear(ImageViewId id); bool ImageCanRescale(Image& image); - void InvalidateScale(Image& image, bool invalidate_rt = false); - bool ScaleUp(Image& image, bool invalidate_rt = false); - bool ScaleDown(Image& image, bool invalidate_rt = false); + void InvalidateScale(Image& image); + bool ScaleUp(Image& image); + bool ScaleDown(Image& image); Runtime& runtime; VideoCore::RasterizerInterface& rasterizer; -- cgit v1.2.3 From 71ca84d8299f7eb6779e95e808b3ec7f8505354b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 20 Jul 2021 18:29:52 +0200 Subject: Settings: eliminate rescaling_factor. 
--- src/common/settings.cpp | 2 +- src/common/settings.h | 2 +- src/core/hle/service/am/am.cpp | 12 ++++-------- src/core/hle/service/vi/vi.cpp | 27 ++++++++------------------- src/core/telemetry_session.cpp | 2 -- src/video_core/video_core.cpp | 9 ++++----- src/video_core/video_core.h | 2 +- 7 files changed, 19 insertions(+), 37 deletions(-) (limited to 'src') diff --git a/src/common/settings.cpp b/src/common/settings.cpp index dd3a3d456..6f3acee79 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -48,7 +48,6 @@ void LogSettings() { log_setting("Core_UseMultiCore", values.use_multi_core.GetValue()); log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue()); log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue()); - log_setting("Renderer_UseResolutionFactor", values.resolution_factor.GetValue()); log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue()); log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue()); log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue()); @@ -155,6 +154,7 @@ void UpdateRescalingInfo() { info.down_factor = static_cast(1U << info.down_shift) / info.up_scale; info.size_up = info.up_scale * info.up_scale; info.size_shift = info.down_shift * 2; + info.active = info.up_scale != 1 || info.down_shift != 0; } void RestoreGlobalState(bool is_powered_on) { diff --git a/src/common/settings.h b/src/common/settings.h index f4df2fc95..2b11984b4 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -69,6 +69,7 @@ struct ResolutionScalingInfo { f32 down_factor{1.0f}; u32 size_up{1}; u32 size_shift{0}; + bool active{}; }; /** The BasicSetting class is a simple resource manager. 
It defines a label and default value @@ -472,7 +473,6 @@ struct Values { ResolutionScalingInfo resolution_info{}; Setting resolution_setup{ResolutionSetup::Res1X, "resolution_setup"}; - Setting resolution_factor{1, "resolution_factor"}; // *nix platforms may have issues with the borderless windowed fullscreen mode. // Default to exclusive fullscreen on these platforms for now. RangedSetting fullscreen_mode{ diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 50c2ace93..aee8d4f93 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -797,15 +797,11 @@ void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext& rb.Push(ResultSuccess); if (Settings::values.use_docked_mode.GetValue()) { - rb.Push(static_cast(Service::VI::DisplayResolution::DockedWidth) * - static_cast(Settings::values.resolution_factor.GetValue())); - rb.Push(static_cast(Service::VI::DisplayResolution::DockedHeight) * - static_cast(Settings::values.resolution_factor.GetValue())); + rb.Push(static_cast(Service::VI::DisplayResolution::DockedWidth)); + rb.Push(static_cast(Service::VI::DisplayResolution::DockedHeight)); } else { - rb.Push(static_cast(Service::VI::DisplayResolution::UndockedWidth) * - static_cast(Settings::values.resolution_factor.GetValue())); - rb.Push(static_cast(Service::VI::DisplayResolution::UndockedHeight) * - static_cast(Settings::values.resolution_factor.GetValue())); + rb.Push(static_cast(Service::VI::DisplayResolution::UndockedWidth)); + rb.Push(static_cast(Service::VI::DisplayResolution::UndockedHeight)); } } diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 63d5242c4..75ee3e5e4 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -541,11 +541,8 @@ private: switch (transaction) { case TransactionId::Connect: { IGBPConnectRequestParcel request{ctx.ReadBuffer()}; - IGBPConnectResponseParcel response{ - 
static_cast(static_cast(DisplayResolution::UndockedWidth) * - Settings::values.resolution_factor.GetValue()), - static_cast(static_cast(DisplayResolution::UndockedHeight) * - Settings::values.resolution_factor.GetValue())}; + IGBPConnectResponseParcel response{static_cast(DisplayResolution::UndockedWidth), + static_cast(DisplayResolution::UndockedHeight)}; buffer_queue.Connect(); @@ -775,15 +772,11 @@ private: rb.Push(ResultSuccess); if (Settings::values.use_docked_mode.GetValue()) { - rb.Push(static_cast(Service::VI::DisplayResolution::DockedWidth) * - static_cast(Settings::values.resolution_factor.GetValue())); - rb.Push(static_cast(Service::VI::DisplayResolution::DockedHeight) * - static_cast(Settings::values.resolution_factor.GetValue())); + rb.Push(static_cast(Service::VI::DisplayResolution::DockedWidth)); + rb.Push(static_cast(Service::VI::DisplayResolution::DockedHeight)); } else { - rb.Push(static_cast(Service::VI::DisplayResolution::UndockedWidth) * - static_cast(Settings::values.resolution_factor.GetValue())); - rb.Push(static_cast(Service::VI::DisplayResolution::UndockedHeight) * - static_cast(Settings::values.resolution_factor.GetValue())); + rb.Push(static_cast(Service::VI::DisplayResolution::UndockedWidth)); + rb.Push(static_cast(Service::VI::DisplayResolution::UndockedHeight)); } rb.PushRaw(60.0f); // This wouldn't seem to be correct for 30 fps games. @@ -1063,10 +1056,8 @@ private: // This only returns the fixed values of 1280x720 and makes no distinguishing // between docked and undocked dimensions. We take the liberty of applying // the resolution scaling factor here. 
- rb.Push(static_cast(DisplayResolution::UndockedWidth) * - static_cast(Settings::values.resolution_factor.GetValue())); - rb.Push(static_cast(DisplayResolution::UndockedHeight) * - static_cast(Settings::values.resolution_factor.GetValue())); + rb.Push(static_cast(DisplayResolution::UndockedWidth)); + rb.Push(static_cast(DisplayResolution::UndockedHeight)); } void SetLayerScalingMode(Kernel::HLERequestContext& ctx) { @@ -1099,8 +1090,6 @@ private: LOG_WARNING(Service_VI, "(STUBBED) called"); DisplayInfo display_info; - display_info.width *= static_cast(Settings::values.resolution_factor.GetValue()); - display_info.height *= static_cast(Settings::values.resolution_factor.GetValue()); ctx.WriteBuffer(&display_info, sizeof(DisplayInfo)); IPC::ResponseBuilder rb{ctx, 4}; rb.Push(ResultSuccess); diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 191475f71..654db0b52 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -229,8 +229,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core.GetValue()); AddField(field_type, "Renderer_Backend", TranslateRenderer(Settings::values.renderer_backend.GetValue())); - AddField(field_type, "Renderer_ResolutionFactor", - Settings::values.resolution_factor.GetValue()); AddField(field_type, "Renderer_UseSpeedLimit", Settings::values.use_speed_limit.GetValue()); AddField(field_type, "Renderer_SpeedLimit", Settings::values.speed_limit.GetValue()); AddField(field_type, "Renderer_UseDiskShaderCache", diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index cae543a51..508173db3 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -53,11 +53,10 @@ std::unique_ptr CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor } } -u16 GetResolutionScaleFactor(const RendererBase& renderer) { - return static_cast( - 
Settings::values.resolution_factor.GetValue() != 0 - ? Settings::values.resolution_factor.GetValue() - : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio()); +float GetResolutionScaleFactor(const RendererBase& renderer) { + return Settings::values.resolution_info.active + ? Settings::values.resolution_info.up_factor + : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio(); } } // namespace VideoCore diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index f5c27125d..f86877e86 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -25,6 +25,6 @@ class RendererBase; /// Creates an emulated GPU instance using the given system context. std::unique_ptr CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); -u16 GetResolutionScaleFactor(const RendererBase& renderer); +float GetResolutionScaleFactor(const RendererBase& renderer); } // namespace VideoCore -- cgit v1.2.3 From 778700ff9d6eca96945deebcd4415e70d58330d9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 20 Jul 2021 19:36:38 +0200 Subject: TextureCache: Modify Viewports/Scissors according to Rescale. 
--- src/video_core/dirty_flags.h | 3 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 87 +++++++++++++++-------- src/video_core/renderer_vulkan/vk_state_tracker.h | 6 +- src/video_core/texture_cache/texture_cache.h | 25 ++++++- src/video_core/texture_cache/texture_cache_base.h | 3 + src/yuzu/configuration/config.cpp | 4 ++ 6 files changed, 93 insertions(+), 35 deletions(-) (limited to 'src') diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index f11ff5d94..d63ad5a35 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -29,7 +29,8 @@ enum : u8 { ColorBuffer6, ColorBuffer7, ZetaBuffer, - Rescale, + RescaleViewports, + RescaleScissors, VertexBuffers, VertexBuffer0, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 30b47a7a0..7a7374b78 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -58,19 +58,14 @@ struct DrawParams { bool is_indexed; }; -VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { +VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) { const auto& src = regs.viewport_transform[index]; - const float width = src.scale_x * 2.0f; - float y = src.translate_y - src.scale_y; - float height = src.scale_y * 2.0f; - if (regs.screen_y_control.y_negate) { - y += height; - height = -height; - } + const float width = src.scale_x * 2.0f * scale; + const float height = src.scale_y * 2.0f * scale; const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; VkViewport viewport{ - .x = src.translate_x - src.scale_x, - .y = y, + .x = (src.translate_x - src.scale_x) * scale, + .y = (src.translate_y - src.scale_y) * scale, .width = width != 0.0f ? width : 1.0f, .height = height != 0.0f ? 
height : 1.0f, .minDepth = src.translate_z - src.scale_z * reduce_z, @@ -83,14 +78,21 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in return viewport; } -VkRect2D GetScissorState(const Maxwell& regs, size_t index) { +VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u32 down_shift = 0) { const auto& src = regs.scissor_test[index]; VkRect2D scissor; + const auto scale_up = [&](u32 value) -> u32 { + if (value == 0) { + return 0U; + } + const u32 converted_value = (value * up_scale) >> down_shift; + return std::max(converted_value, 1U); + }; if (src.enable) { - scissor.offset.x = static_cast(src.min_x); - scissor.offset.y = static_cast(src.min_y); - scissor.extent.width = src.max_x - src.min_x; - scissor.extent.height = src.max_y - src.min_y; + scissor.offset.x = static_cast(scale_up(src.min_x)); + scissor.offset.y = static_cast(scale_up(src.min_y)); + scissor.extent.width = scale_up(src.max_x - src.min_x); + scissor.extent.height = scale_up(src.max_y - src.min_y); } else { scissor.offset.x = 0; scissor.offset.y = 0; @@ -214,8 +216,15 @@ void RasterizerVulkan::Clear() { const VkExtent2D render_area = framebuffer->RenderArea(); scheduler.RequestRenderpass(framebuffer); + u32 up_scale = 1; + u32 down_shift = 0; + if (texture_cache.IsRescaling()) { + up_scale = Settings::values.resolution_info.up_scale; + down_shift = Settings::values.resolution_info.down_shift; + } + VkClearRect clear_rect{ - .rect = GetScissorState(regs, 0), + .rect = GetScissorState(regs, 0, up_scale, down_shift), .baseArrayLayer = regs.clear_buffers.layer, .layerCount = 1, }; @@ -595,15 +604,17 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg if (!state_tracker.TouchViewports()) { return; } + const float scale = + texture_cache.IsRescaling() ? 
Settings::values.resolution_info.up_factor : 1.0f; const std::array viewports{ - GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), - GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), - GetViewportState(device, regs, 4), GetViewportState(device, regs, 5), - GetViewportState(device, regs, 6), GetViewportState(device, regs, 7), - GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), - GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), - GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), - GetViewportState(device, regs, 14), GetViewportState(device, regs, 15), + GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale), + GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale), + GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale), + GetViewportState(device, regs, 6, scale), GetViewportState(device, regs, 7, scale), + GetViewportState(device, regs, 8, scale), GetViewportState(device, regs, 9, scale), + GetViewportState(device, regs, 10, scale), GetViewportState(device, regs, 11, scale), + GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale), + GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale), }; scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); } @@ -612,13 +623,29 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs if (!state_tracker.TouchScissors()) { return; } + u32 up_scale = 1; + u32 down_shift = 0; + if (texture_cache.IsRescaling()) { + up_scale = Settings::values.resolution_info.up_scale; + down_shift = Settings::values.resolution_info.down_shift; + } const std::array scissors{ - GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), - GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), - 
GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), - GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), - GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), - GetScissorState(regs, 15), + GetScissorState(regs, 0, up_scale, down_shift), + GetScissorState(regs, 1, up_scale, down_shift), + GetScissorState(regs, 2, up_scale, down_shift), + GetScissorState(regs, 3, up_scale, down_shift), + GetScissorState(regs, 4, up_scale, down_shift), + GetScissorState(regs, 5, up_scale, down_shift), + GetScissorState(regs, 6, up_scale, down_shift), + GetScissorState(regs, 7, up_scale, down_shift), + GetScissorState(regs, 8, up_scale, down_shift), + GetScissorState(regs, 9, up_scale, down_shift), + GetScissorState(regs, 10, up_scale, down_shift), + GetScissorState(regs, 11, up_scale, down_shift), + GetScissorState(regs, 12, up_scale, down_shift), + GetScissorState(regs, 13, up_scale, down_shift), + GetScissorState(regs, 14, up_scale, down_shift), + GetScissorState(regs, 15, up_scale, down_shift), }; scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); } diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 2f2d6b31f..ac2bbebe0 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -71,11 +71,13 @@ public: } bool TouchViewports() { - return Exchange(Dirty::Viewports, false); + return Exchange(Dirty::Viewports, false) || + Exchange(VideoCommon::Dirty::RescaleViewports, false); } bool TouchScissors() { - return Exchange(Dirty::Scissors, false); + return Exchange(Dirty::Scissors, false) || + Exchange(VideoCommon::Dirty::RescaleScissors, false); } bool TouchDepthBias() { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b7d1ae92d..4e5031acc 100644 --- 
a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -7,6 +7,7 @@ #include #include "common/alignment.h" +#include "common/settings.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/texture_cache/image_view_base.h" @@ -205,6 +206,7 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { return; } + bool rescaled; do { flags[Dirty::RenderTargets] = false; @@ -243,6 +245,7 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { check_rescale(render_targets.depth_buffer_id, tmp_depth_image); if (can_rescale) { + rescaled = true; const auto scale_up = [this](ImageId image_id) { if (image_id != CORRUPT_ID) { Image& image = slot_images[image_id]; @@ -254,6 +257,7 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { } scale_up(tmp_depth_image); } else { + rescaled = false; const auto scale_down = [this](ImageId image_id) { if (image_id != CORRUPT_ID) { Image& image = slot_images[image_id]; @@ -268,6 +272,12 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { } while (has_deleted_images); // Rescale End + if (is_rescaling != rescaled) { + flags[Dirty::RescaleViewports] = true; + flags[Dirty::RescaleScissors] = true; + is_rescaling = rescaled; + } + for (size_t index = 0; index < NUM_RT; ++index) { ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); @@ -279,9 +289,15 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { for (size_t index = 0; index < NUM_RT; ++index) { render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); } + u32 up_scale = 1; + u32 down_shift = 0; + if (is_rescaling) { + up_scale = Settings::values.resolution_info.up_scale; + down_shift = Settings::values.resolution_info.down_shift; + } render_targets.size = Extent2D{ - maxwell3d.regs.render_area.width, - maxwell3d.regs.render_area.height, + (maxwell3d.regs.render_area.width * up_scale) >> down_shift, + (maxwell3d.regs.render_area.height * up_scale) >> down_shift, }; flags[Dirty::DepthBiasGlobal] = true; @@ -538,6 +554,11 @@ void TextureCache

::PopAsyncFlushes() { committed_downloads.pop(); } +template +bool TextureCache

::IsRescaling() { + return is_rescaling; +} + template bool TextureCache

::IsRegionGpuModified(VAddr addr, size_t size) { bool is_modified = false; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index cdd99242b..1f51fcee8 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -168,6 +168,8 @@ public: /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + [[nodiscard]] bool IsRescaling(); + std::mutex mutex; private: @@ -362,6 +364,7 @@ private: VAddr virtual_invalid_space{}; bool has_deleted_images = false; + bool is_rescaling = false; u64 total_used_memory = 0; u64 minimum_memory; u64 expected_memory; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 7ddc40b00..7ed833203 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -849,6 +849,8 @@ void Config::ReadRendererValues() { ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks); } + Settings::UpdateRescalingInfo(); + qt_config->endGroup(); } @@ -1402,6 +1404,8 @@ void Config::SaveRendererValues() { WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks); } + Settings::UpdateRescalingInfo(); + qt_config->endGroup(); } -- cgit v1.2.3 From 8704c939136e88876d65fc670bce98d8250a6588 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 20 Jul 2021 22:51:25 +0200 Subject: TextureCache: Fix rescaling of ImageCopies --- .../renderer_vulkan/vk_texture_cache.cpp | 40 +++++++++++++------- src/video_core/texture_cache/texture_cache.h | 43 ++++++++++++++++++++-- src/video_core/texture_cache/texture_cache_base.h | 2 +- 3 files changed, 67 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 5fd190825..54236e87f 100644 --- 
a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -136,6 +136,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { if (info.type == ImageType::e3D) { flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; } + const auto scale_up = [&](u32 value) { return std::max((value * up) >> down, 1U); }; const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples); const bool is_2d = info.type == ImageType::e2D; return VkImageCreateInfo{ @@ -145,8 +146,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { .imageType = ConvertImageType(info.type), .format = format_info.format, .extent{ - .width = ((info.size.width * up) >> down) >> samples_x, - .height = (is_2d ? ((info.size.height * up) >> down) : info.size.height) >> samples_y, + .width = scale_up(info.size.width) >> samples_x, + .height = (is_2d ? scale_up(info.size.height) : info.size.height) >> samples_y, .depth = info.size.depth, }, .mipLevels = static_cast(info.resources.levels), @@ -1078,12 +1079,35 @@ bool Image::ScaleUp(bool save_as_backup) { MemoryCommit new_commit( runtime->memory_allocator.Commit(rescaled_image, MemoryUsage::DeviceLocal)); + SCOPE_EXIT({ + if (save_as_backup) { + backup_image = std::move(image); + backup_commit = std::move(commit); + has_backup = true; + } else { + runtime->prescaled_images.Push(std::move(image)); + runtime->prescaled_commits.Push(std::move(commit)); + } + image = std::move(rescaled_image); + commit = std::move(new_commit); + }); + + const PixelFormat format = StorageFormat(info.format); + const auto format_info = + MaxwellToVK::SurfaceFormat(runtime->device, FormatType::Optimal, false, format); + const auto similar = runtime->device.GetSupportedFormat( + format_info.format, (VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT), + FormatType::Optimal); + + if (similar != format_info.format) { + return true; + } if (aspect_mask == 0) { aspect_mask = 
ImageAspectMask(info.format); } const auto scale_up = [&](u32 value) { - return (value * resolution.up_scale) >> resolution.down_shift; + return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); }; const bool is_2d = info.type == ImageType::e2D; @@ -1130,16 +1154,6 @@ bool Image::ScaleUp(bool save_as_backup) { vkRegions.push_back(blit); } BlitScale(*scheduler, *image, *rescaled_image, vkRegions, aspect_mask); - if (save_as_backup) { - backup_image = std::move(image); - backup_commit = std::move(commit); - has_backup = true; - } else { - runtime->prescaled_images.Push(std::move(image)); - runtime->prescaled_commits.Push(std::move(commit)); - } - image = std::move(rescaled_image); - commit = std::move(new_commit); return true; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4e5031acc..df697cdeb 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -929,8 +929,8 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); } else { const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); - const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); - runtime.CopyImage(new_image, overlap, copies); + auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); + runtime.CopyImage(new_image, overlap, std::move(copies)); } if (True(overlap.flags & ImageFlagBits::Tracked)) { UntrackImage(overlap, overlap_id); @@ -1569,9 +1569,33 @@ void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modifi } template -void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::span copies) { +void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::vector copies) { Image& dst = slot_images[dst_id]; Image& src = slot_images[src_id]; + const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ASSERT(True(dst.flags & ImageFlagBits::Rescaled)); + const bool both_2d{src.info.type == ImageType::e2D && dst.info.type == ImageType::e2D}; + const auto& resolution = Settings::values.resolution_info; + const auto scale_up = [&](u32 value) -> u32 { + if (value == 0) { + return 0U; + } + return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); + }; + for (auto& copy : copies) { + copy.src_offset.x = scale_up(copy.src_offset.x); + + copy.dst_offset.x = scale_up(copy.dst_offset.x); + + copy.extent.width = scale_up(copy.extent.width); + if (both_2d) { + copy.src_offset.y = scale_up(copy.src_offset.y); + copy.dst_offset.y = scale_up(copy.dst_offset.y); + copy.extent.height = scale_up(copy.extent.height); + } + } + } const auto dst_format_type = GetFormatType(dst.info.format); const auto src_format_type = GetFormatType(src.info.format); if (src_format_type == dst_format_type) { @@ -1639,10 +1663,21 @@ std::pair TextureCache

::RenderTargetFromImage( ImageId image_id, const ImageViewInfo& view_info) { const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); const ImageBase& image = slot_images[image_id]; + const bool is_rescaled = True(image.flags & ImageFlagBits::Rescaled); const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; - const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + if (is_rescaled) { + const auto& resolution = Settings::values.resolution_info; + const auto scale_up = [&](u32 value) { + return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); + }; + extent.width = scale_up(extent.width); + if (image.info.type == ImageType::e2D) { + extent.height = scale_up(extent.height); + } + } const u32 num_samples = image.info.num_samples; const auto [samples_x, samples_y] = SamplesLog2(num_samples); const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 1f51fcee8..deddf0d30 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -316,7 +316,7 @@ private: void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); /// Execute copies from one image to the other, even if they are incompatible - void CopyImage(ImageId dst_id, ImageId src_id, std::span copies); + void CopyImage(ImageId dst_id, ImageId src_id, std::vector copies); /// Bind an image view as render target, downloading resources preemtively if needed void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); -- cgit v1.2.3 From de66a69ed4b556ad96f38570ea0a31cb2a1870f1 Mon Sep 17 00:00:00 2001 
From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 21 Jul 2021 00:02:35 -0400 Subject: renderer_gl: Resolution scaling fixes --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 23 +++- .../renderer_opengl/gl_texture_cache.cpp | 136 ++++++++++++--------- src/video_core/renderer_opengl/gl_texture_cache.h | 9 +- 3 files changed, 107 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a6d9f7c43..b91e7edf8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -577,6 +577,14 @@ void RasterizerOpenGL::SyncViewport() { const bool force = flags[Dirty::ViewportTransform]; flags[Dirty::ViewportTransform] = false; + const auto& resolution = Settings::values.resolution_info; + const auto scale_up = [&](u32 value) -> u32 { + if (value == 0) { + return 0U; + } + const u32 converted_value = (value * resolution.up_scale) >> resolution.down_shift; + return std::max(converted_value, 1U); + }; for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) { if (!force && !flags[Dirty::Viewport0 + i]) { continue; @@ -585,8 +593,8 @@ void RasterizerOpenGL::SyncViewport() { const auto& src = regs.viewport_transform[i]; const Common::Rectangle rect{src.GetRect()}; - glViewportIndexedf(static_cast(i), rect.left, rect.bottom, rect.GetWidth(), - rect.GetHeight()); + glViewportIndexedf(static_cast(i), rect.left, rect.bottom, + scale_up(rect.GetWidth()), scale_up(rect.GetHeight())); const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z; @@ -909,6 +917,15 @@ void RasterizerOpenGL::SyncScissorTest() { flags[Dirty::Scissors] = false; const auto& regs = maxwell3d.regs; + + const auto& resolution = Settings::values.resolution_info; + const auto scale_up = [&](u32 value) -> u32 { + if (value == 0) { + return 0U; + 
} + const u32 converted_value = (value * resolution.up_scale) >> resolution.down_shift; + return std::max(converted_value, 1U); + }; for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { if (!flags[Dirty::Scissor0 + index]) { continue; @@ -919,7 +936,7 @@ void RasterizerOpenGL::SyncScissorTest() { if (src.enable) { glEnablei(GL_SCISSOR_TEST, static_cast(index)); glScissorIndexed(static_cast(index), src.min_x, src.min_y, - src.max_x - src.min_x, src.max_y - src.min_y); + scale_up(src.max_x - src.min_x), scale_up(src.max_y - src.min_y)); } else { glDisablei(GL_SCISSOR_TEST, static_cast(index)); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 2d9f770cd..ecde5b600 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -316,6 +316,52 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } } +OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_format) { + const GLenum target = ImageTarget(info); + const GLsizei width = info.size.width; + const GLsizei height = info.size.height; + const GLsizei depth = info.size.depth; + const int max_host_mip_levels = std::bit_width(info.size.width); + const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); + const GLsizei num_layers = info.resources.layers; + const GLsizei num_samples = info.num_samples; + + GLuint handle = 0; + OGLTexture texture; + if (target != GL_TEXTURE_BUFFER) { + texture.Create(target); + handle = texture.handle; + } + switch (target) { + case GL_TEXTURE_1D_ARRAY: + glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); + break; + case GL_TEXTURE_2D_ARRAY: + glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); + break; + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { + // TODO: Where should 'fixedsamplelocations' come 
from? + const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); + glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x, + height >> samples_y, num_layers, GL_FALSE); + break; + } + case GL_TEXTURE_RECTANGLE: + glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); + break; + case GL_TEXTURE_3D: + glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); + break; + case GL_TEXTURE_BUFFER: + UNREACHABLE(); + break; + default: + UNREACHABLE_MSG("Invalid target=0x{:x}", target); + break; + } + return texture; +} + [[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) { switch (format) { case PixelFormat::B5G6R5_UNORM: @@ -430,6 +476,11 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& TextureCacheRuntime::~TextureCacheRuntime() = default; +void TextureCacheRuntime::Init() { + resolution = Settings::values.resolution_info; + is_rescaling_on = resolution.up_scale != 1 || resolution.down_shift != 0; +} + void TextureCacheRuntime::Finish() { glFinish(); } @@ -605,13 +656,13 @@ std::optional TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req return found; } -Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, +Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) - : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) { - if (CanBeAccelerated(runtime, info)) { + : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { + if (CanBeAccelerated(*runtime, info)) { flags |= ImageFlagBits::AcceleratedUpload; } - if (IsConverted(runtime.device, info.format, info.type)) { + if (IsConverted(runtime->device, info.format, info.type)) { flags |= ImageFlagBits::Converted; gl_internal_format = IsPixelFormatSRGB(info.format) ? 
GL_SRGB8_ALPHA8 : GL_RGBA8; gl_format = GL_RGBA; @@ -622,51 +673,11 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, gl_format = tuple.format; gl_type = tuple.type; } - const GLenum target = ImageTarget(info); - const GLsizei width = info.size.width; - const GLsizei height = info.size.height; - const GLsizei depth = info.size.depth; - const int max_host_mip_levels = std::bit_width(info.size.width); - const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels); - const GLsizei num_layers = info.resources.layers; - const GLsizei num_samples = info.num_samples; - - GLuint handle = 0; - if (target != GL_TEXTURE_BUFFER) { - texture.Create(target); - handle = texture.handle; - } - switch (target) { - case GL_TEXTURE_1D_ARRAY: - glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers); - break; - case GL_TEXTURE_2D_ARRAY: - glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers); - break; - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: { - // TODO: Where should 'fixedsamplelocations' come from? - const auto [samples_x, samples_y] = SamplesLog2(info.num_samples); - glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x, - height >> samples_y, num_layers, GL_FALSE); - break; - } - case GL_TEXTURE_RECTANGLE: - glTextureStorage2D(handle, num_levels, gl_internal_format, width, height); - break; - case GL_TEXTURE_3D: - glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); - break; - case GL_TEXTURE_BUFFER: - UNREACHABLE(); - break; - default: - UNREACHABLE_MSG("Invalid target=0x{:x}", target); - break; - } - if (runtime.device.HasDebuggingToolAttached()) { + texture = MakeImage(info, gl_internal_format); + if (runtime->device.HasDebuggingToolAttached()) { const std::string name = VideoCommon::Name(*this); - glObjectLabel(target == GL_TEXTURE_BUFFER ? 
GL_BUFFER : GL_TEXTURE, handle, - static_cast(name.size()), name.data()); + glObjectLabel(ImageTarget(info) == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, + texture.handle, static_cast(name.size()), name.data()); } } @@ -855,7 +866,7 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } } -void Image::Scale() { +void Image::Scale(u32 up, u32 down) { // TODO: Pass scaling factor? if (gl_format == 0 || gl_type == 0) { // compressed textures @@ -902,12 +913,22 @@ void Image::Scale() { glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo_handle); glNamedFramebufferTexture(fbo_handle, attachment, texture.handle, 0); - const size_t scaled_width = info.size.width; - const size_t scaled_height = info.size.height * 2; - glBlitNamedFramebuffer(fbo_handle, fbo_handle, 0, 0, info.size.width, info.size.height, 0, 0, + const auto scale_up = [&](u32 value) { return std::max((value * up) >> down, 1U); }; + const u32 scaled_width = scale_up(info.size.width); + const u32 scaled_height = scale_up(info.size.height); + const u32 original_width = info.size.width; + const u32 original_height = info.size.height; + + auto scaled_info = info; + scaled_info.size.width = scaled_width; + scaled_info.size.height = scaled_height; + auto scaled_texture = MakeImage(scaled_info, gl_internal_format); + + glBlitNamedFramebuffer(fbo_handle, fbo_handle, 0, 0, original_width, original_height, 0, 0, scaled_width, scaled_height, mask, filter); - // TODO: resize texture? 
- glCopyTextureSubImage3D(texture.handle, 0, 0, 0, 0, 0, 0, scaled_width, scaled_height / 2); + glCopyTextureSubImage3D(scaled_texture.handle, 0, 0, 0, 0, 0, 0, scaled_width, scaled_height); + texture = std::move(scaled_texture); + // Restore previous framebuffers glBindFramebuffer(GL_DRAW_FRAMEBUFFER, prev_draw_fbo); glBindFramebuffer(GL_READ_FRAMEBUFFER, prev_read_fbo); @@ -918,7 +939,8 @@ bool Image::ScaleUp() { return false; } flags |= ImageFlagBits::Rescaled; - //Scale(); + const auto& resolution = runtime->resolution; + Scale(resolution.up_scale, resolution.down_shift); return true; } @@ -927,7 +949,9 @@ bool Image::ScaleDown() { return false; } flags &= ~ImageFlagBits::Rescaled; - //Scale(); + UNIMPLEMENTED(); + // const auto& resolution = runtime->resolution; + // Scale(); return true; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 324a0f1cb..77ca14132 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -9,6 +9,7 @@ #include +#include "common/settings.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/util_shaders.h" @@ -72,7 +73,7 @@ public: StateTracker& state_tracker); ~TextureCacheRuntime(); - void Init() {} + void Init(); void Finish(); @@ -153,6 +154,9 @@ private: OGLTextureView null_image_view_cube; std::array null_image_views{}; + + Settings::ResolutionScalingInfo resolution; + bool is_rescaling_on{}; }; class Image : public VideoCommon::ImageBase { @@ -198,13 +202,14 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); - void Scale(); + void Scale(u32 up, u32 down); OGLTexture texture; OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; GLenum gl_type = GL_NONE; + TextureCacheRuntime* runtime; }; class ImageView : public 
VideoCommon::ImageViewBase { -- cgit v1.2.3 From 973f8f1d08995e4b3e3e849b6c5fa77e5ce824a7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Jul 2021 02:55:55 -0300 Subject: Fix blits --- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 54236e87f..b061ea08b 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1111,9 +1111,10 @@ bool Image::ScaleUp(bool save_as_backup) { }; const bool is_2d = info.type == ImageType::e2D; - boost::container::small_vector vkRegions(info.resources.levels); + boost::container::small_vector regions; + regions.reserve(info.resources.levels); for (s32 level = 0; level < info.resources.levels; level++) { - VkImageBlit blit{ + regions.push_back({ .srcSubresource{ .aspectMask = aspect_mask, .mipLevel = u32(level), @@ -1150,10 +1151,9 @@ bool Image::ScaleUp(bool save_as_backup) { .z = 1, }, }, - }; - vkRegions.push_back(blit); + }); } - BlitScale(*scheduler, *image, *rescaled_image, vkRegions, aspect_mask); + BlitScale(*scheduler, *image, *rescaled_image, regions, aspect_mask); return true; } @@ -1193,9 +1193,10 @@ bool Image::ScaleDown(bool save_as_backup) { } const bool is_2d = info.type == ImageType::e2D; - boost::container::small_vector vkRegions(info.resources.levels); + boost::container::small_vector regions; + regions.reserve(info.resources.levels); for (s32 level = 0; level < info.resources.levels; level++) { - VkImageBlit blit{ + regions.push_back({ .srcSubresource{ .aspectMask = aspect_mask, .mipLevel = u32(level), @@ -1232,10 +1233,9 @@ bool Image::ScaleDown(bool save_as_backup) { .z = 1, }, }, - }; - vkRegions.push_back(blit); + }); } - BlitScale(*scheduler, *image, *downscaled_image, vkRegions, aspect_mask); + BlitScale(*scheduler, 
*image, *downscaled_image, regions, aspect_mask); if (save_as_backup) { backup_image = std::move(image); backup_commit = std::move(commit); -- cgit v1.2.3 From d464b122d5385105a6f4b0bc34bc1695c706417c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Jul 2021 03:07:18 -0300 Subject: Fix blits with mips --- .../renderer_vulkan/vk_texture_cache.cpp | 28 ++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index b061ea08b..4e05058c9 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1128,8 +1128,8 @@ bool Image::ScaleUp(bool save_as_backup) { .z = 0, }, { - .x = s32(info.size.width), - .y = s32(info.size.height), + .x = static_cast(info.size.width), + .y = static_cast(info.size.height), .z = 1, }, }, @@ -1146,8 +1146,10 @@ bool Image::ScaleUp(bool save_as_backup) { .z = 0, }, { - .x = s32(scale_up(info.size.width)), - .y = is_2d ? s32(scale_up(info.size.height)) : s32(info.size.height), + .x = std::max(1, static_cast(scale_up(info.size.width)) >> level), + .y = std::max(1, static_cast(is_2d ? scale_up(info.size.height) + : info.size.height) >> + level), .z = 1, }, }, @@ -1199,9 +1201,9 @@ bool Image::ScaleDown(bool save_as_backup) { regions.push_back({ .srcSubresource{ .aspectMask = aspect_mask, - .mipLevel = u32(level), + .mipLevel = static_cast(level), .baseArrayLayer = 0, - .layerCount = u32(info.resources.layers), + .layerCount = static_cast(info.resources.layers), }, .srcOffsets{ { @@ -1210,16 +1212,18 @@ bool Image::ScaleDown(bool save_as_backup) { .z = 0, }, { - .x = s32(scale_up(info.size.width)), - .y = is_2d ? s32(scale_up(info.size.height)) : s32(info.size.height), + .x = std::max(1, static_cast(scale_up(info.size.width)) >> level), + .y = std::max(1, static_cast(is_2d ? 
scale_up(info.size.height) + : info.size.height) >> + level), .z = 1, }, }, .dstSubresource{ .aspectMask = aspect_mask, - .mipLevel = u32(level), + .mipLevel = static_cast(level), .baseArrayLayer = 0, - .layerCount = u32(info.resources.layers), + .layerCount = static_cast(info.resources.layers), }, .dstOffsets{ { @@ -1228,8 +1232,8 @@ bool Image::ScaleDown(bool save_as_backup) { .z = 0, }, { - .x = s32(info.size.width), - .y = s32(info.size.height), + .x = std::max(1, static_cast(info.size.width) >> level), + .y = std::max(1, static_cast(info.size.height) >> level), .z = 1, }, }, -- cgit v1.2.3 From dfc65cd0a3b259588b47db1d32c827e7fc071aeb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 21 Jul 2021 10:15:09 +0200 Subject: Texture Cache: Implement Rescaling on Aliases and Blits. --- src/video_core/texture_cache/texture_cache.h | 58 +++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index df697cdeb..25fea8240 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -437,8 +437,32 @@ void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, PrepareImage(src_id, false, false); PrepareImage(dst_id, true, false); - ImageBase& dst_image = slot_images[dst_id]; - const ImageBase& src_image = slot_images[src_id]; + Image& dst_image = slot_images[dst_id]; + const Image& src_image = slot_images[src_id]; + + const bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); + bool is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); + + if (is_src_rescaled && !is_dst_rescaled) { + if (ImageCanRescale(dst_image)) { + is_dst_rescaled = dst_image.ScaleUp(); + } + } + + const auto& resolution = Settings::values.resolution_info; + const auto scale_up = [&](u32 value) -> u32 { + if (value == 0) { + return 0U; + } + return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); + }; + + const auto scale_region = [&](Region2D& region) { + region.start.x = scale_up(region.start.x); + region.start.y = scale_up(region.start.y); + region.end.x = scale_up(region.end.x); + region.end.y = scale_up(region.end.y); + }; // TODO: Deduplicate const std::optional src_base = src_image.TryFindBase(src.Address()); @@ -446,20 +470,26 @@ void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); - const Region2D src_region{ + Region2D src_region{ Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, }; + if (is_src_rescaled) { + scale_region(src_region); + } const std::optional dst_base = dst_image.TryFindBase(dst.Address()); const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); - const Region2D dst_region{ + Region2D dst_region{ Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, }; + if (is_dst_rescaled) { + scale_region(dst_region); + } // Always call this after src_framebuffer_id was queried, as the address might be invalidated. Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; @@ -1514,18 +1544,28 @@ void TextureCache

::MarkModification(ImageBase& image) noexcept { template void TextureCache

::SynchronizeAliases(ImageId image_id) { boost::container::small_vector aliased_images; - ImageBase& image = slot_images[image_id]; + Image& image = slot_images[image_id]; + bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); u64 most_recent_tick = image.modification_tick; for (const AliasedImage& aliased : image.aliased_images) { ImageBase& aliased_image = slot_images[aliased.id]; if (image.modification_tick < aliased_image.modification_tick) { most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); + any_rescaled |= True(image.flags & ImageFlagBits::Rescaled); } } if (aliased_images.empty()) { return; } + const bool can_rescale = ImageCanRescale(image); + if (any_rescaled) { + if (can_rescale) { + ScaleUp(image); + } else { + ScaleDown(image); + } + } image.modification_tick = most_recent_tick; std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { const ImageBase& lhs_image = slot_images[lhs->id]; @@ -1533,6 +1573,14 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { return lhs_image.modification_tick < rhs_image.modification_tick; }); for (const AliasedImage* const aliased : aliased_images) { + if (any_rescaled) { + Image& aliased_image = slot_images[aliased->id]; + if (can_rescale) { + ScaleUp(aliased_image); + } else { + ScaleDown(aliased_image); + } + } CopyImage(image_id, aliased->id, aliased->copies); } } -- cgit v1.2.3 From 0a6c895af76503e7658f8660cc1af097e5d9c11f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 21 Jul 2021 20:50:15 -0400 Subject: gl_texture_cache: Rescale fixes for multi-layered textures --- .../renderer_opengl/gl_texture_cache.cpp | 47 ++++++++++++++-------- src/video_core/renderer_opengl/gl_texture_cache.h | 1 + 2 files changed, 32 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ecde5b600..53b5c0947 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -479,6 +479,9 @@ TextureCacheRuntime::~TextureCacheRuntime() = default; void TextureCacheRuntime::Init() { resolution = Settings::values.resolution_info; is_rescaling_on = resolution.up_scale != 1 || resolution.down_shift != 0; + if (is_rescaling_on) { + rescale_fbo.Create(); + } } void TextureCacheRuntime::Finish() { @@ -867,8 +870,10 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } void Image::Scale(u32 up, u32 down) { - // TODO: Pass scaling factor? 
- if (gl_format == 0 || gl_type == 0) { + if (!runtime->is_rescaling_on) { + return; + } + if (gl_format == 0 && gl_type == 0) { // compressed textures return; } @@ -876,9 +881,7 @@ void Image::Scale(u32 up, u32 down) { UNIMPLEMENTED(); return; } - GLint prev_draw_fbo; GLint prev_read_fbo; - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &prev_draw_fbo); glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &prev_read_fbo); const GLenum attachment = [this] { switch (GetFormatType(info.format)) { @@ -907,15 +910,10 @@ void Image::Scale(u32 up, u32 down) { } }(); const GLenum filter = (mask & GL_COLOR_BUFFER_BIT) != 0 ? GL_LINEAR : GL_NEAREST; - GLuint fbo_handle; - glGenFramebuffers(1, &fbo_handle); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_handle); - glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo_handle); - glNamedFramebufferTexture(fbo_handle, attachment, texture.handle, 0); - const auto scale_up = [&](u32 value) { return std::max((value * up) >> down, 1U); }; + const bool is_2d = info.type == ImageType::e2D; const u32 scaled_width = scale_up(info.size.width); - const u32 scaled_height = scale_up(info.size.height); + const u32 scaled_height = is_2d ? 
scale_up(info.size.height) : info.size.height; const u32 original_width = info.size.width; const u32 original_height = info.size.height; @@ -923,14 +921,31 @@ void Image::Scale(u32 up, u32 down) { scaled_info.size.width = scaled_width; scaled_info.size.height = scaled_height; auto scaled_texture = MakeImage(scaled_info, gl_internal_format); - - glBlitNamedFramebuffer(fbo_handle, fbo_handle, 0, 0, original_width, original_height, 0, 0, - scaled_width, scaled_height, mask, filter); - glCopyTextureSubImage3D(scaled_texture.handle, 0, 0, 0, 0, 0, 0, scaled_width, scaled_height); + const auto& blit_fbo = runtime->rescale_fbo; + for (s32 level = 0; level < info.resources.levels; ++level) { + const u32 level_width = scaled_width >> level; + const u32 level_height = scaled_height >> level; + glBindFramebuffer(GL_READ_FRAMEBUFFER, blit_fbo.handle); + glNamedFramebufferTexture(blit_fbo.handle, attachment, texture.handle, level); + glBlitNamedFramebuffer(blit_fbo.handle, blit_fbo.handle, 0, 0, original_width, + original_height, 0, 0, level_width, level_height, mask, filter); + switch (info.type) { + case ImageType::e1D: + glCopyTextureSubImage2D(scaled_texture.handle, level, 0, 0, 0, 0, level_width, + level_height); + break; + case ImageType::e2D: + glCopyTextureSubImage3D(scaled_texture.handle, level, 0, 0, 0, 0, 0, level_width, + level_height); + break; + case ImageType::e3D: + default: + UNREACHABLE(); + } + } texture = std::move(scaled_texture); // Restore previous framebuffers - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, prev_draw_fbo); glBindFramebuffer(GL_READ_FRAMEBUFFER, prev_read_fbo); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 77ca14132..03de50ad5 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -155,6 +155,7 @@ private: std::array null_image_views{}; + OGLFramebuffer rescale_fbo; Settings::ResolutionScalingInfo 
resolution; bool is_rescaling_on{}; }; -- cgit v1.2.3 From fddf372c689d466372ac94b6920beb883c7740e4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 21 Jul 2021 21:23:00 -0400 Subject: gl_texture_cache: Implement ScaleDown --- .../renderer_opengl/gl_texture_cache.cpp | 60 +++++++++++++--------- src/video_core/renderer_opengl/gl_texture_cache.h | 2 +- 2 files changed, 36 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 53b5c0947..34f74e37d 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -869,17 +869,17 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } } -void Image::Scale(u32 up, u32 down) { +bool Image::Scale(bool scale_src, bool scale_dst) { if (!runtime->is_rescaling_on) { - return; + return false; } if (gl_format == 0 && gl_type == 0) { // compressed textures - return; + return false; } if (info.type == ImageType::Linear) { UNIMPLEMENTED(); - return; + return false; } GLint prev_read_fbo; glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &prev_read_fbo); @@ -910,43 +910,58 @@ void Image::Scale(u32 up, u32 down) { } }(); const GLenum filter = (mask & GL_COLOR_BUFFER_BIT) != 0 ? GL_LINEAR : GL_NEAREST; - const auto scale_up = [&](u32 value) { return std::max((value * up) >> down, 1U); }; const bool is_2d = info.type == ImageType::e2D; + const auto& resolution = runtime->resolution; + const u32 up = resolution.up_scale; + const u32 down = resolution.down_shift; + + const auto scale_up = [&](u32 value) { return std::max((value * up) >> down, 1U); }; const u32 scaled_width = scale_up(info.size.width); const u32 scaled_height = is_2d ? 
scale_up(info.size.height) : info.size.height; const u32 original_width = info.size.width; const u32 original_height = info.size.height; - auto scaled_info = info; - scaled_info.size.width = scaled_width; - scaled_info.size.height = scaled_height; - auto scaled_texture = MakeImage(scaled_info, gl_internal_format); + const u32 src_width = scale_src ? scaled_width : original_width; + const u32 src_height = scale_src ? scaled_height : original_height; + const u32 dst_width = scale_dst ? scaled_width : original_width; + const u32 dst_height = scale_dst ? scaled_height : original_height; + + auto dst_info = info; + dst_info.size.width = dst_width; + dst_info.size.height = dst_height; + auto dst_texture = MakeImage(dst_info, gl_internal_format); + const auto& blit_fbo = runtime->rescale_fbo; for (s32 level = 0; level < info.resources.levels; ++level) { - const u32 level_width = scaled_width >> level; - const u32 level_height = scaled_height >> level; + const u32 src_level_width = std::max(1u, src_width >> level); + const u32 src_level_height = std::max(1u, src_height >> level); + const u32 dst_level_width = std::max(1u, dst_width >> level); + const u32 dst_level_height = std::max(1u, dst_height >> level); + glBindFramebuffer(GL_READ_FRAMEBUFFER, blit_fbo.handle); glNamedFramebufferTexture(blit_fbo.handle, attachment, texture.handle, level); - glBlitNamedFramebuffer(blit_fbo.handle, blit_fbo.handle, 0, 0, original_width, - original_height, 0, 0, level_width, level_height, mask, filter); + glBlitNamedFramebuffer(blit_fbo.handle, blit_fbo.handle, 0, 0, src_level_width, + src_level_height, 0, 0, dst_level_width, dst_level_height, mask, + filter); switch (info.type) { case ImageType::e1D: - glCopyTextureSubImage2D(scaled_texture.handle, level, 0, 0, 0, 0, level_width, - level_height); + glCopyTextureSubImage2D(dst_texture.handle, level, 0, 0, 0, 0, dst_level_width, + dst_level_height); break; case ImageType::e2D: - glCopyTextureSubImage3D(scaled_texture.handle, level, 0, 0, 
0, 0, 0, level_width, - level_height); + glCopyTextureSubImage3D(dst_texture.handle, level, 0, 0, 0, 0, 0, dst_level_width, + dst_level_height); break; case ImageType::e3D: default: UNREACHABLE(); } } - texture = std::move(scaled_texture); + texture = std::move(dst_texture); // Restore previous framebuffers glBindFramebuffer(GL_READ_FRAMEBUFFER, prev_read_fbo); + return true; } bool Image::ScaleUp() { @@ -954,9 +969,7 @@ bool Image::ScaleUp() { return false; } flags |= ImageFlagBits::Rescaled; - const auto& resolution = runtime->resolution; - Scale(resolution.up_scale, resolution.down_shift); - return true; + return Scale(false, true); } bool Image::ScaleDown() { @@ -964,10 +977,7 @@ bool Image::ScaleDown() { return false; } flags &= ~ImageFlagBits::Rescaled; - UNIMPLEMENTED(); - // const auto& resolution = runtime->resolution; - // Scale(); - return true; + return Scale(true, false); } ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 03de50ad5..f2e48b4c7 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -203,7 +203,7 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); - void Scale(u32 up, u32 down); + bool Scale(bool scale_src, bool scale_dst); OGLTexture texture; OGLTextureView store_view; -- cgit v1.2.3 From fb924ea85c22353c1a6f108d38372ab14355695b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Jul 2021 22:24:33 -0300 Subject: shader: Add resolution down factor opcode --- src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | 1 + src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp | 5 +++++ src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | 5 +++++ src/shader_recompiler/backend/glsl/emit_glsl_instructions.h | 1 + 
src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | 5 +++++ src/shader_recompiler/backend/spirv/emit_spirv_instructions.h | 1 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 2 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 1 + 9 files changed, 25 insertions(+) (limited to 'src') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 12afda43b..cb7232704 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -72,6 +72,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitSampleId(EmitContext& ctx, IR::Inst& inst); void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); void EmitYDirection(EmitContext& ctx, IR::Inst& inst); +void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst); void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index e537f6073..807494063 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -210,6 +210,11 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { ctx.Add("MOV.F {}.x,y_direction[0].w;", inst); } +void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { + UNIMPLEMENTED(); + ctx.Add("MOV.F {}.x,1;", inst); +} + void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { ctx.Add("MOV.S {}.x,0;", inst); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp 
b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 170db269a..f4ed090e3 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -445,6 +445,11 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); } +void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { + UNIMPLEMENTED(); + ctx.AddF32("{}=1.0f;", inst); +} + void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { ctx.AddU32("{}=lmem[{}];", inst, word_offset); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 5936d086f..6cae0b84a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -85,6 +85,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitSampleId(EmitContext& ctx, IR::Inst& inst); void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); void EmitYDirection(EmitContext& ctx, IR::Inst& inst); +void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst); void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d3a93d5f4..43f440dfb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -526,6 +526,11 @@ Id EmitYDirection(EmitContext& ctx) { return ctx.Const(ctx.runtime_info.y_negate ? 
-1.0f : 1.0f); } +Id EmitResolutionDownFactor(EmitContext& ctx) { + UNIMPLEMENTED(); + return ctx.Const(1.0f); +} + Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; return ctx.OpLoad(ctx.U32[1], pointer); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index c9db1c164..3d90b2286 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -75,6 +75,7 @@ Id EmitInvocationId(EmitContext& ctx); Id EmitSampleId(EmitContext& ctx); Id EmitIsHelperInvocation(EmitContext& ctx); Id EmitYDirection(EmitContext& ctx); +Id EmitResolutionDownFactor(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); Id EmitUndefU1(EmitContext& ctx); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 13159a68d..9ae5da2a1 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -375,6 +375,10 @@ F32 IREmitter::YDirection() { return Inst(Opcode::YDirection); } +F32 IREmitter::ResolutionDownFactor() { + return Inst(Opcode::ResolutionDownFactor); +} + U32 IREmitter::LaneId() { return Inst(Opcode::LaneId); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1b89ca5a0..0c664d2fe 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -102,6 +102,8 @@ public: [[nodiscard]] U1 IsHelperInvocation(); [[nodiscard]] F32 YDirection(); + [[nodiscard]] F32 ResolutionDownFactor(); + [[nodiscard]] U32 LaneId(); [[nodiscard]] U32 LoadGlobalU8(const U64& address); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc 
b/src/shader_recompiler/frontend/ir/opcodes.inc index d91098c80..72751c5a0 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -62,6 +62,7 @@ OPCODE(InvocationId, U32, OPCODE(SampleId, U32, ) OPCODE(IsHelperInvocation, U1, ) OPCODE(YDirection, F32, ) +OPCODE(ResolutionDownFactor, F32, ) // Undefined OPCODE(UndefU1, U1, ) -- cgit v1.2.3 From 1672e9ba092f6bc268ece7619c4bae793c00c580 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Jul 2021 22:25:34 -0300 Subject: shader: Fix resolution scaling pass --- src/shader_recompiler/CMakeLists.txt | 1 + .../ir_opt/collect_shader_info_pass.cpp | 3 ++ src/shader_recompiler/ir_opt/passes.h | 1 + src/shader_recompiler/ir_opt/rescaling_pass.cpp | 60 ++++++++++------------ src/shader_recompiler/shader_info.h | 2 +- 5 files changed, 32 insertions(+), 35 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index b5b7e5e83..bc3df80c8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -221,6 +221,7 @@ add_library(shader_recompiler STATIC ir_opt/lower_fp16_to_fp32.cpp ir_opt/lower_int64_to_int32.cpp ir_opt/passes.h + ir_opt/rescaling_pass.cpp ir_opt/ssa_rewrite_pass.cpp ir_opt/texture_pass.cpp ir_opt/verification_pass.cpp diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f69e1c9cc..ef918f4d4 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -430,6 +430,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::IsHelperInvocation: info.uses_is_helper_invocation = true; break; + case IR::Opcode::ResolutionDownFactor: + info.uses_rescaling_uniform = true; + break; case IR::Opcode::LaneId: info.uses_subgroup_invocation_id = true; break; diff --git 
a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 2f89b1ea0..f877c7ba0 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -19,6 +19,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); void LowerInt64ToInt32(IR::Program& program); +void RescalingPass(IR::Program& program); void SsaRewritePass(IR::Program& program); void TexturePass(Environment& env, IR::Program& program); void VerificationPass(const IR::Program& program); diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index d3ae3f159..293593c78 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -3,7 +3,9 @@ // Refer to the license.txt file included. #include "common/alignment.h" +#include "common/settings.h" #include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/value.h" @@ -12,59 +14,49 @@ namespace Shader::Optimization { namespace { - -void PatchFragCoord(IR::Inst& inst) { +void PatchFragCoord(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - const IR::F32 inv_resolution_factor = IR::F32{Settings::values.resolution_info.down_factor}; - const IR::F32 new_get_attribute = ir.GetAttribute(inst.Arg(0).Attribute()); - const IR::F32 mul = ir.FMul(new_get_attribute, inv_resolution_factor); - const IR::U1 should_rescale = IR::U1{true}; - const IR::F32 selection = ir.Select(should_rescale, mul, new_get_attribute); - inst.ReplaceUsesWith(selection); + const IR::F32 down_factor{ir.ResolutionDownFactor()}; + const IR::F32 frag_coord{&inst}; + const IR::F32 
downscaled_frag_coord{ir.FPMul(frag_coord, down_factor)}; + inst.ReplaceUsesWith(downscaled_frag_coord); } -void Visit(Info& info, IR::Inst& inst) { - info.requires_rescaling_uniform = false; +void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { + const bool is_fragment_shader{program.stage == Stage::Fragment}; switch (inst.GetOpcode()) { case IR::Opcode::GetAttribute: { - conast auto attrib = inst.Arg(0).Attribute(); - const bool is_frag = - attrib == IR::Attribute::PositionX || attrib == IR::Attribute::PositionY; - const bool must_path = is_frag && program.stage == Stage::Fragment; - if (must_path) { - PatchFragCoord(inst); - info.requires_rescaling_uniform = true; + const IR::Attribute attr{inst.Arg(0).Attribute()}; + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + if (is_fragment_shader) { + PatchFragCoord(block, inst); + } + break; + default: + break; } break; } - case IR::Opcode::ImageQueryDimensions: { - info.requires_rescaling_uniform |= true; + case IR::Opcode::ImageQueryDimensions: break; - } - case IR::Opcode::ImageFetch: { - info.requires_rescaling_uniform |= true; + case IR::Opcode::ImageFetch: break; - } - case IR::Opcode::ImageRead: { - info.requires_rescaling_uniform |= true; + case IR::Opcode::ImageRead: break; - } - case IR::Opcode::ImageWrite: { - info.requires_rescaling_uniform |= true; + case IR::Opcode::ImageWrite: break; - } default: break; } } +} // Anonymous namespace -} // namespace - -void RescalingPass(Environment& env, IR::Program& program) { - Info& info{program.info}; +void RescalingPass(IR::Program& program) { for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { - Visit(info, inst); + Visit(program, *block, inst); } } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index e7981a08c..7bac9e2cd 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ 
-172,7 +172,7 @@ struct Info { bool uses_global_memory{}; bool uses_atomic_image_u32{}; bool uses_shadow_lod{}; - bool requires_rescaling_uniform{}; + bool uses_rescaling_uniform{}; IR::Type used_constant_buffer_types{}; IR::Type used_storage_buffer_types{}; -- cgit v1.2.3 From 520c4a44f6dfa78a462517c807d2e99d4775b84e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Jul 2021 22:39:05 -0300 Subject: vk_texture_cache: Fix scaling blit validation errors --- .../renderer_vulkan/vk_texture_cache.cpp | 159 ++++++++++----------- 1 file changed, 78 insertions(+), 81 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 4e05058c9..d95eeafb9 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -593,6 +593,82 @@ struct RangedBarrierRange { UNREACHABLE_MSG("Invalid image format={}", format); return VK_FORMAT_R32_UINT; } + +void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, + boost::container::small_vector&& blit_regions, + VkImageAspectFlags aspect_mask) { + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([dst_image, src_image, aspect_mask, + regions = std::move(blit_regions)](vk::CommandBuffer cmdbuf) { + const VkImageSubresourceRange subresource_range{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }; + const std::array read_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + 
.subresourceRange = subresource_range, + }, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, // Discard contents + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = subresource_range, + }, + }; + const std::array write_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = subresource_range, + }, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = subresource_range, + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, nullptr, nullptr, read_barriers); + cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, VK_FILTER_NEAREST); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, nullptr, nullptr, write_barriers); + }); +} 
} // Anonymous namespace void TextureCacheRuntime::Init() { @@ -983,85 +1059,6 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span& blit_regions, - VkImageAspectFlags aspect_mask) { - scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([dst_image, src_image, aspect_mask, - regions = std::move(blit_regions)](vk::CommandBuffer cmdbuf) { - const std::array read_barriers{ - VkImageMemoryBarrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = src_image, - .subresourceRange{ - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }, - VkImageMemoryBarrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = dst_image, - .subresourceRange{ - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }, - }; - VkImageMemoryBarrier write_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | - 
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = dst_image, - .subresourceRange{ - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, nullptr, nullptr, read_barriers); - const VkFilter vk_filter = VK_FILTER_NEAREST; - cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, vk_filter); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - 0, write_barrier); - }); -} - bool Image::ScaleUp(bool save_as_backup) { if (True(flags & ImageFlagBits::Rescaled)) { return false; @@ -1155,7 +1152,7 @@ bool Image::ScaleUp(bool save_as_backup) { }, }); } - BlitScale(*scheduler, *image, *rescaled_image, regions, aspect_mask); + BlitScale(*scheduler, *image, *rescaled_image, std::move(regions), aspect_mask); return true; } @@ -1239,7 +1236,7 @@ bool Image::ScaleDown(bool save_as_backup) { }, }); } - BlitScale(*scheduler, *image, *downscaled_image, regions, aspect_mask); + BlitScale(*scheduler, *image, *downscaled_image, std::move(regions), aspect_mask); if (save_as_backup) { backup_image = std::move(image); backup_commit = std::move(commit); -- cgit v1.2.3 From 0fb4b84383c457328ff3786f6359071543a0728d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Jul 2021 22:57:53 -0300 Subject: vk_texture_cache: Simplify and optimize scaling blits --- .../renderer_vulkan/vk_texture_cache.cpp | 168 ++++++++------------- 1 file changed, 62 insertions(+), 106 deletions(-) 
(limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index d95eeafb9..720247b4e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -594,12 +594,67 @@ struct RangedBarrierRange { return VK_FORMAT_R32_UINT; } -void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, - boost::container::small_vector&& blit_regions, - VkImageAspectFlags aspect_mask) { +void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info, + VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution) { + const auto type = info.type; + const auto resources = info.resources; + const VkExtent2D extent{ + .width = info.size.width, + .height = info.size.height, + }; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([dst_image, src_image, aspect_mask, - regions = std::move(blit_regions)](vk::CommandBuffer cmdbuf) { + scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, + type](vk::CommandBuffer cmdbuf) { + const auto scale_up = [&](u32 value) { + return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); + }; + const bool is_2d = type == ImageType::e2D; + const VkOffset2D mip0_size{ + .x = static_cast(scale_up(extent.width)), + .y = static_cast(is_2d ? 
scale_up(extent.height) : extent.height), + }; + boost::container::small_vector regions; + regions.reserve(resources.levels); + for (s32 level = 0; level < resources.levels; level++) { + regions.push_back({ + .srcSubresource{ + .aspectMask = aspect_mask, + .mipLevel = static_cast(level), + .baseArrayLayer = 0, + .layerCount = static_cast(resources.layers), + }, + .srcOffsets{ + { + .x = 0, + .y = 0, + .z = 0, + }, + { + .x = static_cast(extent.width), + .y = static_cast(extent.height), + .z = 1, + }, + }, + .dstSubresource{ + .aspectMask = aspect_mask, + .mipLevel = static_cast(level), + .baseArrayLayer = 0, + .layerCount = static_cast(resources.layers), + }, + .dstOffsets{ + { + .x = 0, + .y = 0, + .z = 0, + }, + { + .x = std::max(1, mip0_size.x >> level), + .y = std::max(1, mip0_size.y >> level), + .z = 1, + }, + }, + }); + } const VkImageSubresourceRange subresource_range{ .aspectMask = aspect_mask, .baseMipLevel = 0, @@ -1102,57 +1157,7 @@ bool Image::ScaleUp(bool save_as_backup) { if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } - - const auto scale_up = [&](u32 value) { - return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); - }; - - const bool is_2d = info.type == ImageType::e2D; - boost::container::small_vector regions; - regions.reserve(info.resources.levels); - for (s32 level = 0; level < info.resources.levels; level++) { - regions.push_back({ - .srcSubresource{ - .aspectMask = aspect_mask, - .mipLevel = u32(level), - .baseArrayLayer = 0, - .layerCount = u32(info.resources.layers), - }, - .srcOffsets{ - { - .x = 0, - .y = 0, - .z = 0, - }, - { - .x = static_cast(info.size.width), - .y = static_cast(info.size.height), - .z = 1, - }, - }, - .dstSubresource{ - .aspectMask = aspect_mask, - .mipLevel = u32(level), - .baseArrayLayer = 0, - .layerCount = u32(info.resources.layers), - }, - .dstOffsets{ - { - .x = 0, - .y = 0, - .z = 0, - }, - { - .x = std::max(1, static_cast(scale_up(info.size.width)) >> level), - .y = 
std::max(1, static_cast(is_2d ? scale_up(info.size.height) - : info.size.height) >> - level), - .z = 1, - }, - }, - }); - } - BlitScale(*scheduler, *image, *rescaled_image, std::move(regions), aspect_mask); + BlitScale(*scheduler, *image, *rescaled_image, info, aspect_mask, resolution); return true; } @@ -1183,60 +1188,11 @@ bool Image::ScaleDown(bool save_as_backup) { MemoryCommit new_commit( runtime->memory_allocator.Commit(downscaled_image, MemoryUsage::DeviceLocal)); - const auto scale_up = [&](u32 value) { - return (value * resolution.up_scale) >> resolution.down_shift; - }; - if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } + BlitScale(*scheduler, *image, *downscaled_image, info, aspect_mask, resolution); - const bool is_2d = info.type == ImageType::e2D; - boost::container::small_vector regions; - regions.reserve(info.resources.levels); - for (s32 level = 0; level < info.resources.levels; level++) { - regions.push_back({ - .srcSubresource{ - .aspectMask = aspect_mask, - .mipLevel = static_cast(level), - .baseArrayLayer = 0, - .layerCount = static_cast(info.resources.layers), - }, - .srcOffsets{ - { - .x = 0, - .y = 0, - .z = 0, - }, - { - .x = std::max(1, static_cast(scale_up(info.size.width)) >> level), - .y = std::max(1, static_cast(is_2d ? 
scale_up(info.size.height) - : info.size.height) >> - level), - .z = 1, - }, - }, - .dstSubresource{ - .aspectMask = aspect_mask, - .mipLevel = static_cast(level), - .baseArrayLayer = 0, - .layerCount = static_cast(info.resources.layers), - }, - .dstOffsets{ - { - .x = 0, - .y = 0, - .z = 0, - }, - { - .x = std::max(1, static_cast(info.size.width) >> level), - .y = std::max(1, static_cast(info.size.height) >> level), - .z = 1, - }, - }, - }); - } - BlitScale(*scheduler, *image, *downscaled_image, std::move(regions), aspect_mask); if (save_as_backup) { backup_image = std::move(image); backup_commit = std::move(commit); -- cgit v1.2.3 From d5143c83a9eacf23cc66616bcd1a1b0ccfda5082 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 22 Jul 2021 00:16:19 -0400 Subject: texture_cache: Fix typo in aliased image rescaling --- src/video_core/texture_cache/texture_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 25fea8240..179f37526 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1552,7 +1552,7 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { if (image.modification_tick < aliased_image.modification_tick) { most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); - any_rescaled |= True(image.flags & ImageFlagBits::Rescaled); + any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); } } if (aliased_images.empty()) { -- cgit v1.2.3 From fad2c92a39cb0cfba2bc3241e779e3a983646d82 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 22 Jul 2021 01:34:46 -0400 Subject: gl_texture_cache: Simplify rescaling --- .../renderer_opengl/gl_texture_cache.cpp | 31 +++++++++------------- src/video_core/renderer_opengl/gl_texture_cache.h | 3 ++- 2 files changed, 15 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 34f74e37d..5e2695576 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -480,7 +480,8 @@ void TextureCacheRuntime::Init() { resolution = Settings::values.resolution_info; is_rescaling_on = resolution.up_scale != 1 || resolution.down_shift != 0; if (is_rescaling_on) { - rescale_fbo.Create(); + rescale_draw_fbo.Create(); + rescale_read_fbo.Create(); } } @@ -881,8 +882,11 @@ bool Image::Scale(bool scale_src, bool scale_dst) { UNIMPLEMENTED(); return false; } + GLint prev_draw_fbo; GLint prev_read_fbo; + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &prev_draw_fbo); glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &prev_read_fbo); + const GLenum attachment = [this] { switch (GetFormatType(info.format)) { case SurfaceType::ColorTexture: @@ -931,35 +935,26 @@ bool Image::Scale(bool scale_src, bool scale_dst) { dst_info.size.height = dst_height; auto dst_texture = MakeImage(dst_info, gl_internal_format); - const auto& blit_fbo = runtime->rescale_fbo; + const auto& read_fbo = 
runtime->rescale_read_fbo; + const auto& draw_fbo = runtime->rescale_draw_fbo; + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw_fbo.handle); + glBindFramebuffer(GL_READ_FRAMEBUFFER, read_fbo.handle); for (s32 level = 0; level < info.resources.levels; ++level) { const u32 src_level_width = std::max(1u, src_width >> level); const u32 src_level_height = std::max(1u, src_height >> level); const u32 dst_level_width = std::max(1u, dst_width >> level); const u32 dst_level_height = std::max(1u, dst_height >> level); - glBindFramebuffer(GL_READ_FRAMEBUFFER, blit_fbo.handle); - glNamedFramebufferTexture(blit_fbo.handle, attachment, texture.handle, level); - glBlitNamedFramebuffer(blit_fbo.handle, blit_fbo.handle, 0, 0, src_level_width, + glNamedFramebufferTexture(read_fbo.handle, attachment, texture.handle, level); + glNamedFramebufferTexture(draw_fbo.handle, attachment, dst_texture.handle, level); + glBlitNamedFramebuffer(read_fbo.handle, draw_fbo.handle, 0, 0, src_level_width, src_level_height, 0, 0, dst_level_width, dst_level_height, mask, filter); - switch (info.type) { - case ImageType::e1D: - glCopyTextureSubImage2D(dst_texture.handle, level, 0, 0, 0, 0, dst_level_width, - dst_level_height); - break; - case ImageType::e2D: - glCopyTextureSubImage3D(dst_texture.handle, level, 0, 0, 0, 0, 0, dst_level_width, - dst_level_height); - break; - case ImageType::e3D: - default: - UNREACHABLE(); - } } texture = std::move(dst_texture); // Restore previous framebuffers + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, prev_draw_fbo); glBindFramebuffer(GL_READ_FRAMEBUFFER, prev_read_fbo); return true; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index f2e48b4c7..787b63e87 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -155,7 +155,8 @@ private: std::array null_image_views{}; - OGLFramebuffer rescale_fbo; + OGLFramebuffer rescale_draw_fbo; + 
OGLFramebuffer rescale_read_fbo; Settings::ResolutionScalingInfo resolution; bool is_rescaling_on{}; }; -- cgit v1.2.3 From e580299467a8aa7f56e8b66a63112dc06c870b15 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Jul 2021 04:29:00 -0300 Subject: shader: Fix rescaling pass --- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index 293593c78..f8d04b6e3 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -17,7 +17,7 @@ namespace { void PatchFragCoord(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::F32 down_factor{ir.ResolutionDownFactor()}; - const IR::F32 frag_coord{&inst}; + const IR::F32 frag_coord{ir.GetAttribute(inst.Arg(0).Attribute())}; const IR::F32 downscaled_frag_coord{ir.FPMul(frag_coord, down_factor)}; inst.ReplaceUsesWith(downscaled_frag_coord); } -- cgit v1.2.3 From 43aa695a0415821e42fabf78a8a624edaadebab7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Jul 2021 21:20:12 -0300 Subject: common/settings: Remove unused scaling options --- src/common/settings.cpp | 23 +++++++---------------- src/common/settings.h | 2 -- 2 files changed, 7 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 6f3acee79..4b7fa4b82 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -109,51 +109,42 @@ void UpdateRescalingInfo() { auto setup = values.resolution_setup.GetValue(); auto& info = values.resolution_info; switch (setup) { - case ResolutionSetup::Res1_2X: { + case ResolutionSetup::Res1_2X: info.up_scale = 1; info.down_shift = 1; break; - } - case ResolutionSetup::Res3_4X: { + case ResolutionSetup::Res3_4X: info.up_scale = 3; info.down_shift = 2; break; - 
} - case ResolutionSetup::Res1X: { + case ResolutionSetup::Res1X: info.up_scale = 1; info.down_shift = 0; break; - } case ResolutionSetup::Res3_2X: { info.up_scale = 3; info.down_shift = 1; break; } - case ResolutionSetup::Res2X: { + case ResolutionSetup::Res2X: info.up_scale = 2; info.down_shift = 0; break; - } - case ResolutionSetup::Res3X: { + case ResolutionSetup::Res3X: info.up_scale = 3; info.down_shift = 0; break; - } - case ResolutionSetup::Res4X: { + case ResolutionSetup::Res4X: info.up_scale = 4; info.down_shift = 0; break; - } - default: { + default: UNREACHABLE(); info.up_scale = 1; info.down_shift = 0; } - } info.up_factor = static_cast(info.up_scale) / (1U << info.down_shift); info.down_factor = static_cast(1U << info.down_shift) / info.up_scale; - info.size_up = info.up_scale * info.up_scale; - info.size_shift = info.down_shift * 2; info.active = info.up_scale != 1 || info.down_shift != 0; } diff --git a/src/common/settings.h b/src/common/settings.h index 2b11984b4..ca88c086b 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -67,8 +67,6 @@ struct ResolutionScalingInfo { u32 down_shift{0}; f32 up_factor{1.0f}; f32 down_factor{1.0f}; - u32 size_up{1}; - u32 size_shift{0}; bool active{}; }; -- cgit v1.2.3 From 95761cc6a70987b2625d68c4d9da4e2622f57808 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Jul 2021 21:27:21 -0300 Subject: shader: Add integer division opcodes --- src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | 2 ++ src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp | 8 ++++++++ src/shader_recompiler/backend/glsl/emit_glsl_instructions.h | 2 ++ src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp | 8 ++++++++ src/shader_recompiler/backend/spirv/emit_spirv_instructions.h | 2 ++ src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp | 8 ++++++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + 
src/shader_recompiler/frontend/ir/opcodes.inc | 2 ++ 9 files changed, 37 insertions(+) (limited to 'src') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index cb7232704..4f8dd8e42 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -304,6 +304,8 @@ void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value); void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index f55c26b76..8aa494a4d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -90,6 +90,14 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { ctx.Add("MUL.S {}.x,{},{};", inst, a, b); } +void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("DIV.S {}.x,{},{};", inst, a, b); +} + +void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { + ctx.Add("DIV.U {}.x,{},{};", inst, a, b); +} + void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { if (value.type != Type::Register && static_cast(value.imm_u32) < 0) { ctx.Add("MOV.S {},{};", inst, -static_cast(value.imm_u32)); diff 
--git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 6cae0b84a..159e4b770 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -363,6 +363,8 @@ void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 38419f88f..88c1d4c5e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -78,6 +78,14 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin ctx.AddU32("{}=uint({}*{});", inst, a, b); } +void EmitSDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}=uint(int({})/int({}));", inst, a, b); +} + +void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}={}/{};", inst, a, b); +} + void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU32("{}=uint(-({}));", inst, value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h 
b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 3d90b2286..44eda16ca 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -284,6 +284,8 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b); Id EmitISub32(EmitContext& ctx, Id a, Id b); Id EmitISub64(EmitContext& ctx, Id a, Id b); Id EmitIMul32(EmitContext& ctx, Id a, Id b); +Id EmitSDiv32(EmitContext& ctx, Id a, Id b); +Id EmitUDiv32(EmitContext& ctx, Id a, Id b); Id EmitINeg32(EmitContext& ctx, Id value); Id EmitINeg64(EmitContext& ctx, Id value); Id EmitIAbs32(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 3501d7495..50277eec3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -72,6 +72,14 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b) { return ctx.OpIMul(ctx.U32[1], a, b); } +Id EmitSDiv32(EmitContext& ctx, Id a, Id b) { + return ctx.OpSDiv(ctx.U32[1], a, b); +} + +Id EmitUDiv32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUDiv(ctx.U32[1], a, b); +} + Id EmitINeg32(EmitContext& ctx, Id value) { return ctx.OpSNegate(ctx.U32[1], value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 9ae5da2a1..3dfba8e71 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1145,6 +1145,10 @@ U32 IREmitter::IMul(const U32& a, const U32& b) { return Inst(Opcode::IMul32, a, b); } +U32 IREmitter::IDiv(const U32& a, const U32& b, bool is_signed) { + return Inst(is_signed ? 
Opcode::SDiv32 : Opcode::UDiv32, a, b); +} + U32U64 IREmitter::INeg(const U32U64& value) { switch (value.Type()) { case Type::U32: diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 0c664d2fe..1959be42e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -209,6 +209,7 @@ public: [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); + [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] U32U64 INeg(const U32U64& value); [[nodiscard]] U32 IAbs(const U32& value); [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 72751c5a0..c05e6d312 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -287,6 +287,8 @@ OPCODE(IAdd64, U64, U64, OPCODE(ISub32, U32, U32, U32, ) OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) +OPCODE(SDiv32, U32, U32, U32, ) +OPCODE(UDiv32, U32, U32, U32, ) OPCODE(INeg32, U32, U32, ) OPCODE(INeg64, U64, U64, ) OPCODE(IAbs32, U32, U32, ) -- cgit v1.2.3 From c892359d1bf228d3c119c953c20fff44f280a7c4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Jul 2021 21:34:17 -0300 Subject: shader: Add copy constructor to instructions --- src/shader_recompiler/frontend/ir/basic_block.cpp | 5 +++++ src/shader_recompiler/frontend/ir/basic_block.h | 3 +++ src/shader_recompiler/frontend/ir/microinstruction.cpp | 11 +++++++++++ src/shader_recompiler/frontend/ir/value.h | 2 +- 4 files changed, 20 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 
7c08b25ce..974efa4a0 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -22,6 +22,11 @@ void Block::AppendNewInst(Opcode op, std::initializer_list args) { PrependNewInst(end(), op, args); } +Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base_inst) { + Inst* const inst{inst_pool->Create(base_inst)}; + return instructions.insert(insertion_point, *inst); +} + Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args, u32 flags) { Inst* const inst{inst_pool->Create(op, flags)}; diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 9ce1ed07e..fbfe98266 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -40,6 +40,9 @@ public: /// Appends a new instruction to the end of this basic block. void AppendNewInst(Opcode op, std::initializer_list args); + /// Prepends a copy of an instruction to this basic block before the insertion point. + iterator PrependNewInst(iterator insertion_point, const Inst& base_inst); + /// Prepends a new instruction to this basic block before the insertion point. 
iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args = {}, u32 flags = 0); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 30b470bdd..97e2bf6af 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -47,6 +47,17 @@ Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { } } +Inst::Inst(const Inst& base) : op{base.op}, flags{base.flags} { + if (base.op == Opcode::Phi) { + throw NotImplementedException("Copying phi node"); + } + std::construct_at(&args); + const size_t num_args{base.NumArgs()}; + for (size_t index = 0; index < num_args; ++index) { + SetArg(index, base.Arg(index)); + } +} + Inst::~Inst() { if (op == Opcode::Phi) { std::destroy_at(&phi_args); diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 6c9ef6bdd..947579852 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -116,10 +116,10 @@ public: class Inst : public boost::intrusive::list_base_hook<> { public: explicit Inst(IR::Opcode op_, u32 flags_) noexcept; + explicit Inst(const Inst& base); ~Inst(); Inst& operator=(const Inst&) = delete; - Inst(const Inst&) = delete; Inst& operator=(Inst&&) = delete; Inst(Inst&&) = delete; -- cgit v1.2.3 From 74efa57c1b78b4a07ad0003e847bd5f0aa7c7bb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Jul 2021 21:44:13 -0300 Subject: texture_cache: Add image getters --- src/video_core/texture_cache/texture_cache.h | 10 ++++++++++ src/video_core/texture_cache/texture_cache_base.h | 6 ++++++ 2 files changed, 16 insertions(+) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 179f37526..ae74a6ecf 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ 
b/src/video_core/texture_cache/texture_cache.h @@ -119,6 +119,16 @@ typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { return slot_image_views[id]; } +template +const typename P::Image& TextureCache

::GetImage(ImageId id) const noexcept { + return slot_images[id]; +} + +template +typename P::Image& TextureCache

::GetImage(ImageId id) noexcept { + return slot_images[id]; +} + template void TextureCache

::MarkModification(ImageId id) noexcept { MarkModification(slot_images[id]); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index deddf0d30..0d2d9ec2e 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -95,6 +95,12 @@ public: /// Return a reference to the given image view id [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + /// Return a constant reference to the given image id + [[nodiscard]] const Image& GetImage(ImageId id) const noexcept; + + /// Return a reference to the given image id + [[nodiscard]] Image& GetImage(ImageId id) noexcept; + /// Mark an image as modified from the GPU void MarkModification(ImageId id) noexcept; -- cgit v1.2.3 From c15332c44fa50dc44e2ebd1a682048f1e30dc136 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Jul 2021 22:04:53 -0300 Subject: shader: Add IsTextureScaled opcode --- src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | 8 ++++++++ src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | 1 + src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | 8 ++++++++ src/shader_recompiler/backend/glsl/emit_glsl_instructions.h | 2 ++ src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 4 ++++ src/shader_recompiler/backend/spirv/emit_spirv_instructions.h | 1 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 3 +++ src/shader_recompiler/frontend/ir/opcodes.inc | 2 ++ src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 1 + 10 files changed, 34 insertions(+) (limited to 'src') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 09e3a9b82..583ed3cf2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -608,6 +608,14 @@ void 
EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Re ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type); } +void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { + if (!index.IsImmediate()) { + throw NotImplementedException("Non-constant texture rescaling"); + } + UNIMPLEMENTED(); + ctx.Add("MOV.S {}.x,-1;", inst); +} + void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, ScalarU32 value) { ImageAtomic(ctx, inst, index, coord, value, "ADD.U32"); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 4f8dd8e42..e2b7d601d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -556,6 +556,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, Register color); +void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); void EmitBindlessImageAtomicIAdd32(EmitContext&); void EmitBindlessImageAtomicSMin32(EmitContext&); void EmitBindlessImageAtomicUMin32(EmitContext&); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 447eb8e0a..099e0160b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -612,6 +612,14 @@ void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value value); } +void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { + if (!index.IsImmediate()) { + throw NotImplementedException("Non-constant texture rescaling"); + 
} + UNIMPLEMENTED(); + ctx.AddU1("{}=true;", inst); +} + void EmitBindlessImageSampleImplicitLod(EmitContext&) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 159e4b770..f86502e4c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -630,6 +630,8 @@ void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords); void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view color); +void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); +void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); void EmitBindlessImageAtomicIAdd32(EmitContext&); void EmitBindlessImageAtomicSMin32(EmitContext&); void EmitBindlessImageAtomicUMin32(EmitContext&); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 1d5364309..2f925cc3e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -470,4 +470,8 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id ctx.OpImageWrite(Image(ctx, index, info), coords, color); } +Id EmitIsTextureScaled([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& index) { + return ctx.false_value; +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 44eda16ca..69fc18f5f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -513,6 +513,7 @@ Id EmitImageGradient(EmitContext& ctx, 
IR::Inst* inst, const IR::Value& index, I Id derivates, Id offset, Id lod_clamp); Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); +Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index); Id EmitBindlessImageAtomicIAdd32(EmitContext&); Id EmitBindlessImageAtomicSMin32(EmitContext&); Id EmitBindlessImageAtomicUMin32(EmitContext&); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 3dfba8e71..3ccd91c10 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1946,6 +1946,10 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c return Inst(op, Flags{info}, handle, coords, value); } +U1 IREmitter::IsTextureScaled(const U32& index) { + return Inst(Opcode::IsTextureScaled, index); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1959be42e..a78628413 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -359,6 +359,9 @@ public: TextureInstInfo info); [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, TextureInstInfo info); + + [[nodiscard]] U1 IsTextureScaled(const U32& index); + [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); [[nodiscard]] U1 VoteEqual(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index c05e6d312..ec629428a 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -493,6 +493,8 @@ OPCODE(ImageGradient, F32x4, Opaq 
OPCODE(ImageRead, U32x4, Opaque, Opaque, ) OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) +OPCODE(IsTextureScaled, U1, U32, ) + // Atomic Image operations OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, ) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index ef918f4d4..ed82fa2ac 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -431,6 +431,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.uses_is_helper_invocation = true; break; case IR::Opcode::ResolutionDownFactor: + case IR::Opcode::IsTextureScaled: info.uses_rescaling_uniform = true; break; case IR::Opcode::LaneId: -- cgit v1.2.3 From 01379c5e3cd1181c7c7d37a672364fbcad627fb0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Jul 2021 22:20:56 -0300 Subject: shader/rescaling_pass: Patch more instructions --- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 105 +++++++++++++++++++++++- 1 file changed, 101 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index f8d04b6e3..d5b98ae6e 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -22,6 +22,105 @@ void PatchFragCoord(IR::Block& block, IR::Inst& inst) { inst.ReplaceUsesWith(downscaled_frag_coord); } +[[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) { + IR::U32 scaled_value{value}; + bool changed{}; + if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { + scaled_value = ir.IMul(value, ir.Imm32(up_scale)); + changed = true; + } + if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) { + scaled_value = ir.ShiftRightArithmetic(value, ir.Imm32(down_shift)); + changed = true; + } + if 
(changed) { + return IR::U32{ir.Select(is_scaled, scaled_value, value)}; + } else { + return value; + } +} + +[[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, IR::U32 value) { + if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) { + value = ir.ShiftLeftLogical(value, ir.Imm32(down_shift)); + } + if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { + value = ir.IDiv(value, ir.Imm32(up_scale)); + } + return value; +} + +void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) { + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto info{inst.Flags()}; + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: { + const IR::Value new_inst{&*block.PrependNewInst(it, inst)}; + const IR::U32 width{DownScale(ir, IR::U32{ir.CompositeExtract(new_inst, 0)})}; + const IR::Value replacement{ir.CompositeConstruct(width, ir.CompositeExtract(new_inst, 1), + ir.CompositeExtract(new_inst, 2), + ir.CompositeExtract(new_inst, 3))}; + inst.ReplaceUsesWith(replacement); + break; + } + case TextureType::Color2D: + case TextureType::ColorArray2D: { + const IR::Value new_inst{&*block.PrependNewInst(it, inst)}; + const IR::U32 width{DownScale(ir, IR::U32{ir.CompositeExtract(new_inst, 0)})}; + const IR::U32 height{DownScale(ir, IR::U32{ir.CompositeExtract(new_inst, 1)})}; + const IR::Value replacement{ir.CompositeConstruct( + width, height, ir.CompositeExtract(new_inst, 2), ir.CompositeExtract(new_inst, 3))}; + inst.ReplaceUsesWith(replacement); + break; + } + case TextureType::Color3D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + // Nothing to patch here + break; + } +} + +void PatchImageFetch(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto info{inst.Flags()}; 
+ const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; + const IR::Value coord{inst.Arg(1)}; + switch (info.type) { + case TextureType::Color1D: + inst.SetArg(1, Scale(ir, is_scaled, IR::U32{coord})); + break; + case TextureType::ColorArray1D: { + const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)})}; + const IR::U32 y{ir.CompositeExtract(coord, 1)}; + inst.SetArg(1, ir.CompositeConstruct(x, y)); + break; + } + case TextureType::Color2D: { + const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)})}; + const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)})}; + inst.SetArg(1, ir.CompositeConstruct(x, y)); + break; + } + case TextureType::ColorArray2D: { + const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)})}; + const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)})}; + const IR::U32 z{ir.CompositeExtract(coord, 2)}; + inst.SetArg(1, ir.CompositeConstruct(x, y, z)); + break; + } + case TextureType::Color3D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + // Nothing to patch here + break; + } +} + void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { const bool is_fragment_shader{program.stage == Stage::Fragment}; switch (inst.GetOpcode()) { @@ -40,12 +139,10 @@ void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { break; } case IR::Opcode::ImageQueryDimensions: + PatchImageQueryDimensions(block, inst); break; case IR::Opcode::ImageFetch: - break; - case IR::Opcode::ImageRead: - break; - case IR::Opcode::ImageWrite: + PatchImageFetch(block, inst); break; default: break; -- cgit v1.2.3 From 656adee630cb1de738ed4dc6cf1cbc35af40f64f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Jul 2021 22:26:23 -0300 Subject: spirv: Implement rescaling patching --- .../backend/spirv/emit_context.cpp | 35 ++++++++++++++++++++++ 
src/shader_recompiler/backend/spirv/emit_context.h | 5 ++++ src/shader_recompiler/backend/spirv/emit_spirv.h | 5 +++- .../backend/spirv/emit_spirv_context_get_set.cpp | 6 ++-- .../backend/spirv/emit_spirv_image.cpp | 26 ++++++++++++++-- .../frontend/maxwell/translate_program.cpp | 3 ++ src/shader_recompiler/runtime_info.h | 2 ++ src/shader_recompiler/shader_info.h | 9 ++++++ 8 files changed, 86 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3c84e6466..222baa177 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -7,6 +7,8 @@ #include #include +#include + #include #include "common/common_types.h" @@ -496,6 +498,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf DefineImages(program.info, image_binding); DefineAttributeMemAccess(program.info); DefineGlobalMemoryFunctions(program.info); + DefineRescalingInput(program.info); } EmitContext::~EmitContext() = default; @@ -996,6 +999,38 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); } +void EmitContext::DefineRescalingInput(const Info& info) { + if (!info.uses_rescaling_uniform) { + return; + } + boost::container::static_vector members{F32[1]}; + u32 member_index{0}; + const u32 num_texture_words{Common::DivCeil(runtime_info.num_textures, 32u)}; + if (runtime_info.num_textures > 0) { + rescaling_textures_type = TypeArray(U32[1], Const(num_texture_words)); + Decorate(rescaling_textures_type, spv::Decoration::ArrayStride, 4u); + members.push_back(rescaling_textures_type); + rescaling_textures_member_index = ++member_index; + } + const Id push_constant_struct{TypeStruct(std::span(members.data(), members.size()))}; + Decorate(push_constant_struct, spv::Decoration::Block); + 
Name(push_constant_struct, "ResolutionInfo"); + MemberDecorate(push_constant_struct, 0u, spv::Decoration::Offset, 0u); + MemberName(push_constant_struct, 0u, "down_factor"); + if (runtime_info.num_textures > 0) { + MemberDecorate(push_constant_struct, rescaling_textures_member_index, + spv::Decoration::Offset, 4u); + MemberName(push_constant_struct, rescaling_textures_member_index, "rescaling_textures"); + } + const Id pointer_type{TypePointer(spv::StorageClass::PushConstant, push_constant_struct)}; + rescaling_push_constants = AddGlobalVariable(pointer_type, spv::StorageClass::PushConstant); + Name(rescaling_push_constants, "rescaling_push_constants"); + + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(rescaling_push_constants); + } +} + void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 112c52382..a7917ac51 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -238,6 +238,10 @@ public: Id indexed_load_func{}; Id indexed_store_func{}; + Id rescaling_push_constants{}; + Id rescaling_textures_type{}; + u32 rescaling_textures_member_index{}; + Id local_memory{}; Id shared_memory_u8{}; @@ -314,6 +318,7 @@ private: void DefineImages(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); + void DefineRescalingInput(const Info& info); void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index db0c935fe..7b0d8d980 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -20,8 +20,11 @@ 
namespace Shader::Backend::SPIRV { IR::Program& program, Bindings& bindings); [[nodiscard]] inline std::vector EmitSPIRV(const Profile& profile, IR::Program& program) { + RuntimeInfo runtime_info{}; + runtime_info.num_textures = Shader::NumDescriptors(program.info.texture_descriptors); + Bindings binding; - return EmitSPIRV(profile, {}, program, binding); + return EmitSPIRV(profile, runtime_info, program, binding); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 43f440dfb..6bb791b03 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -527,8 +527,10 @@ Id EmitYDirection(EmitContext& ctx) { } Id EmitResolutionDownFactor(EmitContext& ctx) { - UNIMPLEMENTED(); - return ctx.Const(1.0f); + const Id pointer_type{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.F32[1])}; + const Id pointer{ + ctx.OpAccessChain(pointer_type, ctx.rescaling_push_constants, ctx.u32_zero_value)}; + return ctx.OpLoad(ctx.F32[1], pointer); } Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 2f925cc3e..7d7c0627e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -470,8 +470,30 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id ctx.OpImageWrite(Image(ctx, index, info), coords, color); } -Id EmitIsTextureScaled([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& index) { - return ctx.false_value; +Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index) { + const Id push_constant_u32{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1])}; + const Id 
member_index{ctx.Const(ctx.rescaling_textures_member_index)}; + Id bit{}; + if (index.IsImmediate()) { + // Use BitwiseAnd instead of BitfieldExtract for better codegen on Nvidia OpenGL. + // LOP32I.NZ is used to set the predicate rather than BFE+ISETP. + const u32 index_value{index.U32()}; + const Id word_index{ctx.Const(index_value / 32)}; + const Id bit_index_mask{ctx.Const(1u << (index_value % 32))}; + const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants, + member_index, word_index)}; + const Id word{ctx.OpLoad(ctx.U32[1], pointer)}; + bit = ctx.OpBitwiseAnd(ctx.U32[1], word, bit_index_mask); + } else { + const Id index_value{ctx.Def(index)}; + const Id word_index{ctx.OpShiftRightArithmetic(ctx.U32[1], index_value, ctx.Const(5u))}; + const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants, + member_index, word_index)}; + const Id word{ctx.OpLoad(ctx.U32[1], pointer)}; + const Id bit_index{ctx.OpBitwiseAnd(ctx.U32[1], index_value, ctx.Const(31u))}; + bit = ctx.OpBitFieldUExtract(ctx.U32[1], index_value, bit_index, ctx.Const(1u)); + } + return ctx.OpINotEqual(ctx.U1, bit, ctx.u32_zero_value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 2fc542f0e..795f5cf08 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -178,6 +178,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool generic_input_types{}; VaryingState previous_stage_stores; + u32 num_textures{}; + bool convert_depth_mode{}; bool force_early_z{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 7bac9e2cd..9f375c30e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -191,4 +191,13 @@ struct Info { ImageDescriptors 
image_descriptors; }; +template +u32 NumDescriptors(const Descriptors& descriptors) { + u32 num{}; + for (const auto& desc : descriptors) { + num += desc.count; + } + return num; +} + } // namespace Shader -- cgit v1.2.3 From 6f3a41abe2caa617ae540fb7e4a3c4a092478963 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Jul 2021 22:27:16 -0300 Subject: opengl: Use Shader::NumDescriptors when possible --- .../renderer_opengl/gl_compute_pipeline.cpp | 19 ++++--------- .../renderer_opengl/gl_graphics_pipeline.cpp | 32 +++++++--------------- src/video_core/renderer_opengl/gl_shader_cache.cpp | 15 ++++------ 3 files changed, 20 insertions(+), 46 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index aa1cc592f..19c8ca7b2 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -19,15 +19,6 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 16; -template -u32 AccumulateCount(const Range& range) { - u32 num{}; - for (const auto& desc : range) { - num += desc.count; - } - return num; -} - size_t ComputePipelineKey::Hash() const noexcept { return static_cast( Common::CityHash64(reinterpret_cast(this), sizeof *this)); @@ -58,17 +49,17 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); - num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); - num_image_buffers = AccumulateCount(info.image_buffer_descriptors); + num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); + num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); - const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)}; + const u32 
num_textures{num_texture_buffers + Shader::NumDescriptors(info.texture_descriptors)}; ASSERT(num_textures <= MAX_TEXTURES); - const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)}; + const u32 num_images{num_image_buffers + Shader::NumDescriptors(info.image_descriptors)}; ASSERT(num_images <= MAX_IMAGES); const bool is_glasm{assembly_program.handle != 0}; - const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; + const u32 num_storage_buffers{Shader::NumDescriptors(info.storage_buffers_descriptors)}; use_storage_buffers = !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); writes_global_memory = !use_storage_buffers && diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index bccb37a58..43ab5c03b 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -27,6 +27,7 @@ namespace OpenGL { namespace { using Shader::ImageBufferDescriptor; using Shader::ImageDescriptor; +using Shader::NumDescriptors; using Shader::TextureBufferDescriptor; using Shader::TextureDescriptor; using Tegra::Texture::TexturePair; @@ -35,15 +36,6 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; -template -u32 AccumulateCount(const Range& range) { - u32 num{}; - for (const auto& desc : range) { - num += desc.count; - } - return num; -} - GLenum Stage(size_t stage_index) { switch (stage_index) { case 0: @@ -204,23 +196,23 @@ GraphicsPipeline::GraphicsPipeline( base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + base_uniform_bindings[stage + 1] += 
NumDescriptors(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += NumDescriptors(info.storage_buffers_descriptors); } enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + const u32 num_tex_buffer_bindings{NumDescriptors(info.texture_buffer_descriptors)}; num_texture_buffers[stage] += num_tex_buffer_bindings; num_textures += num_tex_buffer_bindings; - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + const u32 num_img_buffers_bindings{NumDescriptors(info.image_buffer_descriptors)}; num_image_buffers[stage] += num_img_buffers_bindings; num_images += num_img_buffers_bindings; - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + num_textures += NumDescriptors(info.texture_descriptors); + num_images += NumDescriptors(info.image_descriptors); + num_storage_buffers += NumDescriptors(info.storage_buffers_descriptors); writes_global_memory |= std::ranges::any_of( info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); @@ -423,13 +415,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { add_buffer(desc); } } - for (const auto& desc : info.texture_descriptors) { - texture_buffer_index += desc.count; - } + texture_buffer_index += Shader::NumDescriptors(info.texture_descriptors); if constexpr (Spec::has_images) { - for (const auto& desc : info.image_descriptors) { - texture_buffer_index += desc.count; - } + texture_buffer_index += Shader::NumDescriptors(info.image_descriptors); } }}; if constexpr (Spec::enabled_stages[0]) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 
02682bd76..42ef67628 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -426,16 +426,14 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); - for (const auto& desc : programs[index].info.storage_buffers_descriptors) { - total_storage_buffers += desc.count; - } + total_storage_buffers += + Shader::NumDescriptors(programs[index].info.storage_buffers_descriptors); } else { // VertexB path when VertexA is present. auto& program_va{programs[0]}; auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; - for (const auto& desc : program_vb.info.storage_buffers_descriptors) { - total_storage_buffers += desc.count; - } + total_storage_buffers += + Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors); programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } @@ -510,10 +508,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; - u32 num_storage_buffers{}; - for (const auto& desc : program.info.storage_buffers_descriptors) { - num_storage_buffers += desc.count; - } + const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)}; Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); -- cgit v1.2.3 From baf0993d5c019a1ef617a94fe17035f872ae6954 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Jul 2021 22:28:26 -0300 Subject: vk_graphics_pipeline: Use Shader::NumDescriptors when possible --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 24 ++++++---------------- 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'src') diff --git 
a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 8634c3316..7e48d4458 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -402,13 +402,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { add_buffer(desc); } } - for (const auto& desc : info.texture_descriptors) { - texture_buffer_index += desc.count; - } + texture_buffer_index += Shader::NumDescriptors(info.texture_descriptors); if constexpr (Spec::has_images) { - for (const auto& desc : info.image_descriptors) { - texture_buffer_index += desc.count; - } + texture_buffer_index += Shader::NumDescriptors(info.image_descriptors); } }}; if constexpr (Spec::enabled_stages[0]) { @@ -826,18 +822,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { void GraphicsPipeline::Validate() { size_t num_images{}; for (const auto& info : stage_infos) { - for (const auto& desc : info.texture_buffer_descriptors) { - num_images += desc.count; - } - for (const auto& desc : info.image_buffer_descriptors) { - num_images += desc.count; - } - for (const auto& desc : info.texture_descriptors) { - num_images += desc.count; - } - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; - } + num_images += Shader::NumDescriptors(info.texture_buffer_descriptors); + num_images += Shader::NumDescriptors(info.image_buffer_descriptors); + num_images += Shader::NumDescriptors(info.texture_descriptors); + num_images += Shader::NumDescriptors(info.image_descriptors); } ASSERT(num_images <= MAX_IMAGE_ELEMENTS); } -- cgit v1.2.3 From dc72d4d4f5240b12bc771427404e7a74d56c87d9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Jul 2021 22:28:57 -0300 Subject: vk_texture_cache: Properly scale blit source images --- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git 
a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 720247b4e..575f12566 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -630,8 +630,8 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con .z = 0, }, { - .x = static_cast(extent.width), - .y = static_cast(extent.height), + .x = std::max(1, static_cast(extent.width) >> level), + .y = std::max(1, static_cast(extent.height) >> level), .z = 1, }, }, -- cgit v1.2.3 From d2388dd0d0d36a230b58efbdd17f8366c79555b5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Jul 2021 22:34:12 -0300 Subject: vulkan: Implement rescaling shader patching --- src/video_core/renderer_vulkan/pipeline_helper.h | 56 ++++++++++++++++++---- .../renderer_vulkan/vk_compute_pipeline.cpp | 30 +++++++----- .../renderer_vulkan/vk_compute_pipeline.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 ++++++-- .../renderer_vulkan/vk_graphics_pipeline.h | 4 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 3 ++ src/video_core/renderer_vulkan/vk_scheduler.cpp | 10 ++++ src/video_core/renderer_vulkan/vk_scheduler.h | 5 ++ 8 files changed, 103 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 4847db6b6..7ba6078df 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -20,6 +20,8 @@ namespace Vulkan { +constexpr size_t MAX_RESCALING_WORDS = 4; + class DescriptorLayoutBuilder { public: DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} @@ -68,18 +70,26 @@ public: } vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { + const VkPushConstantRange range{ + .stageFlags = static_cast( + is_compute ? 
VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS), + .offset = 0, + .size = (is_compute ? 0 : sizeof(f32)) + sizeof(std::array), + }; return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, .setLayoutCount = descriptor_set_layout ? 1U : 0U, .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout, - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &range, }); } void Add(const Shader::Info& info, VkShaderStageFlags stage) { + is_compute |= (stage & VK_SHADER_STAGE_COMPUTE_BIT) != 0; + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); @@ -115,6 +125,7 @@ private: } const Device* device{}; + bool is_compute{}; boost::container::small_vector bindings; boost::container::small_vector entries; u32 binding{}; @@ -122,21 +133,46 @@ private: size_t offset{}; }; -inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, - const ImageId*& image_view_ids, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue) { - for (const auto& desc : info.texture_buffer_descriptors) { - image_view_ids += desc.count; +class RescalingPushConstant { +public: + explicit RescalingPushConstant(u32 num_textures) noexcept {} + + void PushTexture(bool is_rescaled) noexcept { + *texture_ptr |= is_rescaled ? 
texture_bit : 0; + texture_bit <<= 1; + if (texture_bit == 0) { + texture_bit = 1u; + ++texture_ptr; + } } - for (const auto& desc : info.image_buffer_descriptors) { - image_view_ids += desc.count; + + const std::array& Data() const noexcept { + return words; } + +private: + std::array words{}; + u32* texture_ptr{words.data()}; + u32 texture_bit{1u}; +}; + +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, + const ImageId*& image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue, + RescalingPushConstant& rescaling) { + static constexpr VideoCommon::ImageViewId NULL_IMAGE_VIEW_ID{0}; + image_view_ids += Shader::NumDescriptors(info.texture_buffer_descriptors); + image_view_ids += Shader::NumDescriptors(info.image_buffer_descriptors); for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { + const VideoCommon::ImageViewId image_view_id{*(image_view_ids++)}; const VkSampler sampler{*(samplers++)}; - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + ImageView& image_view{texture_cache.GetImageView(image_view_id)}; + const Image& image{texture_cache.GetImage(image_view.image_id)}; const VkImageView vk_image_view{image_view.Handle(desc.type)}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + rescaling.PushTexture(image_view_id != NULL_IMAGE_VIEW_ID && + True(image.flags & VideoCommon::ImageFlagBits::Rescaled)); } } for (const auto& desc : info.image_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 44faf626a..bda75788c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -180,9 +180,11 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, buffer_cache.UpdateComputeBuffers(); 
buffer_cache.BindHostComputeBuffers(); + RescalingPushConstant rescaling(num_textures); const VkSampler* samplers_it{samplers.data()}; const ImageId* views_it{image_view_ids.data()}; - PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue); + PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue, + rescaling); if (!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built @@ -192,17 +194,21 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, }); } const void* const descriptor_data{update_descriptor_queue.UpdateData()}; - scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - if (!descriptor_set_layout) { - return; - } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - const vk::Device& dev{device.GetLogical()}; - dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, - descriptor_set, nullptr); - }); + scheduler.Record( + [this, descriptor_data, rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + if (!descriptor_set_layout) { + return; + } + if (num_textures > 0) { + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, rescaling_data); + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_set, nullptr); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 8c4b0a301..e79ce4d7c 100644 --- 
a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -59,6 +59,7 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + u32 num_textures{}; std::condition_variable build_condvar; std::mutex build_mutex; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 7e48d4458..967762c37 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -235,6 +235,7 @@ GraphicsPipeline::GraphicsPipeline( stage_infos[stage] = *info; enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + num_textures += Shader::NumDescriptors(info->texture_descriptors); } auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; @@ -428,12 +429,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { update_descriptor_queue.Acquire(); + RescalingPushConstant rescaling(num_textures); const VkSampler* samplers_it{samplers.data()}; const ImageId* views_it{image_view_ids.data()}; const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { buffer_cache.BindHostStageBuffers(stage); PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, - update_descriptor_queue); + update_descriptor_queue, rescaling); }}; if constexpr (Spec::enabled_stages[0]) { prepare_stage(0); @@ -450,10 +452,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { prepare_stage(4); } - ConfigureDraw(); + ConfigureDraw(rescaling); } -void GraphicsPipeline::ConfigureDraw() { +void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { 
texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); @@ -464,12 +466,23 @@ void GraphicsPipeline::ConfigureDraw() { build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); } + const bool is_rescaling{texture_cache.IsRescaling()}; + const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)}; const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; const void* const descriptor_data{update_descriptor_queue.UpdateData()}; - scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), + is_rescaling, update_rescaling](vk::CommandBuffer cmdbuf) { if (bind_pipeline) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); } + if (update_rescaling) { + const f32 config_down_factor{Settings::values.resolution_info.down_factor}; + const float scale_down_factor{is_rescaling ? 
config_down_factor : 1.0f}; + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, 0, + sizeof(scale_down_factor), &scale_down_factor); + } + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, sizeof(f32), + sizeof(rescaling_data), rescaling_data.data()); if (!descriptor_set_layout) { return; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 1c780e944..a0c1d8f07 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -62,6 +62,7 @@ namespace Vulkan { class Device; class PipelineStatistics; class RenderPassCache; +class RescalingPushConstant; class VKScheduler; class VKUpdateDescriptorQueue; @@ -113,7 +114,7 @@ private: template void ConfigureImpl(bool is_indexed); - void ConfigureDraw(); + void ConfigureDraw(const RescalingPushConstant& rescaling); void MakePipeline(VkRenderPass render_pass); @@ -138,6 +139,7 @@ private: std::array stage_infos; std::array enabled_uniform_buffer_masks{}; VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; + u32 num_textures{}; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index eb8b4e08b..691ef0841 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -139,6 +139,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program } else { info.previous_stage_stores.mask.set(); } + for (const auto& stage : programs) { + info.num_textures += Shader::NumDescriptors(stage.info.texture_descriptors); + } const Shader::Stage stage{program.stage}; const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; diff --git 
a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 0c11c814f..3bfdf41ba 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -128,6 +128,15 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { return true; } +bool VKScheduler::UpdateRescaling(bool is_rescaling) { + if (state.rescaling_defined && is_rescaling == state.is_rescaling) { + return false; + } + state.rescaling_defined = true; + state.is_rescaling = is_rescaling; + return true; +} + void VKScheduler::WorkerThread(std::stop_token stop_token) { Common::SetCurrentThreadName("yuzu:VulkanWorker"); do { @@ -227,6 +236,7 @@ void VKScheduler::AllocateNewContext() { void VKScheduler::InvalidateState() { state.graphics_pipeline = nullptr; + state.rescaling_defined = false; state_tracker.InvalidateCommandBufferState(); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 85fc1712f..1b06c9296 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -56,6 +56,9 @@ public: /// Update the pipeline to the current execution context. bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); + /// Update the rescaling state. Returns true if the state has to be updated. 
+ bool UpdateRescaling(bool is_rescaling); + /// Invalidates current command buffer state except for render passes void InvalidateState(); @@ -185,6 +188,8 @@ private: VkFramebuffer framebuffer = nullptr; VkExtent2D render_area = {0, 0}; GraphicsPipeline* graphics_pipeline = nullptr; + bool is_rescaling = false; + bool rescaling_defined = false; }; void WorkerThread(std::stop_token stop_token); -- cgit v1.2.3 From 138d9d7effa17d9bae9af60493c4cdc99a2d0487 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Mon, 26 Jul 2021 01:58:02 -0400 Subject: main: Add resolution scale label in the status bar Shows the resolution scale as "Scale: {}x" in the status bar, where {} is a floating point value representing the current resolution scaling factor. --- src/yuzu/main.cpp | 13 +++++++++++-- src/yuzu/main.h | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 4e5552d2a..a246f6bb3 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -747,6 +747,8 @@ void GMainWindow::InitializeWidgets() { shader_building_label = new QLabel(); shader_building_label->setToolTip(tr("The amount of shaders currently being built")); + res_scale_label = new QLabel(); + res_scale_label->setToolTip(tr("The current selected resolution scaling multiplier.")); emu_speed_label = new QLabel(); emu_speed_label->setToolTip( tr("Current emulation speed. Values higher or lower than 100% " @@ -759,8 +761,8 @@ void GMainWindow::InitializeWidgets() { tr("Time taken to emulate a Switch frame, not counting framelimiting or v-sync. 
For " "full-speed emulation this should be at most 16.67 ms.")); - for (auto& label : - {shader_building_label, emu_speed_label, game_fps_label, emu_frametime_label}) { + for (auto& label : {shader_building_label, res_scale_label, emu_speed_label, game_fps_label, + emu_frametime_label}) { label->setVisible(false); label->setFrameStyle(QFrame::NoFrame); label->setContentsMargins(4, 0, 4, 0); @@ -1535,6 +1537,7 @@ void GMainWindow::ShutdownGame() { // Disable status bar updates status_bar_update_timer.stop(); shader_building_label->setVisible(false); + res_scale_label->setVisible(false); emu_speed_label->setVisible(false); game_fps_label->setVisible(false); emu_frametime_label->setVisible(false); @@ -2981,6 +2984,11 @@ void GMainWindow::UpdateStatusBar() { shader_building_label->setVisible(false); } + const auto res_info = Settings::values.resolution_info; + const auto res_scale = res_info.up_factor; + res_scale_label->setText( + tr("Scale: %1x", "%1 is the resolution scaling factor").arg(res_scale)); + if (Settings::values.use_speed_limit.GetValue()) { emu_speed_label->setText(tr("Speed: %1% / %2%") .arg(results.emulation_speed * 100.0, 0, 'f', 0) @@ -2996,6 +3004,7 @@ void GMainWindow::UpdateStatusBar() { } emu_frametime_label->setText(tr("Frame: %1 ms").arg(results.frametime * 1000.0, 0, 'f', 2)); + res_scale_label->setVisible(true); emu_speed_label->setVisible(!Settings::values.use_multi_core.GetValue()); game_fps_label->setVisible(true); emu_frametime_label->setVisible(true); diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 981102daa..beb4f2984 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -328,6 +328,7 @@ private: // Status bar elements QLabel* message_label = nullptr; QLabel* shader_building_label = nullptr; + QLabel* res_scale_label = nullptr; QLabel* emu_speed_label = nullptr; QLabel* game_fps_label = nullptr; QLabel* emu_frametime_label = nullptr; -- cgit v1.2.3 From 0e8cf38f392f2ea6f7f5195070ad721b78590c04 Mon Sep 17 00:00:00 2001 From: 
Fernando Sahmkow Date: Mon, 26 Jul 2021 09:33:00 +0200 Subject: Texture Cache: Implement Blacklisting. --- .../renderer_vulkan/vk_compute_pipeline.cpp | 29 ++++++++++++++++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 28 +++++++++++++++++- src/video_core/texture_cache/image_base.h | 1 + src/video_core/texture_cache/texture_cache.h | 34 +++++++++++++++++++++- src/video_core/texture_cache/texture_cache_base.h | 2 ++ 5 files changed, 90 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index bda75788c..5c591e345 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -111,6 +111,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, std::array image_view_ids; boost::container::static_vector image_view_indices; boost::container::static_vector samplers; + boost::container::static_vector image_view_blacklist; const auto& qmd{kepler_compute.launch_description}; const auto& cbufs{qmd.const_buffer_config}; @@ -151,10 +152,34 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, samplers.push_back(sampler->Handle()); } } - std::ranges::for_each(info.image_descriptors, add_image); + const u32 black_list_base = image_view_indices.size(); + bool atleast_one_blacklisted = false; + for (const auto& desc : info.image_descriptors) { + const bool is_black_listed = + desc.is_written && (desc.type == Shader::TextureType::Color2D || + desc.type == Shader::TextureType::ColorArray2D); + for (u32 index = 0; index < desc.count; ++index) { + image_view_blacklist.push_back(is_black_listed); + } + atleast_one_blacklisted |= is_black_listed; + add_image(desc); + } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); + bool 
has_listed_stuffs; + do { + has_listed_stuffs = false; + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + if (atleast_one_blacklisted) { + for (u32 index = 0; index < image_view_blacklist.size(); index++) { + if (image_view_blacklist[index]) { + ImageView& image_view{ + texture_cache.GetImageView(image_view_ids[index + black_list_base])}; + has_listed_stuffs |= texture_cache.BlackListImage(image_view.image_id); + } + } + } + } while (has_listed_stuffs); buffer_cache.UnbindComputeTextureBuffers(); ImageId* texture_buffer_ids{image_view_ids.data()}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 967762c37..4d966ee4b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -280,6 +280,7 @@ template void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; + std::array image_view_blacklist; std::array samplers; size_t sampler_index{}; size_t image_index{}; @@ -290,6 +291,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + u32 start_black_list = std::numeric_limits::max(); + u32 end_black_list = 0; const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsStorageBuffers(stage); @@ -350,6 +353,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } if constexpr (Spec::has_images) { for (const auto& desc : info.image_descriptors) { + if (desc.is_written && (desc.type == Shader::TextureType::Color2D || + desc.type == Shader::TextureType::ColorArray2D)) { + auto index_copy = image_index; + for (u32 index = 0; index < desc.count; ++index) { + start_black_list = std::min(start_black_list, index_copy); + 
image_view_blacklist[index_copy++] = true; + end_black_list = std::max(end_black_list, index_copy); + } + } add_image(desc); } } @@ -370,7 +382,21 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { config_stage(4); } const std::span indices_span(image_view_indices.data(), image_index); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + bool has_listed_stuffs; + do { + has_listed_stuffs = false; + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + if constexpr (Spec::has_images) { + if (start_black_list < end_black_list) { + for (u32 index = start_black_list; index < end_black_list; index++) { + if (image_view_blacklist[index]) { + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + has_listed_stuffs |= texture_cache.BlackListImage(image_view.image_id); + } + } + } + } + } while (has_listed_stuffs); ImageId* texture_buffer_index{image_view_ids.data()}; const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 1cd30fd37..10dd52e28 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -37,6 +37,7 @@ enum class ImageFlagBits : u32 { // Rescaler Rescaled = 1 << 12, RescaleChecked = 1 << 13, + Blacklisted = 1 << 14, }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ae74a6ecf..ce5994d5f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -227,6 +227,7 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { flags[Dirty::RenderTargetControl] = false; bool can_rescale = true; + bool any_blacklisted = false; std::array tmp_color_images{}; ImageId tmp_depth_image{}; const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) { @@ -236,6 +237,7 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { id_save = image_id; auto& image = slot_images[image_id]; can_rescale &= ImageCanRescale(image); + any_blacklisted |= True(image.flags & ImageFlagBits::Blacklisted); } else { id_save = CORRUPT_ID; } @@ -268,10 +270,13 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { scale_up(tmp_depth_image); } else { rescaled = false; - const auto scale_down = [this](ImageId image_id) { + const auto scale_down = [this, any_blacklisted](ImageId image_id) { if (image_id != CORRUPT_ID) { Image& image = slot_images[image_id]; ScaleDown(image); + if (any_blacklisted) { + image.flags |= ImageFlagBits::Blacklisted; + } } }; for (size_t index = 0; index < NUM_RT; ++index) { @@ -736,8 +741,22 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, return image_id; } +template +bool TextureCache

::BlackListImage(ImageId image_id) { + auto& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Blacklisted)) { + return false; + } + image.flags |= ImageFlagBits::Blacklisted; + ScaleDown(image); + return true; +} + template bool TextureCache

::ImageCanRescale(Image& image) { + if (True(image.flags & ImageFlagBits::Blacklisted)) { + return false; + } if (True(image.flags & ImageFlagBits::Rescaled) || True(image.flags & ImageFlagBits::RescaleChecked)) { return true; @@ -912,6 +931,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA bool can_rescale = (info.type == ImageType::e1D || info.type == ImageType::e2D) && info.block.depth == 0; bool any_rescaled = false; + bool any_blacklisted = false; for (const ImageId sibling_id : all_siblings) { if (!can_rescale) { break; @@ -919,6 +939,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA Image& sibling = slot_images[sibling_id]; can_rescale &= ImageCanRescale(sibling); any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); + any_blacklisted |= True(sibling.flags & ImageFlagBits::Blacklisted); } can_rescale &= any_rescaled; @@ -932,6 +953,9 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA for (const ImageId sibling_id : all_siblings) { Image& sibling = slot_images[sibling_id]; ScaleDown(sibling); + if (any_blacklisted) { + sibling.flags |= ImageFlagBits::Blacklisted; + } } } @@ -1556,6 +1580,7 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { boost::container::small_vector aliased_images; Image& image = slot_images[image_id]; bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); + bool any_blacklisted = True(image.flags & ImageFlagBits::Blacklisted); u64 most_recent_tick = image.modification_tick; for (const AliasedImage& aliased : image.aliased_images) { ImageBase& aliased_image = slot_images[aliased.id]; @@ -1563,6 +1588,7 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); + any_blacklisted |= True(aliased_image.flags & ImageFlagBits::Blacklisted); } } if (aliased_images.empty()) { @@ -1574,6 +1600,9 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { ScaleUp(image); } else { ScaleDown(image); + if (any_blacklisted) { + image.flags |= ImageFlagBits::Blacklisted; + } } } image.modification_tick = most_recent_tick; @@ -1589,6 +1618,9 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { ScaleUp(aliased_image); } else { ScaleDown(aliased_image); + if (any_blacklisted) { + aliased_image.flags |= ImageFlagBits::Blacklisted; + } } } CopyImage(image_id, aliased->id, aliased->copies); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 0d2d9ec2e..35a29cd9b 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -176,6 +176,8 @@ public: [[nodiscard]] bool IsRescaling(); + [[nodiscard]] bool BlackListImage(ImageId image_id); + std::mutex mutex; private: -- cgit v1.2.3 From 117f8ee7a4fa17e76a4f7eb29560fcc534ce8099 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 27 Jul 2021 00:15:27 +0200 Subject: Vulkan: Fix AA when rescaling. --- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 888bc7392..7051e6559 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -614,7 +614,7 @@ void VKBlitScreen::CreateSampler() { .pNext = nullptr, .flags = 0, .magFilter = VK_FILTER_LINEAR, - .minFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_LINEAR, .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, -- cgit v1.2.3 From cee7eba64e639d53613fee45f569377e05a4c6f9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 27 Jul 2021 00:48:22 +0200 Subject: OpenGL: set linear mag filter when blitting a downscaled image. 
--- src/video_core/renderer_opengl/renderer_opengl.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7d7cba69c..0f7b69c6d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -257,6 +257,7 @@ void RendererOpenGL::InitOpenGLObjects() { // Generate presentation sampler present_sampler.Create(); glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); // Generate VBO handle for drawing vertex_buffer.Create(); -- cgit v1.2.3 From 07c564f38b238af9be7a9d8aee1149a353c2880b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 27 Jul 2021 01:29:55 +0200 Subject: Texture Cache: Implement Rating System. --- src/video_core/texture_cache/image_base.cpp | 6 ++-- src/video_core/texture_cache/image_base.h | 2 ++ src/video_core/texture_cache/image_info.cpp | 11 ++++++++ src/video_core/texture_cache/image_info.h | 1 + src/video_core/texture_cache/texture_cache.h | 42 ++++++++++++++++++++-------- 5 files changed, 47 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 6052d148a..e9e725edf 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -60,9 +60,9 @@ namespace { ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, - converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_}, - cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes}, - mip_level_offsets{CalculateMipLevelOffsets(info)} { + 
converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, + scale_tick{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, + cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} { if (info.type == ImageType::e3D) { slice_offsets = CalculateSliceOffsets(info); slice_subresources = CalculateSliceSubresources(info); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 10dd52e28..97f107b4d 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -78,6 +78,8 @@ struct ImageBase { u32 guest_size_bytes = 0; u32 unswizzled_size_bytes = 0; u32 converted_size_bytes = 0; + u32 scale_rating = 0; + u64 scale_tick = 0; ImageFlagBits flags = ImageFlagBits::CpuModified; GPUVAddr gpu_addr = 0; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 022ca9033..7fa8fd4fe 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -31,6 +31,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { .depth = config.block_depth, }; } + rescaleable = false; tile_width_spacing = config.tile_width_spacing; if (config.texture_type != TextureType::Texture2D && config.texture_type != TextureType::Texture2DNoMipmap) { @@ -53,12 +54,14 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { case TextureType::Texture2DNoMipmap: ASSERT(config.Depth() == 1); type = config.IsPitchLinear() ? 
ImageType::Linear : ImageType::e2D; + rescaleable = !config.IsPitchLinear(); size.width = config.Width(); size.height = config.Height(); resources.layers = config.BaseLayer() + 1; break; case TextureType::Texture2DArray: type = ImageType::e2D; + rescaleable = true; size.width = config.Width(); size.height = config.Height(); resources.layers = config.BaseLayer() + config.Depth(); @@ -98,12 +101,14 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { // FIXME: Call this without passing *this layer_stride = CalculateLayerStride(*this); maybe_unaligned_layer_stride = CalculateLayerSize(*this); + rescaleable &= (block.depth == 0) && resources.levels == 1; } } ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept { const auto& rt = regs.rt[index]; format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format); + rescaleable = false; if (rt.tile_mode.is_pitch_linear) { ASSERT(rt.tile_mode.is_3d == 0); type = ImageType::Linear; @@ -129,6 +134,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) type = ImageType::e3D; size.depth = rt.depth; } else { + rescaleable = block.depth == 0 && size.height > 256; type = ImageType::e2D; resources.layers = rt.depth; } @@ -138,6 +144,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format); size.width = regs.zeta_width; size.height = regs.zeta_height; + rescaleable = false; resources.levels = 1; layer_stride = regs.zeta.layer_stride * 4; maybe_unaligned_layer_stride = layer_stride; @@ -156,6 +163,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { type = ImageType::e3D; size.depth = regs.zeta_depth; } else { + rescaleable = block.depth == 0 && size.height > 256; type = ImageType::e2D; resources.layers = regs.zeta_depth; } @@ -164,6 +172,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { 
ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero"); format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format); + rescaleable = false; if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) { type = ImageType::Linear; size = Extent3D{ @@ -174,6 +183,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { pitch = config.pitch; } else { type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D; + block = Extent3D{ .width = config.block_width, .height = config.block_height, @@ -186,6 +196,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { .height = config.height, .depth = 1, }; + rescaleable = block.depth == 0 && size.height > 256; } } diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 16d4cee37..e874d2870 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -33,6 +33,7 @@ struct ImageInfo { u32 maybe_unaligned_layer_stride = 0; u32 num_samples = 1; u32 tile_width_spacing = 0; + bool rescaleable = false; }; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ce5994d5f..be40f6b88 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -216,7 +216,10 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { return; } + u32 scale_rating; bool rescaled; + std::array tmp_color_images{}; + ImageId tmp_depth_image{}; do { flags[Dirty::RenderTargets] = false; @@ -226,10 +229,10 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { const bool force = flags[Dirty::RenderTargetControl]; flags[Dirty::RenderTargetControl] = false; + scale_rating = 0; + bool any_rescaled = false; bool can_rescale = true; bool any_blacklisted = false; - std::array tmp_color_images{}; - ImageId tmp_depth_image{}; const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) { if (view_id) { const auto& view = slot_image_views[view_id]; @@ -238,6 +241,10 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { auto& image = slot_images[image_id]; can_rescale &= ImageCanRescale(image); any_blacklisted |= True(image.flags & ImageFlagBits::Blacklisted); + any_rescaled |= True(image.flags & ImageFlagBits::Rescaled); + scale_rating = std::max(scale_rating, image.scale_tick <= frame_tick + ? image.scale_rating + 1U + : image.scale_rating); } else { id_save = CORRUPT_ID; } @@ -257,17 +264,19 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { check_rescale(render_targets.depth_buffer_id, tmp_depth_image); if (can_rescale) { - rescaled = true; + rescaled = any_rescaled || scale_rating >= 2; const auto scale_up = [this](ImageId image_id) { if (image_id != CORRUPT_ID) { Image& image = slot_images[image_id]; ScaleUp(image); } }; - for (size_t index = 0; index < NUM_RT; ++index) { - scale_up(tmp_color_images[index]); + if (rescaled) { + for (size_t index = 0; index < NUM_RT; ++index) { + scale_up(tmp_color_images[index]); + } + scale_up(tmp_depth_image); } - scale_up(tmp_depth_image); } else { rescaled = false; const auto scale_down = [this, any_blacklisted](ImageId image_id) { @@ -283,10 +292,23 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { scale_down(tmp_color_images[index]); } scale_down(tmp_depth_image); + scale_rating = 0; } } while (has_deleted_images); // Rescale End + const auto set_rating = [this, scale_rating](ImageId image_id) { + if (image_id != CORRUPT_ID) { + Image& image = slot_images[image_id]; + image.scale_rating = scale_rating; + image.scale_tick = frame_tick + 1; + } + }; + for (size_t index = 0; index < NUM_RT; ++index) { + set_rating(tmp_color_images[index]); + } + set_rating(tmp_depth_image); + if (is_rescaling != rescaled) { flags[Dirty::RescaleViewports] = true; flags[Dirty::RescaleScissors] = true; @@ -761,10 +783,7 @@ bool TextureCache

::ImageCanRescale(Image& image) { True(image.flags & ImageFlagBits::RescaleChecked)) { return true; } - const auto& info = image.info; - const bool can_this_rescale = - (info.type == ImageType::e1D || info.type == ImageType::e2D) && info.block.depth == 0; - if (!can_this_rescale) { + if (!image.info.rescaleable) { image.flags &= ~ImageFlagBits::RescaleChecked; return false; } @@ -928,8 +947,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA }; ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); - bool can_rescale = - (info.type == ImageType::e1D || info.type == ImageType::e2D) && info.block.depth == 0; + bool can_rescale = info.rescaleable; bool any_rescaled = false; bool any_blacklisted = false; for (const ImageId sibling_id : all_siblings) { -- cgit v1.2.3 From 48d81506a32deb019dc65cd8099be290a392fd8d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 27 Jul 2021 01:46:15 +0200 Subject: Vulkan: Fix downscaling Blit. --- .../renderer_vulkan/vk_texture_cache.cpp | 32 ++++++++++++---------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 575f12566..b21992fce 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -595,7 +595,8 @@ struct RangedBarrierRange { } void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info, - VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution) { + VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution, + bool scaling) { const auto type = info.type; const auto resources = info.resources; const VkExtent2D extent{ @@ -603,15 +604,18 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con .height = info.size.height, }; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, - type](vk::CommandBuffer cmdbuf) { + scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, type, + scaling](vk::CommandBuffer cmdbuf) { const auto scale_up = [&](u32 value) { return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); }; - const bool is_2d = type 
== ImageType::e2D; - const VkOffset2D mip0_size{ - .x = static_cast(scale_up(extent.width)), - .y = static_cast(is_2d ? scale_up(extent.height) : extent.height), + const VkOffset2D src_size{ + .x = static_cast(scaling ? extent.width : scale_up(extent.width)), + .y = static_cast(scaling ? extent.height : scale_up(extent.height)), + }; + const VkOffset2D dst_size{ + .x = static_cast(scaling ? scale_up(extent.width) : extent.width), + .y = static_cast(scaling ? scale_up(extent.height) : extent.height), }; boost::container::small_vector regions; regions.reserve(resources.levels); @@ -630,8 +634,8 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con .z = 0, }, { - .x = std::max(1, static_cast(extent.width) >> level), - .y = std::max(1, static_cast(extent.height) >> level), + .x = std::max(1, src_size.x >> level), + .y = std::max(1, src_size.y >> level), .z = 1, }, }, @@ -648,8 +652,8 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con .z = 0, }, { - .x = std::max(1, mip0_size.x >> level), - .y = std::max(1, mip0_size.y >> level), + .x = std::max(1, dst_size.x >> level), + .y = std::max(1, dst_size.y >> level), .z = 1, }, }, @@ -1157,7 +1161,7 @@ bool Image::ScaleUp(bool save_as_backup) { if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } - BlitScale(*scheduler, *image, *rescaled_image, info, aspect_mask, resolution); + BlitScale(*scheduler, *image, *rescaled_image, info, aspect_mask, resolution, true); return true; } @@ -1184,14 +1188,14 @@ bool Image::ScaleDown(bool save_as_backup) { const auto& resolution = runtime->resolution; vk::Image downscaled_image = - MakeImage(runtime->device, info, resolution.up_scale, resolution.down_shift); + MakeImage(runtime->device, info); MemoryCommit new_commit( runtime->memory_allocator.Commit(downscaled_image, MemoryUsage::DeviceLocal)); if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } - BlitScale(*scheduler, *image, 
*downscaled_image, info, aspect_mask, resolution); + BlitScale(*scheduler, *image, *downscaled_image, info, aspect_mask, resolution, false); if (save_as_backup) { backup_image = std::move(image); -- cgit v1.2.3 From 56ccda1d9952368d0c1e29d7c4b486c547de9549 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 28 Jul 2021 02:47:06 -0300 Subject: texture_cache: Simplify image view queries and blacklisting --- .../renderer_opengl/gl_compute_pipeline.cpp | 36 +++++++------ .../renderer_opengl/gl_graphics_pipeline.cpp | 42 +++++++-------- .../renderer_opengl/gl_texture_cache.cpp | 10 ++-- src/video_core/renderer_opengl/gl_texture_cache.h | 5 +- src/video_core/renderer_vulkan/pipeline_helper.h | 19 ++++--- .../renderer_vulkan/vk_compute_pipeline.cpp | 60 ++++++++-------------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 54 +++++-------------- .../renderer_vulkan/vk_texture_cache.cpp | 22 +++++--- src/video_core/renderer_vulkan/vk_texture_cache.h | 41 +++++++++------ src/video_core/texture_cache/image_base.cpp | 2 + src/video_core/texture_cache/image_base.h | 3 ++ src/video_core/texture_cache/image_view_base.cpp | 2 +- src/video_core/texture_cache/image_view_base.h | 4 +- src/video_core/texture_cache/texture_cache.h | 47 +++++++++-------- src/video_core/texture_cache/texture_cache_base.h | 30 ++++++----- src/video_core/texture_cache/types.h | 7 +++ 16 files changed, 192 insertions(+), 192 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 19c8ca7b2..ab2baefbb 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -79,8 +79,7 @@ void ComputePipeline::Configure() { } texture_cache.SynchronizeComputeDescriptors(); - std::array image_view_ids; - boost::container::static_vector image_view_indices; + boost::container::static_vector views; std::array samplers; std::array textures; 
std::array images; @@ -110,33 +109,39 @@ void ComputePipeline::Configure() { } return TexturePair(gpu_memory.Read(addr), via_header_index); }}; - const auto add_image{[&](const auto& desc) { + const auto add_image{[&](const auto& desc, bool blacklist) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({ + .index = handle.first, + .blacklist = blacklist, + .id = {}, + }); } }}; for (const auto& desc : info.texture_buffer_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({handle.first}); samplers[sampler_binding++] = 0; } } - std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc, false); + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({handle.first}); Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers[sampler_binding++] = sampler->Handle(); } } - std::ranges::for_each(info.image_descriptors, add_image); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); + for (const auto& desc : info.image_descriptors) { + add_image(desc, true); + } + texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); if (assembly_program.handle != 0) { program_manager.BindComputeAssemblyProgram(assembly_program.handle); @@ -152,7 +157,7 @@ void ComputePipeline::Configure() { if constexpr (is_image) { is_written = desc.is_written; } - ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + ImageView& 
image_view{texture_cache.GetImageView(views[texbuf_index].id)}; buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, is_written, is_image); @@ -168,19 +173,20 @@ void ComputePipeline::Configure() { buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); buffer_cache.BindHostComputeBuffers(); - const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers + + num_image_buffers}; texture_binding += num_texture_buffers; image_binding += num_image_buffers; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; textures[texture_binding++] = image_view.Handle(desc.type); } } for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; if (desc.is_written) { texture_cache.MarkModification(image_view.image_id); } diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 43ab5c03b..0bbda7951 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -15,7 +15,7 @@ #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_notify.h" -#include "video_core/texture_cache/texture_cache_base.h" +#include "video_core/texture_cache/texture_cache.h" #if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] @@ -280,10 +280,9 @@ GraphicsPipeline::GraphicsPipeline( template void 
GraphicsPipeline::ConfigureImpl(bool is_indexed) { - std::array image_view_ids; - std::array image_view_indices; + std::array views; std::array samplers; - size_t image_view_index{}; + size_t views_index{}; GLsizei sampler_binding{}; texture_cache.SynchronizeGraphicsDescriptors(); @@ -328,30 +327,34 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } return TexturePair(gpu_memory.Read(addr), via_header_index); }}; - const auto add_image{[&](const auto& desc) { + const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; + views[views_index++] = { + .index = handle.first, + .blacklist = blacklist, + .id = {}, + }; } }}; if constexpr (Spec::has_texture_buffers) { for (const auto& desc : info.texture_buffer_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; + views[views_index++] = {handle.first}; samplers[sampler_binding++] = 0; } } } if constexpr (Spec::has_image_buffers) { for (const auto& desc : info.image_buffer_descriptors) { - add_image(desc); + add_image(desc, false); } } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; + views[views_index++] = {handle.first}; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; samplers[sampler_binding++] = sampler->Handle(); @@ -359,7 +362,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } if constexpr (Spec::has_images) { for (const auto& desc : info.image_descriptors) { - add_image(desc); + add_image(desc, true); } } }}; @@ -378,13 +381,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { 
config_stage(4); } - const std::span indices_span(image_view_indices.data(), image_view_index); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + texture_cache.FillGraphicsImageViews(std::span(views.data(), views_index)); texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - ImageId* texture_buffer_index{image_view_ids.data()}; + VideoCommon::ImageViewInOut* texture_buffer_it{views.data()}; const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { @@ -394,12 +396,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (is_image) { is_written = desc.is_written; } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, is_written, is_image); ++index; - ++texture_buffer_index; + ++texture_buffer_it; } }}; const Shader::Info& info{stage_infos[stage]}; @@ -415,9 +417,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { add_buffer(desc); } } - texture_buffer_index += Shader::NumDescriptors(info.texture_descriptors); + texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors); if constexpr (Spec::has_images) { - texture_buffer_index += Shader::NumDescriptors(info.image_descriptors); + texture_buffer_it += Shader::NumDescriptors(info.image_descriptors); } }}; if constexpr (Spec::enabled_stages[0]) { @@ -446,7 +448,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } else { program_manager.BindSourcePrograms(source_programs); } - const ImageId* views_it{image_view_ids.data()}; + const VideoCommon::ImageViewInOut* views_it{views.data()}; GLsizei texture_binding = 0; GLsizei image_binding = 0; std::array textures; @@ -464,13 +466,13 @@ void 
GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto& info{stage_infos[stage]}; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; textures[texture_binding++] = image_view.Handle(desc.type); } } for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; if (desc.is_written) { texture_cache.MarkModification(image_view.image_id); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 5e2695576..5d14bfc97 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -472,11 +472,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); -} - -TextureCacheRuntime::~TextureCacheRuntime() = default; -void TextureCacheRuntime::Init() { resolution = Settings::values.resolution_info; is_rescaling_on = resolution.up_scale != 1 || resolution.down_shift != 0; if (is_rescaling_on) { @@ -485,6 +481,8 @@ void TextureCacheRuntime::Init() { } } +TextureCacheRuntime::~TextureCacheRuntime() = default; + void TextureCacheRuntime::Finish() { glFinish(); } @@ -685,6 +683,8 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, } } +Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {} + Image::~Image() = default; void Image::UploadMemory(const ImageBufferMap& map, 
@@ -1076,7 +1076,7 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info) : VideoCommon::ImageViewBase{info, view_info} {} -ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 787b63e87..e76ec522a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -73,8 +73,6 @@ public: StateTracker& state_tracker); ~TextureCacheRuntime(); - void Init(); - void Finish(); ImageBufferMap UploadStagingBuffer(size_t size); @@ -167,6 +165,7 @@ class Image : public VideoCommon::ImageBase { public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + explicit Image(const VideoCommon::NullImageParams&); ~Image(); @@ -223,7 +222,7 @@ public: const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info); - explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&); [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format); diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 7ba6078df..bf18b34d1 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -156,28 +156,27 @@ private: u32 
texture_bit{1u}; }; -inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, - const ImageId*& image_view_ids, TextureCache& texture_cache, +inline void PushImageDescriptors(TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue, - RescalingPushConstant& rescaling) { - static constexpr VideoCommon::ImageViewId NULL_IMAGE_VIEW_ID{0}; - image_view_ids += Shader::NumDescriptors(info.texture_buffer_descriptors); - image_view_ids += Shader::NumDescriptors(info.image_buffer_descriptors); + const Shader::Info& info, RescalingPushConstant& rescaling, + const VkSampler*& samplers, + const VideoCommon::ImageViewInOut*& views) { + views += Shader::NumDescriptors(info.texture_buffer_descriptors); + views += Shader::NumDescriptors(info.image_buffer_descriptors); for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const VideoCommon::ImageViewId image_view_id{*(image_view_ids++)}; + const VideoCommon::ImageViewId image_view_id{(views++)->id}; const VkSampler sampler{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(image_view_id)}; const Image& image{texture_cache.GetImage(image_view.image_id)}; const VkImageView vk_image_view{image_view.Handle(desc.type)}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); - rescaling.PushTexture(image_view_id != NULL_IMAGE_VIEW_ID && - True(image.flags & VideoCommon::ImageFlagBits::Rescaled)); + rescaling.PushTexture(True(image.flags & VideoCommon::ImageFlagBits::Rescaled)); } } for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + ImageView& image_view{texture_cache.GetImageView((views++)->id)}; if (desc.is_written) { texture_cache.MarkModification(image_view.image_id); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp 
b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 5c591e345..f89b84c6e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -108,10 +108,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, texture_cache.SynchronizeComputeDescriptors(); static constexpr size_t max_elements = 64; - std::array image_view_ids; - boost::container::static_vector image_view_indices; + boost::container::static_vector views; boost::container::static_vector samplers; - boost::container::static_vector image_view_blacklist; const auto& qmd{kepler_compute.launch_description}; const auto& cbufs{qmd.const_buffer_config}; @@ -135,54 +133,37 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, } return TexturePair(gpu_memory.Read(addr), via_header_index); }}; - const auto add_image{[&](const auto& desc) { + const auto add_image{[&](const auto& desc, bool blacklist) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({ + .index = handle.first, + .blacklist = blacklist, + .id = {}, + }); } }}; - std::ranges::for_each(info.texture_buffer_descriptors, add_image); - std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_buffer_descriptors) { + add_image(desc, false); + } + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc, false); + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); + views.push_back({handle.first}); Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers.push_back(sampler->Handle()); } } - const u32 black_list_base = image_view_indices.size(); - bool atleast_one_blacklisted = false; for 
(const auto& desc : info.image_descriptors) { - const bool is_black_listed = - desc.is_written && (desc.type == Shader::TextureType::Color2D || - desc.type == Shader::TextureType::ColorArray2D); - for (u32 index = 0; index < desc.count; ++index) { - image_view_blacklist.push_back(is_black_listed); - } - atleast_one_blacklisted |= is_black_listed; - add_image(desc); + add_image(desc, true); } - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - bool has_listed_stuffs; - do { - has_listed_stuffs = false; - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - if (atleast_one_blacklisted) { - for (u32 index = 0; index < image_view_blacklist.size(); index++) { - if (image_view_blacklist[index]) { - ImageView& image_view{ - texture_cache.GetImageView(image_view_ids[index + black_list_base])}; - has_listed_stuffs |= texture_cache.BlackListImage(image_view.image_id); - } - } - } - } while (has_listed_stuffs); + texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); buffer_cache.UnbindComputeTextureBuffers(); - ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { constexpr bool is_image = std::is_same_v; @@ -191,11 +172,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, if constexpr (is_image) { is_written = desc.is_written; } - ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + ImageView& image_view = texture_cache.GetImageView(views[index].id); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, is_written, is_image); - ++texture_buffer_ids; ++index; } }}; @@ -207,9 +187,9 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, RescalingPushConstant rescaling(num_textures); const VkSampler* samplers_it{samplers.data()}; - const ImageId* views_it{image_view_ids.data()}; - PushImageDescriptors(info, 
samplers_it, views_it, texture_cache, update_descriptor_queue, - rescaling); + const VideoCommon::ImageViewInOut* views_it{views.data()}; + PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it, + views_it); if (!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 4d966ee4b..4efb5d735 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -278,12 +278,10 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { template void GraphicsPipeline::ConfigureImpl(bool is_indexed) { - std::array image_view_ids; - std::array image_view_indices; - std::array image_view_blacklist; + std::array views; std::array samplers; size_t sampler_index{}; - size_t image_index{}; + size_t view_index{}; texture_cache.SynchronizeGraphicsDescriptors(); @@ -291,8 +289,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - u32 start_black_list = std::numeric_limits::max(); - u32 end_black_list = 0; const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsStorageBuffers(stage); @@ -329,7 +325,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.first; + views[view_index++] = {handle.first}; } }}; if constexpr (Spec::has_texture_buffers) { @@ -345,7 +341,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; 
++index) { const auto handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.first; + views[view_index++] = {handle.first}; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; samplers[sampler_index++] = sampler->Handle(); @@ -353,15 +349,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } if constexpr (Spec::has_images) { for (const auto& desc : info.image_descriptors) { - if (desc.is_written && (desc.type == Shader::TextureType::Color2D || - desc.type == Shader::TextureType::ColorArray2D)) { - auto index_copy = image_index; - for (u32 index = 0; index < desc.count; ++index) { - start_black_list = std::min(start_black_list, index_copy); - image_view_blacklist[index_copy++] = true; - end_black_list = std::max(end_black_list, index_copy); - } - } add_image(desc); } } @@ -381,24 +368,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { config_stage(4); } - const std::span indices_span(image_view_indices.data(), image_index); - bool has_listed_stuffs; - do { - has_listed_stuffs = false; - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - if constexpr (Spec::has_images) { - if (start_black_list < end_black_list) { - for (u32 index = start_black_list; index < end_black_list; index++) { - if (image_view_blacklist[index]) { - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; - has_listed_stuffs |= texture_cache.BlackListImage(image_view.image_id); - } - } - } - } - } while (has_listed_stuffs); + texture_cache.FillGraphicsImageViews(std::span(views.data(), view_index)); - ImageId* texture_buffer_index{image_view_ids.data()}; + VideoCommon::ImageViewInOut* texture_buffer_it{views.data()}; const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { @@ -408,12 +380,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (is_image) { is_written 
= desc.is_written; } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + ImageView& image_view{texture_cache.GetImageView(texture_buffer_it->id)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, is_written, is_image); ++index; - ++texture_buffer_index; + ++texture_buffer_it; } }}; buffer_cache.UnbindGraphicsTextureBuffers(stage); @@ -429,9 +401,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { add_buffer(desc); } } - texture_buffer_index += Shader::NumDescriptors(info.texture_descriptors); + texture_buffer_it += Shader::NumDescriptors(info.texture_descriptors); if constexpr (Spec::has_images) { - texture_buffer_index += Shader::NumDescriptors(info.image_descriptors); + texture_buffer_it += Shader::NumDescriptors(info.image_descriptors); } }}; if constexpr (Spec::enabled_stages[0]) { @@ -457,11 +429,11 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { RescalingPushConstant rescaling(num_textures); const VkSampler* samplers_it{samplers.data()}; - const ImageId* views_it{image_view_ids.data()}; + const VideoCommon::ImageViewInOut* views_it{views.data()}; const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, - update_descriptor_queue, rescaling); + PushImageDescriptors(texture_cache, update_descriptor_queue, stage_infos[stage], rescaling, + samplers_it, views_it); }}; if constexpr (Spec::enabled_stages[0]) { prepare_stage(0); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index b21992fce..3400066a6 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -730,10 +730,17 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con } } // Anonymous namespace -void 
TextureCacheRuntime::Init() { - resolution = Settings::values.resolution_info; - is_rescaling_on = resolution.up_scale != 1 || resolution.down_shift != 0; -} +TextureCacheRuntime::TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_, + MemoryAllocator& memory_allocator_, + StagingBufferPool& staging_buffer_pool_, + BlitImageHelper& blit_image_helper_, + ASTCDecoderPass& astc_decoder_pass_, + RenderPassCache& render_pass_cache_) + : device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_}, + staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_}, + astc_decoder_pass{astc_decoder_pass_}, render_pass_cache{render_pass_cache_}, + resolution{Settings::values.resolution_info}, + is_rescaling_on(resolution.up_scale != 1 || resolution.down_shift != 0) {} void TextureCacheRuntime::Finish() { scheduler.Finish(); @@ -1040,6 +1047,8 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu } } +Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {} + Image::~Image() = default; void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { @@ -1187,8 +1196,7 @@ bool Image::ScaleDown(bool save_as_backup) { }*/ const auto& resolution = runtime->resolution; - vk::Image downscaled_image = - MakeImage(runtime->device, info); + vk::Image downscaled_image = MakeImage(runtime->device, info); MemoryCommit new_commit( runtime->memory_allocator.Commit(downscaled_image, MemoryUsage::DeviceLocal)); @@ -1301,7 +1309,7 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} -ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params) : VideoCommon::ImageViewBase{params} {} VkImageView 
ImageView::DepthView() { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 958a64651..9c39a6d99 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -34,21 +34,16 @@ class RenderPassCache; class StagingBufferPool; class VKScheduler; -struct TextureCacheRuntime { - const Device& device; - VKScheduler& scheduler; - MemoryAllocator& memory_allocator; - StagingBufferPool& staging_buffer_pool; - BlitImageHelper& blit_image_helper; - ASTCDecoderPass& astc_decoder_pass; - RenderPassCache& render_pass_cache; +class TextureCacheRuntime { +public: static constexpr size_t TICKS_TO_DESTROY = 6; - DelayedDestructionRing prescaled_images; - DelayedDestructionRing prescaled_commits; - Settings::ResolutionScalingInfo resolution; - bool is_rescaling_on{}; - void Init(); + explicit TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_, + MemoryAllocator& memory_allocator_, + StagingBufferPool& staging_buffer_pool_, + BlitImageHelper& blit_image_helper_, + ASTCDecoderPass& astc_decoder_pass_, + RenderPassCache& render_pass_cache_); void Finish(); @@ -56,6 +51,10 @@ struct TextureCacheRuntime { StagingBufferRef DownloadStagingBuffer(size_t size); + void TickFrame(); + + u64 GetDeviceLocalMemory() const; + void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, @@ -84,15 +83,25 @@ struct TextureCacheRuntime { return true; } - void TickFrame(); + const Device& device; + VKScheduler& scheduler; + MemoryAllocator& memory_allocator; + StagingBufferPool& staging_buffer_pool; + BlitImageHelper& blit_image_helper; + ASTCDecoderPass& astc_decoder_pass; + RenderPassCache& render_pass_cache; - u64 GetDeviceLocalMemory() const; + DelayedDestructionRing prescaled_images; + DelayedDestructionRing prescaled_commits; + 
Settings::ResolutionScalingInfo resolution; + bool is_rescaling_on{}; }; class Image : public VideoCommon::ImageBase { public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + explicit Image(const VideoCommon::NullImageParams&); ~Image(); @@ -151,7 +160,7 @@ public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, const VideoCommon::ImageViewInfo&, GPUVAddr); - explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&); [[nodiscard]] VkImageView DepthView(); diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index e9e725edf..25a211df8 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -69,6 +69,8 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_ } } +ImageBase::ImageBase(const NullImageParams&) {} + ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_) : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {} diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 97f107b4d..9c34687e0 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -48,8 +48,11 @@ struct AliasedImage { ImageId id; }; +struct NullImageParams {}; + struct ImageBase { explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + explicit ImageBase(const NullImageParams&); [[nodiscard]] std::optional TryFindBase(GPUVAddr other_addr) const noexcept; diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index 450becbeb..e66dc9320 100644 --- 
a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -45,6 +45,6 @@ ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_in ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); } -ImageViewBase::ImageViewBase(const NullImageParams&) {} +ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 903f715c5..9c24c5359 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h @@ -15,7 +15,7 @@ using VideoCore::Surface::PixelFormat; struct ImageViewInfo; struct ImageInfo; -struct NullImageParams {}; +struct NullImageViewParams {}; enum class ImageViewFlagBits : u16 { PreemtiveDownload = 1 << 0, @@ -28,7 +28,7 @@ struct ImageViewBase { explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id); explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); - explicit ImageViewBase(const NullImageParams&); + explicit ImageViewBase(const NullImageViewParams&); [[nodiscard]] bool IsBuffer() const noexcept { return type == ImageViewType::Buffer; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index be40f6b88..4e97a9e6a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -36,7 +36,6 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& Tegra::MemoryManager& gpu_memory_) : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { - runtime.Init(); // Configure null sampler TSCEntry sampler_descriptor{}; sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); @@ -46,7 +45,8 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& // Make sure the first index is reserved for the null resources // This way the null resource becomes a compile time constant - void(slot_image_views.insert(runtime, NullImageParams{})); + void(slot_images.insert(NullImageParams{})); + void(slot_image_views.insert(runtime, NullImageViewParams{})); void(slot_samplers.insert(runtime, sampler_descriptor)); if constexpr (HAS_DEVICE_MEMORY_INFO) { @@ -57,7 +57,7 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); minimum_memory = 0; } else { - // on OGL we can be more conservatives as the driver takes care. + // On OpenGL we can be more conservatives as the driver takes care. expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; minimum_memory = expected_memory; @@ -135,15 +135,14 @@ void TextureCache

::MarkModification(ImageId id) noexcept { } template -void TextureCache

::FillGraphicsImageViews(std::span indices, - std::span image_view_ids) { - FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); +template +void TextureCache

::FillGraphicsImageViews(std::span views) { + FillImageViews(graphics_image_table, graphics_image_view_ids, views); } template -void TextureCache

::FillComputeImageViews(std::span indices, - std::span image_view_ids) { - FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); +void TextureCache

::FillComputeImageViews(std::span views) { + FillImageViews(compute_image_table, compute_image_view_ids, views); } template @@ -346,17 +345,26 @@ typename P::Framebuffer* TextureCache

::GetFramebuffer() { } template +template void TextureCache

::FillImageViews(DescriptorTable& table, std::span cached_image_view_ids, - std::span indices, - std::span image_view_ids) { - ASSERT(indices.size() <= image_view_ids.size()); + std::span views) { + bool has_blacklisted; do { has_deleted_images = false; - std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { - return VisitImageView(table, cached_image_view_ids, index); - }); - } while (has_deleted_images); + if constexpr (has_blacklists) { + has_blacklisted = false; + } + for (ImageViewInOut& view : views) { + view.id = VisitImageView(table, cached_image_view_ids, view.index); + if constexpr (has_blacklists) { + if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { + const ImageViewBase& image_view{slot_image_views[view.id]}; + has_blacklisted |= BlackListImage(image_view.image_id); + } + } + } + } while (has_deleted_images || (has_blacklists && has_blacklisted)); } template @@ -622,7 +630,7 @@ void TextureCache

::PopAsyncFlushes() { } template -bool TextureCache

::IsRescaling() { +bool TextureCache

::IsRescaling() const noexcept { return is_rescaling; } @@ -775,12 +783,11 @@ bool TextureCache

::BlackListImage(ImageId image_id) { } template -bool TextureCache

::ImageCanRescale(Image& image) { +bool TextureCache

::ImageCanRescale(ImageBase& image) { if (True(image.flags & ImageFlagBits::Blacklisted)) { return false; } - if (True(image.flags & ImageFlagBits::Rescaled) || - True(image.flags & ImageFlagBits::RescaleChecked)) { + if (True(image.flags & (ImageFlagBits::Rescaled | ImageFlagBits::RescaleChecked))) { return true; } if (!image.info.rescaleable) { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 35a29cd9b..b6cc09682 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -39,6 +39,16 @@ using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using namespace Common::Literals; +struct ImageViewInOut { + u32 index; + bool blacklist; + union { + struct Empty { + } empty{}; + ImageViewId id; + }; +}; + template class TextureCache { /// Address shift for caching images into a hash table @@ -53,11 +63,6 @@ class TextureCache { /// True when the API can provide info about the memory of the device. 
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; - /// Image view ID for null descriptors - static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; - /// Sampler ID for bugged sampler ids - static constexpr SamplerId NULL_SAMPLER_ID{0}; - static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; @@ -105,11 +110,11 @@ public: void MarkModification(ImageId id) noexcept; /// Fill image_view_ids with the graphics images in indices - void FillGraphicsImageViews(std::span indices, - std::span image_view_ids); + template + void FillGraphicsImageViews(std::span views); /// Fill image_view_ids with the compute images in indices - void FillComputeImageViews(std::span indices, std::span image_view_ids); + void FillComputeImageViews(std::span views); /// Get the sampler from the graphics descriptor table in the specified index Sampler* GetGraphicsSampler(u32 index); @@ -174,7 +179,7 @@ public: /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); - [[nodiscard]] bool IsRescaling(); + [[nodiscard]] bool IsRescaling() const noexcept; [[nodiscard]] bool BlackListImage(ImageId image_id); @@ -216,9 +221,10 @@ private: void RunGarbageCollector(); /// Fills image_view_ids in the image views in indices + template void FillImageViews(DescriptorTable& table, - std::span cached_image_view_ids, std::span indices, - std::span image_view_ids); + std::span cached_image_view_ids, + std::span views); /// Find or create an image view in the guest descriptor table ImageViewId VisitImageView(DescriptorTable& table, @@ -336,7 +342,7 @@ private: /// Returns true if the current clear parameters clear the whole image of a given image view [[nodiscard]] bool IsFullClear(ImageViewId id); - bool ImageCanRescale(Image& image); + bool ImageCanRescale(ImageBase& image); void InvalidateScale(Image& image); bool ScaleUp(Image& image); bool ScaleDown(Image& image); diff 
--git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 47a11cb2f..5c274abdf 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -22,6 +22,13 @@ using ImageAllocId = SlotId; using SamplerId = SlotId; using FramebufferId = SlotId; +/// Fake image ID for null image views +constexpr ImageId NULL_IMAGE_ID{0}; +/// Image view ID for null descriptors +constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; +/// Sampler ID for bugged sampler ids +constexpr SamplerId NULL_SAMPLER_ID{0}; + enum class ImageType : u32 { e1D, e2D, -- cgit v1.2.3 From 2182d2575010a5a85c99c09c6a1c57962242444d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 28 Jul 2021 02:53:24 -0300 Subject: texture_cache: Fix blacklists on compute --- src/video_core/texture_cache/texture_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4e97a9e6a..4dbded635 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -142,7 +142,7 @@ void TextureCache

::FillGraphicsImageViews(std::span views) { template void TextureCache

::FillComputeImageViews(std::span views) { - FillImageViews(compute_image_table, compute_image_view_ids, views); + FillImageViews(compute_image_table, compute_image_view_ids, views); } template -- cgit v1.2.3 From f086c82e1f80cae088bb22de9092598dc51979da Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 29 Jul 2021 13:27:01 -0400 Subject: gl_graphics_pipeline: Add downscale factor to shader uniforms --- .../backend/glasm/emit_glasm_not_implemented.cpp | 3 +-- src/shader_recompiler/backend/glsl/emit_context.cpp | 3 +++ .../backend/glsl/emit_glsl_context_get_set.cpp | 3 +-- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 15 ++++++++++++++- 4 files changed, 19 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 807494063..77ee6dc0e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -211,8 +211,7 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { } void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { - UNIMPLEMENTED(); - ctx.Add("MOV.F {}.x,1;", inst); + ctx.Add("MOV.F {}.x,program.env[0].x;", inst); } void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 4e6f2c0fe..7c9ed9159 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -393,6 +393,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineGenericOutput(index, program.invocations); } } + if (info.uses_rescaling_uniform) { + header += "layout(location=0) uniform float down_factor;"; + } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); 
SetupImages(bindings); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index f4ed090e3..3db3083f9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -446,8 +446,7 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { } void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { - UNIMPLEMENTED(); - ctx.AddF32("{}=1.0f;", inst); + ctx.AddF32("{}=down_factor;", inst); } void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 0bbda7951..92fda9af0 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -443,11 +443,24 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if (!is_built.load(std::memory_order::relaxed)) { WaitForBuild(); } - if (assembly_programs[0].handle != 0) { + const bool use_assembly{assembly_programs[0].handle != 0}; + const bool is_rescaling{texture_cache.IsRescaling()}; + const f32 config_down_factor{Settings::values.resolution_info.down_factor}; + const f32 down_factor{is_rescaling ? 
config_down_factor : 1.0f}; + if (use_assembly) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { program_manager.BindSourcePrograms(source_programs); } + for (size_t stage = 0; stage < source_programs.size(); ++stage) { + if (stage_infos[stage].uses_rescaling_uniform) { + if (use_assembly) { + glProgramEnvParameter4fARB(AssemblyStage(stage), 0, down_factor, 0.0f, 0.0f, 1.0f); + } else { + glProgramUniform1f(source_programs[stage].handle, 0, down_factor); + } + } + } const VideoCommon::ImageViewInOut* views_it{views.data()}; GLsizei texture_binding = 0; GLsizei image_binding = 0; -- cgit v1.2.3 From 9bc7b04ca587a349a9fc865d05e30966d6a84d65 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 29 Jul 2021 14:19:51 -0400 Subject: gl_rasterizer: Fix rescale dirty state checking --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b91e7edf8..615704711 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -533,7 +533,8 @@ void RasterizerOpenGL::SyncViewport() { auto& flags = maxwell3d.dirty.flags; const auto& regs = maxwell3d.regs; - const bool dirty_viewport = flags[Dirty::Viewports]; + const bool rescale_viewports = flags[VideoCommon::Dirty::RescaleViewports]; + const bool dirty_viewport = flags[Dirty::Viewports] || rescale_viewports; const bool dirty_clip_control = flags[Dirty::ClipControl]; if (dirty_clip_control || flags[Dirty::FrontFace]) { @@ -574,8 +575,9 @@ void RasterizerOpenGL::SyncViewport() { if (dirty_viewport) { flags[Dirty::Viewports] = false; - const bool force = flags[Dirty::ViewportTransform]; + const bool force = flags[Dirty::ViewportTransform] || rescale_viewports; flags[Dirty::ViewportTransform] = false; + 
flags[VideoCommon::Dirty::RescaleViewports] = false; const auto& resolution = Settings::values.resolution_info; const auto scale_up = [&](u32 value) -> u32 { @@ -911,11 +913,14 @@ void RasterizerOpenGL::SyncLogicOpState() { void RasterizerOpenGL::SyncScissorTest() { auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::Scissors]) { + if (!flags[Dirty::Scissors] && !flags[VideoCommon::Dirty::RescaleScissors]) { return; } flags[Dirty::Scissors] = false; + const bool force = flags[VideoCommon::Dirty::RescaleScissors]; + flags[VideoCommon::Dirty::RescaleScissors] = false; + const auto& regs = maxwell3d.regs; const auto& resolution = Settings::values.resolution_info; @@ -927,7 +932,7 @@ void RasterizerOpenGL::SyncScissorTest() { return std::max(converted_value, 1U); }; for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { - if (!flags[Dirty::Scissor0 + index]) { + if (!force && !flags[Dirty::Scissor0 + index]) { continue; } flags[Dirty::Scissor0 + index] = false; -- cgit v1.2.3 From b6060873ce1eea02f99a350f955362e57391ecd1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 29 Jul 2021 15:45:53 -0400 Subject: gl_compute_pipeline: Add downscale factor to shader uniforms --- src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index ab2baefbb..a11bd5a02 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -143,10 +143,19 @@ void ComputePipeline::Configure() { } texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); + const bool is_rescaling{texture_cache.IsRescaling()}; + const f32 config_down_factor{Settings::values.resolution_info.down_factor}; + const f32 down_factor{is_rescaling ? 
config_down_factor : 1.0f}; if (assembly_program.handle != 0) { program_manager.BindComputeAssemblyProgram(assembly_program.handle); + if (info.uses_rescaling_uniform) { + glProgramEnvParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, down_factor, 0.0f, 0.0f, 1.0f); + } } else { program_manager.BindComputeProgram(source_program.handle); + if (info.uses_rescaling_uniform) { + glProgramUniform1f(source_program.handle, 0, down_factor); + } } buffer_cache.UnbindComputeTextureBuffers(); size_t texbuf_index{}; -- cgit v1.2.3 From 05d98d9bbffde2f43ff9558a8b1676dfca0bd0f3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 29 Jul 2021 18:08:06 -0400 Subject: gl_texture_cache: Fix multi layered texture Scale --- .../renderer_opengl/gl_texture_cache.cpp | 26 +++++++++++++--------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 5d14bfc97..8b86136e0 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -939,17 +939,21 @@ bool Image::Scale(bool scale_src, bool scale_dst) { const auto& draw_fbo = runtime->rescale_draw_fbo; glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw_fbo.handle); glBindFramebuffer(GL_READ_FRAMEBUFFER, read_fbo.handle); - for (s32 level = 0; level < info.resources.levels; ++level) { - const u32 src_level_width = std::max(1u, src_width >> level); - const u32 src_level_height = std::max(1u, src_height >> level); - const u32 dst_level_width = std::max(1u, dst_width >> level); - const u32 dst_level_height = std::max(1u, dst_height >> level); - - glNamedFramebufferTexture(read_fbo.handle, attachment, texture.handle, level); - glNamedFramebufferTexture(draw_fbo.handle, attachment, dst_texture.handle, level); - glBlitNamedFramebuffer(read_fbo.handle, draw_fbo.handle, 0, 0, src_level_width, - src_level_height, 0, 0, 
dst_level_width, dst_level_height, mask, - filter); + for (s32 layer = 0; layer < info.resources.layers; ++layer) { + for (s32 level = 0; level < info.resources.levels; ++level) { + const u32 src_level_width = std::max(1u, src_width >> level); + const u32 src_level_height = std::max(1u, src_height >> level); + const u32 dst_level_width = std::max(1u, dst_width >> level); + const u32 dst_level_height = std::max(1u, dst_height >> level); + + glNamedFramebufferTextureLayer(read_fbo.handle, attachment, texture.handle, level, + layer); + glNamedFramebufferTextureLayer(draw_fbo.handle, attachment, dst_texture.handle, level, + layer); + glBlitNamedFramebuffer(read_fbo.handle, draw_fbo.handle, 0, 0, src_level_width, + src_level_height, 0, 0, dst_level_width, dst_level_height, mask, + filter); + } } texture = std::move(dst_texture); -- cgit v1.2.3 From 4a512d6827609b13cf991d8e8efb0c789936167f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 31 Jul 2021 02:37:06 -0300 Subject: gl_rasterizer: Properly scale viewports and scissors --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 47 ++++++++++++------------ 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 615704711..d94f1e89f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -214,8 +214,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); - SyncState(); - GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; if (!pipeline) { return; @@ -223,6 +221,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; pipeline->Configure(is_indexed); + SyncState(); + const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); 
BeginTransformFeedback(pipeline, primitive_mode); @@ -554,7 +554,6 @@ void RasterizerOpenGL::SyncViewport() { } glFrontFace(mode); } - if (dirty_viewport || flags[Dirty::ClipControl]) { flags[Dirty::ClipControl] = false; @@ -571,6 +570,8 @@ void RasterizerOpenGL::SyncViewport() { state_tracker.ClipControl(origin, depth); state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0); } + const bool is_rescaling{texture_cache.IsRescaling()}; + const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; if (dirty_viewport) { flags[Dirty::Viewports] = false; @@ -579,38 +580,38 @@ void RasterizerOpenGL::SyncViewport() { flags[Dirty::ViewportTransform] = false; flags[VideoCommon::Dirty::RescaleViewports] = false; - const auto& resolution = Settings::values.resolution_info; - const auto scale_up = [&](u32 value) -> u32 { - if (value == 0) { - return 0U; - } - const u32 converted_value = (value * resolution.up_scale) >> resolution.down_shift; - return std::max(converted_value, 1U); - }; - for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) { - if (!force && !flags[Dirty::Viewport0 + i]) { + for (size_t index = 0; index < Maxwell::NumViewports; ++index) { + if (!force && !flags[Dirty::Viewport0 + index]) { continue; } - flags[Dirty::Viewport0 + i] = false; - - const auto& src = regs.viewport_transform[i]; - const Common::Rectangle rect{src.GetRect()}; - glViewportIndexedf(static_cast(i), rect.left, rect.bottom, - scale_up(rect.GetWidth()), scale_up(rect.GetHeight())); + flags[Dirty::Viewport0 + index] = false; + + const auto& src = regs.viewport_transform[index]; + GLfloat x = (src.translate_x - src.scale_x) * scale; + GLfloat y = (src.translate_y - src.scale_y) * scale; + GLfloat width = src.scale_x * 2.0f * scale; + GLfloat height = src.scale_y * 2.0f * scale; + if (height < 0) { + y += height; + height = -height; + } + glViewportIndexedf(static_cast(index), x, y, width != 0.0f ? width : 1.0f, + height != 0.0f ? 
height : 1.0f); const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z; const GLdouble far_depth = src.translate_z + src.scale_z; if (device.HasDepthBufferFloat()) { - glDepthRangeIndexeddNV(static_cast(i), near_depth, far_depth); + glDepthRangeIndexeddNV(static_cast(index), near_depth, far_depth); } else { - glDepthRangeIndexed(static_cast(i), near_depth, far_depth); + glDepthRangeIndexed(static_cast(index), near_depth, far_depth); } if (!GLAD_GL_NV_viewport_swizzle) { continue; } - glViewportSwizzleNV(static_cast(i), MaxwellToGL::ViewportSwizzle(src.swizzle.x), + glViewportSwizzleNV(static_cast(index), + MaxwellToGL::ViewportSwizzle(src.swizzle.x), MaxwellToGL::ViewportSwizzle(src.swizzle.y), MaxwellToGL::ViewportSwizzle(src.swizzle.z), MaxwellToGL::ViewportSwizzle(src.swizzle.w)); @@ -940,7 +941,7 @@ void RasterizerOpenGL::SyncScissorTest() { const auto& src = regs.scissor_test[index]; if (src.enable) { glEnablei(GL_SCISSOR_TEST, static_cast(index)); - glScissorIndexed(static_cast(index), src.min_x, src.min_y, + glScissorIndexed(static_cast(index), scale_up(src.min_x), scale_up(src.min_y), scale_up(src.max_x - src.min_x), scale_up(src.max_y - src.min_y)); } else { glDisablei(GL_SCISSOR_TEST, static_cast(index)); -- cgit v1.2.3 From cfeb161c7ebf93bf6ac39e430fc998dc13abfc66 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 31 Jul 2021 03:04:08 -0300 Subject: glsl/glasm: Pass and use scaling parameters in shaders --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 3 ++ .../backend/glasm/emit_glasm_image.cpp | 5 +-- .../backend/glasm/emit_glasm_not_implemented.cpp | 2 +- .../backend/glsl/emit_context.cpp | 2 +- .../backend/glsl/emit_glsl_context_get_set.cpp | 2 +- .../backend/glsl/emit_glsl_image.cpp | 4 +-- .../renderer_opengl/gl_compute_pipeline.cpp | 23 +++++++++----- .../renderer_opengl/gl_graphics_pipeline.cpp | 36 ++++++++++++++-------- 
src/video_core/renderer_opengl/gl_texture_cache.h | 2 +- 9 files changed, 51 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 4ce1c4f54..004658546 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -448,6 +448,9 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size); header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};"); } + if (program.info.uses_rescaling_uniform) { + header += "PARAM scaling[1]={program.local[0..0]};"; + } header += "TEMP "; for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { header += fmt::format("R{},", index); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 583ed3cf2..05e88cd97 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -612,8 +612,9 @@ void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde if (!index.IsImmediate()) { throw NotImplementedException("Non-constant texture rescaling"); } - UNIMPLEMENTED(); - ctx.Add("MOV.S {}.x,-1;", inst); + ctx.Add("AND.U RC.x,scaling[0].x,{};" + "SNE.S {},RC.x,0;", + 1u << index.U32(), ctx.reg_alloc.Define(inst)); } void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 77ee6dc0e..c0f8ddcad 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -211,7 +211,7 @@ void 
EmitYDirection(EmitContext& ctx, IR::Inst& inst) { } void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { - ctx.Add("MOV.F {}.x,program.env[0].x;", inst); + ctx.Add("MOV.F {}.x,scaling[0].y;", inst); } void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 7c9ed9159..97bd59302 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -394,7 +394,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } } if (info.uses_rescaling_uniform) { - header += "layout(location=0) uniform float down_factor;"; + header += "layout(location=0) uniform vec4 scaling;"; } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 3db3083f9..542a79230 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -446,7 +446,7 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { } void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { - ctx.AddF32("{}=down_factor;", inst); + ctx.AddF32("{}=scaling.y;", inst); } void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 099e0160b..82b6f0d77 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -616,8 +616,8 @@ void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde if (!index.IsImmediate()) { throw NotImplementedException("Non-constant texture rescaling"); } - UNIMPLEMENTED(); - 
ctx.AddU1("{}=true;", inst); + const u32 image_index{index.U32()}; + ctx.AddU1("{}=(ftou(scaling.x)&{})!=0;", inst, 1u << image_index); } void EmitBindlessImageSampleImplicitLod(EmitContext&) { diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index a11bd5a02..12093c3c4 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -148,14 +148,8 @@ void ComputePipeline::Configure() { const f32 down_factor{is_rescaling ? config_down_factor : 1.0f}; if (assembly_program.handle != 0) { program_manager.BindComputeAssemblyProgram(assembly_program.handle); - if (info.uses_rescaling_uniform) { - glProgramEnvParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, down_factor, 0.0f, 0.0f, 1.0f); - } } else { program_manager.BindComputeProgram(source_program.handle); - if (info.uses_rescaling_uniform) { - glProgramUniform1f(source_program.handle, 0, down_factor); - } } buffer_cache.UnbindComputeTextureBuffers(); size_t texbuf_index{}; @@ -187,10 +181,16 @@ void ComputePipeline::Configure() { texture_binding += num_texture_buffers; image_binding += num_image_buffers; + u32 scaling_mask{}; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; - textures[texture_binding++] = image_view.Handle(desc.type); + textures[texture_binding] = image_view.Handle(desc.type); + if (True(texture_cache.GetImage(image_view.image_id).flags & + VideoCommon::ImageFlagBits::Rescaled)) { + scaling_mask |= 1u << texture_binding; + } + ++texture_binding; } } for (const auto& desc : info.image_descriptors) { @@ -202,6 +202,15 @@ void ComputePipeline::Configure() { images[image_binding++] = image_view.StorageView(desc.type, desc.format); } } + if (info.uses_rescaling_uniform) { + const f32 float_scaling_mask{Common::BitCast(scaling_mask)}; + if 
(assembly_program.handle != 0) { + glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_scaling_mask, 0.0f, 0.0f, + 0.0f); + } else { + glProgramUniform4f(source_program.handle, 0, float_scaling_mask, 0.0f, 0.0f, 0.0f); + } + } if (texture_binding != 0) { ASSERT(texture_binding == sampler_binding); glBindTextures(0, texture_binding, textures.data()); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 92fda9af0..01aa2897a 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -444,23 +444,11 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { WaitForBuild(); } const bool use_assembly{assembly_programs[0].handle != 0}; - const bool is_rescaling{texture_cache.IsRescaling()}; - const f32 config_down_factor{Settings::values.resolution_info.down_factor}; - const f32 down_factor{is_rescaling ? config_down_factor : 1.0f}; if (use_assembly) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { program_manager.BindSourcePrograms(source_programs); } - for (size_t stage = 0; stage < source_programs.size(); ++stage) { - if (stage_infos[stage].uses_rescaling_uniform) { - if (use_assembly) { - glProgramEnvParameter4fARB(AssemblyStage(stage), 0, down_factor, 0.0f, 0.0f, 1.0f); - } else { - glProgramUniform1f(source_programs[stage].handle, 0, down_factor); - } - } - } const VideoCommon::ImageViewInOut* views_it{views.data()}; GLsizei texture_binding = 0; GLsizei image_binding = 0; @@ -476,11 +464,20 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { views_it += num_texture_buffers[stage]; views_it += num_image_buffers[stage]; + u32 scaling_mask{}; + u32 stage_texture_binding{}; + const auto& info{stage_infos[stage]}; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& 
image_view{texture_cache.GetImageView((views_it++)->id)}; - textures[texture_binding++] = image_view.Handle(desc.type); + textures[texture_binding] = image_view.Handle(desc.type); + if (True(texture_cache.GetImage(image_view.image_id).flags & + VideoCommon::ImageFlagBits::Rescaled)) { + scaling_mask |= 1u << stage_texture_binding; + } + ++texture_binding; + ++stage_texture_binding; } } for (const auto& desc : info.image_descriptors) { @@ -492,6 +489,19 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { images[image_binding++] = image_view.StorageView(desc.type, desc.format); } } + if (info.uses_rescaling_uniform) { + const f32 float_scaling_mask{Common::BitCast(scaling_mask)}; + const bool is_rescaling{texture_cache.IsRescaling()}; + const f32 config_down_factor{Settings::values.resolution_info.down_factor}; + const f32 down_factor{is_rescaling ? config_down_factor : 1.0f}; + if (use_assembly) { + glProgramLocalParameter4fARB(AssemblyStage(stage), 0, float_scaling_mask, + down_factor, 0.0f, 0.0f); + } else { + glProgramUniform4f(source_programs[stage].handle, 0, float_scaling_mask, + down_factor, 0.0f, 0.0f); + } + } }}; if constexpr (Spec::enabled_stages[0]) { prepare_stage(0); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index e76ec522a..28c91b368 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -210,7 +210,7 @@ private: GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; GLenum gl_type = GL_NONE; - TextureCacheRuntime* runtime; + TextureCacheRuntime* runtime{}; }; class ImageView : public VideoCommon::ImageViewBase { -- cgit v1.2.3 From c9238555f7bb809e322adf7c70676d008e1413ff Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 31 Jul 2021 03:04:33 -0300 Subject: gl_texture_cache: Fix scaling blits --- .../renderer_opengl/gl_texture_cache.cpp | 32 ++++++++-------------- 1 file changed, 12 
insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 8b86136e0..6aea375f1 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -478,6 +478,10 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& if (is_rescaling_on) { rescale_draw_fbo.Create(); rescale_read_fbo.Create(); + + // Make sure the framebuffer is created without DSA + glBindFramebuffer(GL_READ_FRAMEBUFFER, rescale_draw_fbo.handle); + glBindFramebuffer(GL_READ_FRAMEBUFFER, rescale_read_fbo.handle); } } @@ -882,11 +886,6 @@ bool Image::Scale(bool scale_src, bool scale_dst) { UNIMPLEMENTED(); return false; } - GLint prev_draw_fbo; - GLint prev_read_fbo; - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &prev_draw_fbo); - glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &prev_read_fbo); - const GLenum attachment = [this] { switch (GetFormatType(info.format)) { case SurfaceType::ColorTexture: @@ -935,10 +934,8 @@ bool Image::Scale(bool scale_src, bool scale_dst) { dst_info.size.height = dst_height; auto dst_texture = MakeImage(dst_info, gl_internal_format); - const auto& read_fbo = runtime->rescale_read_fbo; - const auto& draw_fbo = runtime->rescale_draw_fbo; - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw_fbo.handle); - glBindFramebuffer(GL_READ_FRAMEBUFFER, read_fbo.handle); + const GLuint read_fbo = runtime->rescale_read_fbo.handle; + const GLuint draw_fbo = runtime->rescale_draw_fbo.handle; for (s32 layer = 0; layer < info.resources.layers; ++layer) { for (s32 level = 0; level < info.resources.levels; ++level) { const u32 src_level_width = std::max(1u, src_width >> level); @@ -946,20 +943,15 @@ bool Image::Scale(bool scale_src, bool scale_dst) { const u32 dst_level_width = std::max(1u, dst_width >> level); const u32 dst_level_height = std::max(1u, dst_height >> level); - 
glNamedFramebufferTextureLayer(read_fbo.handle, attachment, texture.handle, level, - layer); - glNamedFramebufferTextureLayer(draw_fbo.handle, attachment, dst_texture.handle, level, - layer); - glBlitNamedFramebuffer(read_fbo.handle, draw_fbo.handle, 0, 0, src_level_width, - src_level_height, 0, 0, dst_level_width, dst_level_height, mask, - filter); + glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer); + glNamedFramebufferTextureLayer(draw_fbo, attachment, dst_texture.handle, level, layer); + glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0, + 0, dst_level_width, dst_level_height, mask, filter); + glNamedFramebufferTextureLayer(read_fbo, attachment, 0, level, layer); + glNamedFramebufferTextureLayer(draw_fbo, attachment, 0, level, layer); } } texture = std::move(dst_texture); - - // Restore previous framebuffers - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, prev_draw_fbo); - glBindFramebuffer(GL_READ_FRAMEBUFFER, prev_read_fbo); return true; } -- cgit v1.2.3 From 526e47f1486c361e10fc930eff1df4f13d178816 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 31 Jul 2021 03:04:44 -0300 Subject: vk_rasterizer: Minor style change --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7a7374b78..20bb05e7d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -604,8 +604,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg if (!state_tracker.TouchViewports()) { return; } - const float scale = - texture_cache.IsRescaling() ? Settings::values.resolution_info.up_factor : 1.0f; + const bool is_rescaling{texture_cache.IsRescaling()}; + const float scale = is_rescaling ? 
Settings::values.resolution_info.up_factor : 1.0f; const std::array viewports{ GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale), GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale), -- cgit v1.2.3 From c7a1cbad44487b2c5f9da31ce6d3c76b7dec4f05 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 31 Jul 2021 17:42:37 -0300 Subject: texture_cache: Add getter to query if image view is rescaled --- src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 3 +-- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 3 +-- src/video_core/renderer_vulkan/pipeline_helper.h | 3 +-- src/video_core/texture_cache/texture_cache.h | 16 ++++++---------- src/video_core/texture_cache/texture_cache_base.h | 9 +++------ 5 files changed, 12 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 12093c3c4..02853b078 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -186,8 +186,7 @@ void ComputePipeline::Configure() { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; textures[texture_binding] = image_view.Handle(desc.type); - if (True(texture_cache.GetImage(image_view.image_id).flags & - VideoCommon::ImageFlagBits::Rescaled)) { + if (texture_cache.IsRescaling(image_view)) { scaling_mask |= 1u << texture_binding; } ++texture_binding; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 01aa2897a..c3d549a6e 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -472,8 +472,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { for (u32 index = 0; index < desc.count; ++index) { ImageView& 
image_view{texture_cache.GetImageView((views_it++)->id)}; textures[texture_binding] = image_view.Handle(desc.type); - if (True(texture_cache.GetImage(image_view.image_id).flags & - VideoCommon::ImageFlagBits::Rescaled)) { + if (texture_cache.IsRescaling(image_view)) { scaling_mask |= 1u << stage_texture_binding; } ++texture_binding; diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index bf18b34d1..bce4220c6 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -168,10 +168,9 @@ inline void PushImageDescriptors(TextureCache& texture_cache, const VideoCommon::ImageViewId image_view_id{(views++)->id}; const VkSampler sampler{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(image_view_id)}; - const Image& image{texture_cache.GetImage(image_view.image_id)}; const VkImageView vk_image_view{image_view.Handle(desc.type)}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); - rescaling.PushTexture(True(image.flags & VideoCommon::ImageFlagBits::Rescaled)); + rescaling.PushTexture(texture_cache.IsRescaling(image_view)); } } for (const auto& desc : info.image_descriptors) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4dbded635..0e70c4db2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -119,16 +119,6 @@ typename P::ImageView& TextureCache

<P>::GetImageView(ImageViewId id) noexcept { return slot_image_views[id]; } -template <class P> -const typename P::Image& TextureCache<P>::GetImage(ImageId id) const noexcept { - return slot_images[id]; -} - -template <class P> -typename P::Image& TextureCache<P>::GetImage(ImageId id) noexcept { - return slot_images[id]; -} - -template <class P> void TextureCache<P>::MarkModification(ImageId id) noexcept { MarkModification(slot_images[id]); @@ -634,6 +624,12 @@ bool TextureCache<P>::IsRescaling() const noexcept { return is_rescaling; } +template <class P> +bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcept { + const ImageBase& image = slot_images[image_view.image_id]; + return True(image.flags & ImageFlagBits::Rescaled); +} + template <class P> bool TextureCache<P>

::IsRegionGpuModified(VAddr addr, size_t size) { bool is_modified = false; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index b6cc09682..8b417b611 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -21,6 +21,7 @@ #include "video_core/texture_cache/descriptor_table.h" #include "video_core/texture_cache/image_base.h" #include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/image_view_info.h" #include "video_core/texture_cache/render_targets.h" #include "video_core/texture_cache/slot_vector.h" @@ -100,12 +101,6 @@ public: /// Return a reference to the given image view id [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; - /// Return a constant reference to the given image id - [[nodiscard]] const Image& GetImage(ImageId id) const noexcept; - - /// Return a reference to the given image id - [[nodiscard]] Image& GetImage(ImageId id) noexcept; - /// Mark an image as modified from the GPU void MarkModification(ImageId id) noexcept; @@ -181,6 +176,8 @@ public: [[nodiscard]] bool IsRescaling() const noexcept; + [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; + [[nodiscard]] bool BlackListImage(ImageId image_id); std::mutex mutex; -- cgit v1.2.3 From fc9bb3c3fed4721b06bda46deea3770e5285b104 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 1 Aug 2021 02:26:02 -0300 Subject: shader: Properly blacklist and scale image loads --- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 22 +++++++++++++++++++--- .../renderer_opengl/gl_compute_pipeline.cpp | 2 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 14 +++++++++----- 5 files changed, 31 insertions(+), 11 deletions(-) (limited to 'src') diff --git 
a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index d5b98ae6e..86c8f0c69 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -84,10 +84,8 @@ void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) { } } -void PatchImageFetch(IR::Block& block, IR::Inst& inst) { - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; +void ScaleIntegerCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { const auto info{inst.Flags()}; - const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; const IR::Value coord{inst.Arg(1)}; switch (info.type) { case TextureType::Color1D: @@ -121,6 +119,21 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) { } } +void PatchImageFetch(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto info{inst.Flags()}; + const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; + ScaleIntegerCoord(ir, inst, is_scaled); +} + +void PatchImageRead(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto info{inst.Flags()}; + // TODO: Scale conditionally + const IR::U1 is_scaled{IR::Value{true}}; + ScaleIntegerCoord(ir, inst, is_scaled); +} + void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { const bool is_fragment_shader{program.stage == Stage::Fragment}; switch (inst.GetOpcode()) { @@ -144,6 +157,9 @@ void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { case IR::Opcode::ImageFetch: PatchImageFetch(block, inst); break; + case IR::Opcode::ImageRead: + PatchImageRead(block, inst); + break; default: break; } diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 02853b078..60c65047b 100644 --- 
a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -139,7 +139,7 @@ void ComputePipeline::Configure() { } } for (const auto& desc : info.image_descriptors) { - add_image(desc, true); + add_image(desc, desc.is_written); } texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index c3d549a6e..11559d6ce 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -362,7 +362,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } if constexpr (Spec::has_images) { for (const auto& desc : info.image_descriptors) { - add_image(desc, true); + add_image(desc, desc.is_written); } } }}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index f89b84c6e..6dc52e399 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -159,7 +159,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, } } for (const auto& desc : info.image_descriptors) { - add_image(desc, true); + add_image(desc, desc.is_written); } texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 4efb5d735..c29bab678 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -322,20 +322,24 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } return TexturePair(gpu_memory.Read(addr), via_header_index); }}; - const auto add_image{[&](const auto& desc) { + const auto add_image{[&](const auto& desc, bool blacklist) 
LAMBDA_FORCEINLINE { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; - views[view_index++] = {handle.first}; + views[view_index++] = { + .index = handle.first, + .blacklist = blacklist, + .id = {}, + }; } }}; if constexpr (Spec::has_texture_buffers) { for (const auto& desc : info.texture_buffer_descriptors) { - add_image(desc); + add_image(desc, false); } } if constexpr (Spec::has_image_buffers) { for (const auto& desc : info.image_buffer_descriptors) { - add_image(desc); + add_image(desc, false); } } for (const auto& desc : info.texture_descriptors) { @@ -349,7 +353,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } if constexpr (Spec::has_images) { for (const auto& desc : info.image_descriptors) { - add_image(desc); + add_image(desc, desc.is_written); } } }}; -- cgit v1.2.3 From e66d5b88a6f1c2d85c5cd8e351c6ed52c96a0ecf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 1 Aug 2021 18:57:45 -0300 Subject: shader: Properly scale image reads and add GL SPIR-V support Thanks for everything! 
--- src/shader_recompiler/backend/bindings.h | 2 + .../backend/glasm/emit_context.cpp | 4 +- src/shader_recompiler/backend/glasm/emit_glasm.h | 2 + .../backend/glasm/emit_glasm_image.cpp | 9 +++ .../backend/glasm/emit_glasm_instructions.h | 1 + .../backend/glasm/emit_glasm_not_implemented.cpp | 2 +- .../backend/glsl/emit_glsl_context_get_set.cpp | 2 +- .../backend/glsl/emit_glsl_image.cpp | 8 +++ .../backend/spirv/emit_context.cpp | 65 +++++++++++++++---- src/shader_recompiler/backend/spirv/emit_context.h | 11 +++- src/shader_recompiler/backend/spirv/emit_spirv.h | 16 +++-- .../backend/spirv/emit_spirv_context_get_set.cpp | 13 ++-- .../backend/spirv/emit_spirv_image.cpp | 74 ++++++++++++++++------ .../backend/spirv/emit_spirv_instructions.h | 1 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../ir_opt/collect_shader_info_pass.cpp | 1 + src/shader_recompiler/ir_opt/rescaling_pass.cpp | 3 +- src/shader_recompiler/runtime_info.h | 2 - src/video_core/renderer_opengl/gl_buffer_cache.cpp | 13 ++-- .../renderer_opengl/gl_compute_pipeline.cpp | 21 ++++-- .../renderer_opengl/gl_graphics_pipeline.cpp | 24 ++++--- src/video_core/renderer_vulkan/pipeline_helper.h | 22 +++++-- .../renderer_vulkan/vk_pipeline_cache.cpp | 3 - 25 files changed, 228 insertions(+), 77 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h index 35503000c..669702553 100644 --- a/src/shader_recompiler/backend/bindings.h +++ b/src/shader_recompiler/backend/bindings.h @@ -14,6 +14,8 @@ struct Bindings { u32 storage_buffer{}; u32 texture{}; u32 image{}; + u32 texture_scaling_index{}; + u32 image_scaling_index{}; }; } // namespace Shader::Backend diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 069c019ad..8fd459dfe 100644 --- 
a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" #include "shader_recompiler/runtime_info.h" @@ -55,7 +56,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } if (!runtime_info.glasm_use_storage_buffers) { if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { - Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); + const size_t index{num + PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE}; + Add("PARAM c[{}]={{program.local[0..{}]}};", index, index - 1); } } stage = program.stage; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index bcb55f062..292655acb 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -13,6 +13,8 @@ namespace Shader::Backend::GLASM { +constexpr u32 PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE = 1; + [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 05e88cd97..d325d31c7 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -617,6 +617,15 @@ void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde 1u << index.U32(), ctx.reg_alloc.Define(inst)); } +void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { + if (!index.IsImmediate()) { + throw NotImplementedException("Non-constant texture 
rescaling"); + } + ctx.Add("AND.U RC.x,scaling[0].y,{};" + "SNE.S {},RC.x,0;", + 1u << index.U32(), ctx.reg_alloc.Define(inst)); +} + void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, ScalarU32 value) { ImageAtomic(ctx, inst, index, coord, value, "ADD.U32"); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index e2b7d601d..1f343bff5 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -557,6 +557,7 @@ void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Reg void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, Register color); void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); +void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index); void EmitBindlessImageAtomicIAdd32(EmitContext&); void EmitBindlessImageAtomicSMin32(EmitContext&); void EmitBindlessImageAtomicUMin32(EmitContext&); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index c0f8ddcad..681aeda8d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -211,7 +211,7 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { } void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { - ctx.Add("MOV.F {}.x,scaling[0].y;", inst); + ctx.Add("MOV.F {}.x,scaling[0].z;", inst); } void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 542a79230..4c26f3829 100644 --- 
a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -446,7 +446,7 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { } void EmitResolutionDownFactor(EmitContext& ctx, IR::Inst& inst) { - ctx.AddF32("{}=scaling.y;", inst); + ctx.AddF32("{}=scaling.z;", inst); } void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 82b6f0d77..2f78d0267 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -620,6 +620,14 @@ void EmitIsTextureScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde ctx.AddU1("{}=(ftou(scaling.x)&{})!=0;", inst, 1u << image_index); } +void EmitIsImageScaled(EmitContext& ctx, IR::Inst& inst, const IR::Value& index) { + if (!index.IsImmediate()) { + throw NotImplementedException("Non-constant texture rescaling"); + } + const u32 image_index{index.U32()}; + ctx.AddU1("{}=(ftou(scaling.y)&{})!=0;", inst, 1u << image_index); +} + void EmitBindlessImageSampleImplicitLod(EmitContext&) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 222baa177..8646fe989 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "shader_recompiler/backend/spirv/emit_context.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { namespace { @@ -476,8 +477,9 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, IR::Program& program, Bindings& 
bindings) - : Sirit::Module(profile_.supported_spirv), profile{profile_}, - runtime_info{runtime_info_}, stage{program.stage} { + : Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_}, + stage{program.stage}, texture_rescaling_index{bindings.texture_scaling_index}, + image_rescaling_index{bindings.image_scaling_index} { const bool is_unified{profile.unified_descriptor_binding}; u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer}; @@ -494,8 +496,8 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf DefineStorageBuffers(program.info, storage_binding); DefineTextureBuffers(program.info, texture_binding); DefineImageBuffers(program.info, image_binding); - DefineTextures(program.info, texture_binding); - DefineImages(program.info, image_binding); + DefineTextures(program.info, texture_binding, bindings.texture_scaling_index); + DefineImages(program.info, image_binding, bindings.image_scaling_index); DefineAttributeMemAccess(program.info); DefineGlobalMemoryFunctions(program.info); DefineRescalingInput(program.info); @@ -1003,25 +1005,49 @@ void EmitContext::DefineRescalingInput(const Info& info) { if (!info.uses_rescaling_uniform) { return; } - boost::container::static_vector members{F32[1]}; + if (profile.unified_descriptor_binding) { + DefineRescalingInputPushConstant(info); + } else { + DefineRescalingInputUniformConstant(); + } +} + +void EmitContext::DefineRescalingInputPushConstant(const Info& info) { + boost::container::static_vector members{F32[1]}; u32 member_index{0}; - const u32 num_texture_words{Common::DivCeil(runtime_info.num_textures, 32u)}; - if (runtime_info.num_textures > 0) { - rescaling_textures_type = TypeArray(U32[1], Const(num_texture_words)); + if (!info.texture_descriptors.empty()) { + rescaling_textures_type = TypeArray(U32[1], Const(4u)); Decorate(rescaling_textures_type, 
spv::Decoration::ArrayStride, 4u); members.push_back(rescaling_textures_type); rescaling_textures_member_index = ++member_index; } + if (!info.image_descriptors.empty()) { + rescaling_images_type = TypeArray(U32[1], Const(NUM_IMAGE_SCALING_WORDS)); + if (rescaling_textures_type.value != rescaling_images_type.value) { + Decorate(rescaling_images_type, spv::Decoration::ArrayStride, 4u); + } + members.push_back(rescaling_images_type); + rescaling_images_member_index = ++member_index; + } const Id push_constant_struct{TypeStruct(std::span(members.data(), members.size()))}; Decorate(push_constant_struct, spv::Decoration::Block); Name(push_constant_struct, "ResolutionInfo"); + MemberDecorate(push_constant_struct, 0u, spv::Decoration::Offset, 0u); MemberName(push_constant_struct, 0u, "down_factor"); - if (runtime_info.num_textures > 0) { - MemberDecorate(push_constant_struct, rescaling_textures_member_index, - spv::Decoration::Offset, 4u); + + const u32 offset_bias = stage == Stage::Compute ? sizeof(u32) : 0; + if (!info.texture_descriptors.empty()) { + MemberDecorate( + push_constant_struct, rescaling_textures_member_index, spv::Decoration::Offset, + static_cast(offsetof(RescalingLayout, rescaling_textures) - offset_bias)); MemberName(push_constant_struct, rescaling_textures_member_index, "rescaling_textures"); } + if (!info.image_descriptors.empty()) { + MemberDecorate(push_constant_struct, rescaling_images_member_index, spv::Decoration::Offset, + static_cast(offsetof(RescalingLayout, rescaling_images) - offset_bias)); + MemberName(push_constant_struct, rescaling_images_member_index, "rescaling_images"); + } const Id pointer_type{TypePointer(spv::StorageClass::PushConstant, push_constant_struct)}; rescaling_push_constants = AddGlobalVariable(pointer_type, spv::StorageClass::PushConstant); Name(rescaling_push_constants, "rescaling_push_constants"); @@ -1031,6 +1057,17 @@ void EmitContext::DefineRescalingInput(const Info& info) { } } +void 
EmitContext::DefineRescalingInputUniformConstant() { + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, F32[4])}; + rescaling_uniform_constant = + AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant); + Decorate(rescaling_uniform_constant, spv::Decoration::Location, 0u); + + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(rescaling_uniform_constant); + } +} + void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; @@ -1219,7 +1256,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { } } -void EmitContext::DefineTextures(const Info& info, u32& binding) { +void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_index) { textures.reserve(info.texture_descriptors.size()); for (const TextureDescriptor& desc : info.texture_descriptors) { const Id image_type{ImageType(*this, desc)}; @@ -1241,13 +1278,14 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { interfaces.push_back(id); } ++binding; + ++scaling_index; } if (info.uses_atomic_image_u32) { image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); } } -void EmitContext::DefineImages(const Info& info, u32& binding) { +void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_index) { images.reserve(info.image_descriptors.size()); for (const ImageDescriptor& desc : info.image_descriptors) { if (desc.count != 1) { @@ -1268,6 +1306,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { interfaces.push_back(id); } ++binding; + ++scaling_index; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index a7917ac51..b67704baa 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -238,9 +238,14 @@ public: Id indexed_load_func{}; Id indexed_store_func{}; + Id 
rescaling_uniform_constant{}; Id rescaling_push_constants{}; Id rescaling_textures_type{}; + Id rescaling_images_type{}; u32 rescaling_textures_member_index{}; + u32 rescaling_images_member_index{}; + u32 texture_rescaling_index{}; + u32 image_rescaling_index{}; Id local_memory{}; @@ -314,11 +319,13 @@ private: void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextureBuffers(const Info& info, u32& binding); void DefineImageBuffers(const Info& info, u32& binding); - void DefineTextures(const Info& info, u32& binding); - void DefineImages(const Info& info, u32& binding); + void DefineTextures(const Info& info, u32& binding, u32& scaling_index); + void DefineImages(const Info& info, u32& binding, u32& scaling_index); void DefineAttributeMemAccess(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); void DefineRescalingInput(const Info& info); + void DefineRescalingInputPushConstant(const Info& info); + void DefineRescalingInputUniformConstant(); void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 7b0d8d980..db0998ad6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -16,15 +16,23 @@ namespace Shader::Backend::SPIRV { +constexpr u32 NUM_TEXTURE_SCALING_WORDS = 4; +constexpr u32 NUM_IMAGE_SCALING_WORDS = 2; +constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS = + NUM_TEXTURE_SCALING_WORDS + NUM_IMAGE_SCALING_WORDS; + +struct RescalingLayout { + u32 down_factor; + std::array rescaling_textures; + std::array rescaling_images; +}; + [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings); [[nodiscard]] inline std::vector EmitSPIRV(const Profile& profile, IR::Program& program) { - RuntimeInfo runtime_info{}; - 
runtime_info.num_textures = Shader::NumDescriptors(program.info.texture_descriptors); - Bindings binding; - return EmitSPIRV(profile, runtime_info, program, binding); + return EmitSPIRV(profile, {}, program, binding); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 6bb791b03..c0db7452f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -527,10 +527,15 @@ Id EmitYDirection(EmitContext& ctx) { } Id EmitResolutionDownFactor(EmitContext& ctx) { - const Id pointer_type{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.F32[1])}; - const Id pointer{ - ctx.OpAccessChain(pointer_type, ctx.rescaling_push_constants, ctx.u32_zero_value)}; - return ctx.OpLoad(ctx.F32[1], pointer); + if (ctx.profile.unified_descriptor_binding) { + const Id pointer_type{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.F32[1])}; + const Id pointer{ + ctx.OpAccessChain(pointer_type, ctx.rescaling_push_constants, ctx.u32_zero_value)}; + return ctx.OpLoad(ctx.F32[1], pointer); + } else { + const Id composite{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)}; + return ctx.OpCompositeExtract(ctx.F32[1], composite, 2u); + } } Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 7d7c0627e..519ce8b9b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -224,6 +224,40 @@ Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx Decorate(ctx, inst, sample); return ctx.OpCompositeExtract(result_type, sample, 1U); } + +Id IsScaled(EmitContext& ctx, const IR::Value& index, Id member_index, u32 base_index) { + 
const Id push_constant_u32{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1])}; + Id bit{}; + if (index.IsImmediate()) { + // Use BitwiseAnd instead of BitfieldExtract for better codegen on Nvidia OpenGL. + // LOP32I.NZ is used to set the predicate rather than BFE+ISETP. + const u32 index_value{index.U32() + base_index}; + const Id word_index{ctx.Const(index_value / 32)}; + const Id bit_index_mask{ctx.Const(1u << (index_value % 32))}; + const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants, + member_index, word_index)}; + const Id word{ctx.OpLoad(ctx.U32[1], pointer)}; + bit = ctx.OpBitwiseAnd(ctx.U32[1], word, bit_index_mask); + } else { + Id index_value{ctx.Def(index)}; + if (base_index != 0) { + index_value = ctx.OpIAdd(ctx.U32[1], index_value, ctx.Const(base_index)); + } + const Id word_index{ctx.OpShiftRightArithmetic(ctx.U32[1], index_value, ctx.Const(5u))}; + const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants, + member_index, word_index)}; + const Id word{ctx.OpLoad(ctx.U32[1], pointer)}; + const Id bit_index{ctx.OpBitwiseAnd(ctx.U32[1], index_value, ctx.Const(31u))}; + bit = ctx.OpBitFieldUExtract(ctx.U32[1], index_value, bit_index, ctx.Const(1u)); + } + return ctx.OpINotEqual(ctx.U1, bit, ctx.u32_zero_value); +} + +Id BitTest(EmitContext& ctx, Id mask, Id bit) { + const Id shifted{ctx.OpShiftRightLogical(ctx.U32[1], mask, bit)}; + const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))}; + return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value); +} } // Anonymous namespace Id EmitBindlessImageSampleImplicitLod(EmitContext&) { @@ -471,29 +505,27 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id } Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index) { - const Id push_constant_u32{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1])}; - const Id member_index{ctx.Const(ctx.rescaling_textures_member_index)}; - Id 
bit{}; - if (index.IsImmediate()) { - // Use BitwiseAnd instead of BitfieldExtract for better codegen on Nvidia OpenGL. - // LOP32I.NZ is used to set the predicate rather than BFE+ISETP. - const u32 index_value{index.U32()}; - const Id word_index{ctx.Const(index_value / 32)}; - const Id bit_index_mask{ctx.Const(1u << (index_value % 32))}; - const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants, - member_index, word_index)}; - const Id word{ctx.OpLoad(ctx.U32[1], pointer)}; - bit = ctx.OpBitwiseAnd(ctx.U32[1], word, bit_index_mask); + if (ctx.profile.unified_descriptor_binding) { + const Id member_index{ctx.Const(ctx.rescaling_textures_member_index)}; + return IsScaled(ctx, index, member_index, ctx.texture_rescaling_index); } else { - const Id index_value{ctx.Def(index)}; - const Id word_index{ctx.OpShiftRightArithmetic(ctx.U32[1], index_value, ctx.Const(5u))}; - const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants, - member_index, word_index)}; - const Id word{ctx.OpLoad(ctx.U32[1], pointer)}; - const Id bit_index{ctx.OpBitwiseAnd(ctx.U32[1], index_value, ctx.Const(31u))}; - bit = ctx.OpBitFieldUExtract(ctx.U32[1], index_value, bit_index, ctx.Const(1u)); + const Id composite{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)}; + const Id mask_f32{ctx.OpCompositeExtract(ctx.F32[1], composite, 0u)}; + const Id mask{ctx.OpBitcast(ctx.U32[1], mask_f32)}; + return BitTest(ctx, mask, ctx.Def(index)); + } +} + +Id EmitIsImageScaled(EmitContext& ctx, const IR::Value& index) { + if (ctx.profile.unified_descriptor_binding) { + const Id member_index{ctx.Const(ctx.rescaling_images_member_index)}; + return IsScaled(ctx, index, member_index, ctx.image_rescaling_index); + } else { + const Id composite{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)}; + const Id mask_f32{ctx.OpCompositeExtract(ctx.F32[1], composite, 1u)}; + const Id mask{ctx.OpBitcast(ctx.U32[1], mask_f32)}; + return BitTest(ctx, mask, 
ctx.Def(index)); } - return ctx.OpINotEqual(ctx.U1, bit, ctx.u32_zero_value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 69fc18f5f..6cd22dd3e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -514,6 +514,7 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index); +Id EmitIsImageScaled(EmitContext& ctx, const IR::Value& index); Id EmitBindlessImageAtomicIAdd32(EmitContext&); Id EmitBindlessImageAtomicSMin32(EmitContext&); Id EmitBindlessImageAtomicUMin32(EmitContext&); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 3ccd91c10..356f889ac 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1950,6 +1950,10 @@ U1 IREmitter::IsTextureScaled(const U32& index) { return Inst(Opcode::IsTextureScaled, index); } +U1 IREmitter::IsImageScaled(const U32& index) { + return Inst(Opcode::IsImageScaled, index); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index a78628413..13eefa88b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -361,6 +361,7 @@ public: const Value& value, TextureInstInfo info); [[nodiscard]] U1 IsTextureScaled(const U32& index); + [[nodiscard]] U1 IsImageScaled(const U32& index); [[nodiscard]] U1 
VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index ec629428a..6929919df 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -494,6 +494,7 @@ OPCODE(ImageRead, U32x4, Opaq OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) OPCODE(IsTextureScaled, U1, U32, ) +OPCODE(IsImageScaled, U1, U32, ) // Atomic Image operations diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index ed82fa2ac..1e476d83d 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -432,6 +432,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; case IR::Opcode::ResolutionDownFactor: case IR::Opcode::IsTextureScaled: + case IR::Opcode::IsImageScaled: info.uses_rescaling_uniform = true; break; case IR::Opcode::LaneId: diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index 86c8f0c69..2af12fc07 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -129,8 +129,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) { void PatchImageRead(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const auto info{inst.Flags()}; - // TODO: Scale conditionally - const IR::U1 is_scaled{IR::Value{true}}; + const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))}; ScaleIntegerCoord(ir, inst, is_scaled); } diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index dc89cb923..f3f83a258 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -63,8 +63,6 @@ struct RuntimeInfo { std::array 
generic_input_types{}; VaryingState previous_stage_stores; - u32 num_textures{}; - bool convert_depth_mode{}; bool force_early_z{}; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 187a28e4d..d4dd10bb6 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -5,6 +5,7 @@ #include #include +#include "shader_recompiler/backend/glasm/emit_glasm.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" @@ -229,8 +230,10 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff .padding = 0, }; buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); - glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, - reinterpret_cast(&ssbo)); + glProgramLocalParametersI4uivNV( + PROGRAM_LUT[stage], + Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1, + reinterpret_cast(&ssbo)); } } @@ -250,8 +253,10 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf .padding = 0, }; buffer.MakeResident(is_written ? 
GL_READ_WRITE : GL_READ_ONLY); - glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, - reinterpret_cast(&ssbo)); + glProgramLocalParametersI4uivNV( + GL_COMPUTE_PROGRAM_NV, + Shader::Backend::GLASM::PROGRAM_LOCAL_PARAMETER_STORAGE_BUFFER_BASE + binding_index, 1, + reinterpret_cast(&ssbo)); } } diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 60c65047b..9af61c340 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -181,33 +181,40 @@ void ComputePipeline::Configure() { texture_binding += num_texture_buffers; image_binding += num_image_buffers; - u32 scaling_mask{}; + u32 texture_scaling_mask{}; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; textures[texture_binding] = image_view.Handle(desc.type); if (texture_cache.IsRescaling(image_view)) { - scaling_mask |= 1u << texture_binding; + texture_scaling_mask |= 1u << texture_binding; } ++texture_binding; } } + u32 image_scaling_mask{}; for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; if (desc.is_written) { texture_cache.MarkModification(image_view.image_id); } - images[image_binding++] = image_view.StorageView(desc.type, desc.format); + images[image_binding] = image_view.StorageView(desc.type, desc.format); + if (texture_cache.IsRescaling(image_view)) { + image_scaling_mask |= 1u << image_binding; + } + ++image_binding; } } if (info.uses_rescaling_uniform) { - const f32 float_scaling_mask{Common::BitCast(scaling_mask)}; + const f32 float_texture_scaling_mask{Common::BitCast(texture_scaling_mask)}; + const f32 float_image_scaling_mask{Common::BitCast(image_scaling_mask)}; if (assembly_program.handle 
!= 0) { - glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_scaling_mask, 0.0f, 0.0f, - 0.0f); + glProgramLocalParameter4fARB(GL_COMPUTE_PROGRAM_NV, 0, float_texture_scaling_mask, + float_image_scaling_mask, 0.0f, 0.0f); } else { - glProgramUniform4f(source_program.handle, 0, float_scaling_mask, 0.0f, 0.0f, 0.0f); + glProgramUniform4f(source_program.handle, 0, float_texture_scaling_mask, + float_image_scaling_mask, 0.0f, 0.0f); } } if (texture_binding != 0) { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 11559d6ce..f8495896c 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -464,8 +464,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { views_it += num_texture_buffers[stage]; views_it += num_image_buffers[stage]; - u32 scaling_mask{}; + u32 texture_scaling_mask{}; + u32 image_scaling_mask{}; u32 stage_texture_binding{}; + u32 stage_image_binding{}; const auto& info{stage_infos[stage]}; for (const auto& desc : info.texture_descriptors) { @@ -473,7 +475,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; textures[texture_binding] = image_view.Handle(desc.type); if (texture_cache.IsRescaling(image_view)) { - scaling_mask |= 1u << stage_texture_binding; + texture_scaling_mask |= 1u << stage_texture_binding; } ++texture_binding; ++stage_texture_binding; @@ -485,20 +487,26 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if (desc.is_written) { texture_cache.MarkModification(image_view.image_id); } - images[image_binding++] = image_view.StorageView(desc.type, desc.format); + images[image_binding] = image_view.StorageView(desc.type, desc.format); + if (texture_cache.IsRescaling(image_view)) { + image_scaling_mask |= 1u << stage_image_binding; + } + ++image_binding; + ++stage_image_binding; } } if 
(info.uses_rescaling_uniform) { - const f32 float_scaling_mask{Common::BitCast(scaling_mask)}; + const f32 float_texture_scaling_mask{Common::BitCast(texture_scaling_mask)}; + const f32 float_image_scaling_mask{Common::BitCast(image_scaling_mask)}; const bool is_rescaling{texture_cache.IsRescaling()}; const f32 config_down_factor{Settings::values.resolution_info.down_factor}; const f32 down_factor{is_rescaling ? config_down_factor : 1.0f}; if (use_assembly) { - glProgramLocalParameter4fARB(AssemblyStage(stage), 0, float_scaling_mask, - down_factor, 0.0f, 0.0f); + glProgramLocalParameter4fARB(AssemblyStage(stage), 0, float_texture_scaling_mask, + float_image_scaling_mask, down_factor, 0.0f); } else { - glProgramUniform4f(source_programs[stage].handle, 0, float_scaling_mask, - down_factor, 0.0f, 0.0f); + glProgramUniform4f(source_programs[stage].handle, 0, float_texture_scaling_mask, + float_image_scaling_mask, down_factor, 0.0f); } } }}; diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index bce4220c6..85ae726d1 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -10,6 +10,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" @@ -20,7 +21,7 @@ namespace Vulkan { -constexpr size_t MAX_RESCALING_WORDS = 4; +using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS; class DescriptorLayoutBuilder { public: @@ -74,7 +75,8 @@ public: .stageFlags = static_cast( is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS), .offset = 0, - .size = (is_compute ? 0 : sizeof(f32)) + sizeof(std::array), + .size = (is_compute ? 
0 : sizeof(f32)) + + sizeof(std::array), }; return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, @@ -146,14 +148,25 @@ public: } } - const std::array& Data() const noexcept { + void PushImage(bool is_rescaled) noexcept { + *image_ptr |= is_rescaled ? image_bit : 0; + image_bit <<= 1; + if (image_bit == 0) { + image_bit = 1u; + ++image_ptr; + } + } + + const std::array& Data() const noexcept { return words; } private: - std::array words{}; + std::array words{}; u32* texture_ptr{words.data()}; + u32* image_ptr{words.data() + Shader::Backend::SPIRV::NUM_TEXTURE_SCALING_WORDS}; u32 texture_bit{1u}; + u32 image_bit{1u}; }; inline void PushImageDescriptors(TextureCache& texture_cache, @@ -181,6 +194,7 @@ inline void PushImageDescriptors(TextureCache& texture_cache, } const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; update_descriptor_queue.AddImage(vk_image_view); + rescaling.PushImage(texture_cache.IsRescaling(image_view)); } } } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 691ef0841..eb8b4e08b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -139,9 +139,6 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program } else { info.previous_stage_stores.mask.set(); } - for (const auto& stage : programs) { - info.num_textures += Shader::NumDescriptors(stage.info.texture_descriptors); - } const Shader::Stage stage{program.stage}; const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; -- cgit v1.2.3 From dc28284437c7f99baa98a242f4713a1ab94418c8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 4 Aug 2021 00:30:16 -0400 Subject: emit_spirv: Fix RescalingLayout alignment --- 
src/shader_recompiler/backend/spirv/emit_spirv.h | 4 ++-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index db0998ad6..dd6dff0c8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -23,8 +23,8 @@ constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS = struct RescalingLayout { u32 down_factor; - std::array rescaling_textures; - std::array rescaling_images; + alignas(16) std::array rescaling_textures; + alignas(16) std::array rescaling_images; }; [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index c29bab678..5ad1180bb 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -479,7 +479,7 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { } if (update_rescaling) { const f32 config_down_factor{Settings::values.resolution_info.down_factor}; - const float scale_down_factor{is_rescaling ? config_down_factor : 1.0f}; + const f32 scale_down_factor{is_rescaling ? 
config_down_factor : 1.0f}; cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, 0, sizeof(scale_down_factor), &scale_down_factor); } -- cgit v1.2.3 From ed675cfd8cc89d64c763becfd991d1dd40deac5a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 4 Aug 2021 19:02:30 -0400 Subject: texture_cache: Disable dst_image scaling in BlitImage Fixes scaling in Super Mario Party --- src/video_core/texture_cache/texture_cache.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0e70c4db2..d86f80b5d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -478,11 +478,13 @@ void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); bool is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); - if (is_src_rescaled && !is_dst_rescaled) { - if (ImageCanRescale(dst_image)) { - is_dst_rescaled = dst_image.ScaleUp(); - } - } + // TODO: This requires the rendertarget image views to be updated with the upscaled sizes, + // otherwise the blit will use a larger framebuffer size than the image view attachment. + // if (is_src_rescaled && !is_dst_rescaled) { + // if (ImageCanRescale(dst_image)) { + // is_dst_rescaled = dst_image.ScaleUp(); + // } + // } const auto& resolution = Settings::values.resolution_info; const auto scale_up = [&](u32 value) -> u32 { -- cgit v1.2.3 From 8f78444de35bdbdc83a709b8a822d66018bb9852 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 2 Aug 2021 01:03:15 +0200 Subject: shader: Fix TextureSize check on rescaling. --- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 48 +++++++++++-------------- 1 file changed, 21 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index 2af12fc07..b94273aa5 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -26,11 +26,11 @@ void PatchFragCoord(IR::Block& block, IR::Inst& inst) { IR::U32 scaled_value{value}; bool changed{}; if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { - scaled_value = ir.IMul(value, ir.Imm32(up_scale)); + scaled_value = ir.IMul(scaled_value, ir.Imm32(up_scale)); changed = true; } if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) { - scaled_value = ir.ShiftRightArithmetic(value, ir.Imm32(down_shift)); + scaled_value = ir.ShiftRightArithmetic(scaled_value, ir.Imm32(down_shift)); changed = true; } if (changed) { @@ -40,41 
+40,42 @@ void PatchFragCoord(IR::Block& block, IR::Inst& inst) { } } -[[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, IR::U32 value) { +[[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, const IR::U1& is_scaled, IR::U32 value) { + IR::U32 scaled_value{value}; + bool changed{}; if (const u32 down_shift = Settings::values.resolution_info.down_shift; down_shift != 0) { - value = ir.ShiftLeftLogical(value, ir.Imm32(down_shift)); + scaled_value = ir.ShiftLeftLogical(scaled_value, ir.Imm32(down_shift)); + changed = true; } if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { - value = ir.IDiv(value, ir.Imm32(up_scale)); + scaled_value = ir.IDiv(scaled_value, ir.Imm32(up_scale)); + changed = true; + } + if (changed) { + return IR::U32{ir.Select(is_scaled, scaled_value, value)}; + } else { + return value; } - return value; } void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) { const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const auto info{inst.Flags()}; + const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; switch (info.type) { - case TextureType::Color1D: - case TextureType::ColorArray1D: { - const IR::Value new_inst{&*block.PrependNewInst(it, inst)}; - const IR::U32 width{DownScale(ir, IR::U32{ir.CompositeExtract(new_inst, 0)})}; - const IR::Value replacement{ir.CompositeConstruct(width, ir.CompositeExtract(new_inst, 1), - ir.CompositeExtract(new_inst, 2), - ir.CompositeExtract(new_inst, 3))}; - inst.ReplaceUsesWith(replacement); - break; - } case TextureType::Color2D: case TextureType::ColorArray2D: { const IR::Value new_inst{&*block.PrependNewInst(it, inst)}; - const IR::U32 width{DownScale(ir, IR::U32{ir.CompositeExtract(new_inst, 0)})}; - const IR::U32 height{DownScale(ir, IR::U32{ir.CompositeExtract(new_inst, 1)})}; + const IR::U32 width{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 
0)})}; + const IR::U32 height{DownScale(ir, is_scaled, IR::U32{ir.CompositeExtract(new_inst, 1)})}; const IR::Value replacement{ir.CompositeConstruct( width, height, ir.CompositeExtract(new_inst, 2), ir.CompositeExtract(new_inst, 3))}; inst.ReplaceUsesWith(replacement); break; } + case TextureType::Color1D: + case TextureType::ColorArray1D: case TextureType::Color3D: case TextureType::ColorCube: case TextureType::ColorArrayCube: @@ -88,15 +89,6 @@ void ScaleIntegerCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scale const auto info{inst.Flags()}; const IR::Value coord{inst.Arg(1)}; switch (info.type) { - case TextureType::Color1D: - inst.SetArg(1, Scale(ir, is_scaled, IR::U32{coord})); - break; - case TextureType::ColorArray1D: { - const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)})}; - const IR::U32 y{ir.CompositeExtract(coord, 1)}; - inst.SetArg(1, ir.CompositeConstruct(x, y)); - break; - } case TextureType::Color2D: { const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)})}; const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)})}; @@ -110,6 +102,8 @@ void ScaleIntegerCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scale inst.SetArg(1, ir.CompositeConstruct(x, y, z)); break; } + case TextureType::Color1D: + case TextureType::ColorArray1D: case TextureType::Color3D: case TextureType::ColorCube: case TextureType::ColorArrayCube: -- cgit v1.2.3 From 4b1393a691d1d8d79c57e7b73734cb8287b91760 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 7 Aug 2021 02:15:24 +0200 Subject: Texture Cache: Correctly fix Blits Rescaling. 
--- src/video_core/texture_cache/texture_cache.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d86f80b5d..2de439889 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -473,18 +473,21 @@ void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, PrepareImage(dst_id, true, false); Image& dst_image = slot_images[dst_id]; - const Image& src_image = slot_images[src_id]; + Image& src_image = slot_images[src_id]; - const bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); + bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); bool is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); - // TODO: This requires the rendertarget image views to be updated with the upscaled sizes, - // otherwise the blit will use a larger framebuffer size than the image view attachment. - // if (is_src_rescaled && !is_dst_rescaled) { - // if (ImageCanRescale(dst_image)) { - // is_dst_rescaled = dst_image.ScaleUp(); - // } - // } + if (is_src_rescaled != is_dst_rescaled) { + if (ImageCanRescale(dst_image)) { + ScaleUp(dst_image); + is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); + } + if (ImageCanRescale(src_image)) { + ScaleUp(src_image); + is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); + } + } const auto& resolution = Settings::values.resolution_info; const auto scale_up = [&](u32 value) -> u32 { -- cgit v1.2.3 From dfa82915262ca26d0884528b7bdae791554332ca Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 7 Aug 2021 02:59:05 +0200 Subject: RescalingPass: Agregate pixels on texelFetch while on Fragment Shader --- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 100 +++++++++++++++++++++++- 1 file changed, 97 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index b94273aa5..71c9d9e6f 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -40,6 +40,22 @@ void PatchFragCoord(IR::Block& block, IR::Inst& inst) { } } +[[nodiscard]] IR::U32 SubScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value, + 
const IR::Attribute attrib) { + if (Settings::values.resolution_info.active) { + const IR::F32 opt1{ir.Imm32(Settings::values.resolution_info.up_factor)}; + const IR::F32 base{ir.FPMul(ir.ConvertUToF(32, 32, value), opt1)}; + const IR::F32 frag_coord{ir.GetAttribute(attrib)}; + const IR::F32 opt2{ir.Imm32(Settings::values.resolution_info.down_factor)}; + const IR::F32 floor{ir.FPMul(opt1, ir.FPFloor(ir.FPMul(frag_coord, opt2)))}; + const IR::U32 deviation{ + ir.ConvertFToU(32, ir.FPAdd(base, ir.FPAdd(frag_coord, ir.FPNeg(floor))))}; + return IR::U32{ir.Select(is_scaled, deviation, value)}; + } else { + return value; + } +} + [[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, const IR::U1& is_scaled, IR::U32 value) { IR::U32 scaled_value{value}; bool changed{}; @@ -113,6 +129,74 @@ void ScaleIntegerCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scale } } +void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto info{inst.Flags()}; + const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; + const IR::Value coord{inst.Arg(1)}; + switch (info.type) { + case TextureType::Color2D: { + const IR::U32 x{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)}, + IR::Attribute::PositionX)}; + const IR::U32 y{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)}, + IR::Attribute::PositionY)}; + inst.SetArg(1, ir.CompositeConstruct(x, y)); + break; + } + case TextureType::ColorArray2D: { + const IR::U32 x{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)}, + IR::Attribute::PositionX)}; + const IR::U32 y{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)}, + IR::Attribute::PositionY)}; + const IR::U32 z{ir.CompositeExtract(coord, 2)}; + inst.SetArg(1, ir.CompositeConstruct(x, y, z)); + break; + } + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color3D: + case TextureType::ColorCube: + 
case TextureType::ColorArrayCube: + case TextureType::Buffer: + // Nothing to patch here + break; + } +} + +void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto info{inst.Flags()}; + const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))}; + const IR::Value coord{inst.Arg(1)}; + switch (info.type) { + case TextureType::Color2D: { + const IR::U32 x{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)}, + IR::Attribute::PositionX)}; + const IR::U32 y{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)}, + IR::Attribute::PositionY)}; + inst.SetArg(1, ir.CompositeConstruct(x, y)); + break; + } + case TextureType::ColorArray2D: { + const IR::U32 x{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)}, + IR::Attribute::PositionX)}; + const IR::U32 y{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)}, + IR::Attribute::PositionY)}; + const IR::U32 z{ir.CompositeExtract(coord, 2)}; + inst.SetArg(1, ir.CompositeConstruct(x, y, z)); + break; + } + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color3D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + // Nothing to patch here + break; + } +} + void PatchImageFetch(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const auto info{inst.Flags()}; @@ -145,13 +229,23 @@ void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { break; } case IR::Opcode::ImageQueryDimensions: - PatchImageQueryDimensions(block, inst); + if (program.stage == Stage::Compute) { + PatchImageQueryDimensions(block, inst); + } break; case IR::Opcode::ImageFetch: - PatchImageFetch(block, inst); + if (is_fragment_shader) { + SubScaleImageFetch(block, inst); + } else if (program.stage == Stage::Compute) { + PatchImageFetch(block, inst); + } break; case 
IR::Opcode::ImageRead: - PatchImageRead(block, inst); + if (is_fragment_shader) { + SubScaleImageRead(block, inst); + } else if (program.stage == Stage::Compute) { + PatchImageRead(block, inst); + } break; default: break; -- cgit v1.2.3 From d7c97921696486a95aaaf5c805b9fcc12230de77 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 7 Aug 2021 04:32:17 +0200 Subject: TextureCache: Fix Buffer Views Scaling. --- src/video_core/texture_cache/image_view_base.cpp | 11 ++++++----- src/video_core/texture_cache/texture_cache.h | 3 +++ 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index e66dc9320..c7b4fc231 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -37,11 +37,12 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i } ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) - : format{info.format}, type{ImageViewType::Buffer}, size{ - .width = info.size.width, - .height = 1, - .depth = 1, - } { + : image_id{NULL_IMAGE_ID}, format{info.format}, type{ImageViewType::Buffer}, + size{ + .width = info.size.width, + .height = 1, + .depth = 1, + } { ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2de439889..764984546 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -631,6 +631,9 @@ bool TextureCache

::IsRescaling() const noexcept { template bool TextureCache

::IsRescaling(const ImageViewBase& image_view) const noexcept { + if (image_view.type == ImageViewType::Buffer) { + return false; + } const ImageBase& image = slot_images[image_view.image_id]; return True(image.flags & ImageFlagBits::Rescaled); } -- cgit v1.2.3 From 65781f88f80a322b08241dc7dbceceed83434e30 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 4 Aug 2021 00:30:16 -0400 Subject: emit_spirv: Fix RescalingLayout alignment --- src/shader_recompiler/backend/spirv/emit_spirv.h | 1 + src/video_core/renderer_vulkan/pipeline_helper.h | 6 ++++-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 5 +++-- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index dd6dff0c8..cf59f2572 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -26,6 +26,7 @@ struct RescalingLayout { alignas(16) std::array rescaling_textures; alignas(16) std::array rescaling_images; }; +constexpr u32 RESCALING_PUSH_CONSTANT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures); [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings); diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 85ae726d1..3612e8a18 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -22,6 +22,7 @@ namespace Vulkan { using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS; +using Shader::Backend::SPIRV::RESCALING_PUSH_CONSTANT_WORDS_OFFSET; class DescriptorLayoutBuilder { public: @@ -71,12 +72,13 @@ public: } vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { + using Shader::Backend::SPIRV::RescalingLayout; + 
const u32 push_offset = is_compute ? RESCALING_PUSH_CONSTANT_WORDS_OFFSET : 0; const VkPushConstantRange range{ .stageFlags = static_cast( is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS), .offset = 0, - .size = (is_compute ? 0 : sizeof(f32)) + - sizeof(std::array), + .size = sizeof(RescalingLayout) - push_offset, }; return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5ad1180bb..f08e9e840 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -483,8 +483,9 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, 0, sizeof(scale_down_factor), &scale_down_factor); } - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, sizeof(f32), - sizeof(rescaling_data), rescaling_data.data()); + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, + RESCALING_PUSH_CONSTANT_WORDS_OFFSET, sizeof(rescaling_data), + rescaling_data.data()); if (!descriptor_set_layout) { return; } -- cgit v1.2.3 From 68e038404cc0069d9f59068a60b56e67b4321e7a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 14 Aug 2021 00:01:47 -0400 Subject: shader, video_core: Fix GCC build errors --- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 4 ---- src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 3 --- src/video_core/texture_cache/texture_cache_base.h | 10 +++------- 3 files changed, 3 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 519ce8b9b..4d168a96d 100644 --- 
a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -243,10 +243,6 @@ Id IsScaled(EmitContext& ctx, const IR::Value& index, Id member_index, u32 base_ if (base_index != 0) { index_value = ctx.OpIAdd(ctx.U32[1], index_value, ctx.Const(base_index)); } - const Id word_index{ctx.OpShiftRightArithmetic(ctx.U32[1], index_value, ctx.Const(5u))}; - const Id pointer{ctx.OpAccessChain(push_constant_u32, ctx.rescaling_push_constants, - member_index, word_index)}; - const Id word{ctx.OpLoad(ctx.U32[1], pointer)}; const Id bit_index{ctx.OpBitwiseAnd(ctx.U32[1], index_value, ctx.Const(31u))}; bit = ctx.OpBitFieldUExtract(ctx.U32[1], index_value, bit_index, ctx.Const(1u)); } diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 9af61c340..5c1f21c65 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -143,9 +143,6 @@ void ComputePipeline::Configure() { } texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); - const bool is_rescaling{texture_cache.IsRescaling()}; - const f32 config_down_factor{Settings::values.resolution_info.down_factor}; - const f32 down_factor{is_rescaling ? 
config_down_factor : 1.0f}; if (assembly_program.handle != 0) { program_manager.BindComputeAssemblyProgram(assembly_program.handle); } else { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 8b417b611..517a4c224 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -41,13 +41,9 @@ using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using namespace Common::Literals; struct ImageViewInOut { - u32 index; - bool blacklist; - union { - struct Empty { - } empty{}; - ImageViewId id; - }; + u32 index{}; + bool blacklist{}; + ImageViewId id{}; }; template -- cgit v1.2.3 From c5bbbf3902d97fc89775d6db5e5bebd3929ca24b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 27 Aug 2021 23:24:05 +0200 Subject: Texture Cache: fix scaling on upload and stop scaling on base resolution. --- .../renderer_vulkan/vk_texture_cache.cpp | 46 +++++++++++++++------- 1 file changed, 32 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 3400066a6..9afe49387 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1053,6 +1053,10 @@ Image::~Image() = default; void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { // TODO: Move this to another API + const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ScaleDown(true); + } scheduler->RequestOutsideRenderPassOperationContext(); std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); const VkBuffer src_buffer = map.buffer; @@ -1063,6 +1067,9 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { @@ -1133,17 +1140,23 @@ bool Image::ScaleUp(bool save_as_backup) { } ASSERT(info.type != ImageType::Linear); 
scaling_count++; - ASSERT(scaling_count < 10); flags |= ImageFlagBits::Rescaled; - /*if (!runtime->is_rescaling_on) { - return; - }*/ + if (!runtime->is_rescaling_on) { + return true; + } + const auto& resolution = runtime->resolution; vk::Image rescaled_image = - MakeImage(runtime->device, info, resolution.up_scale, resolution.down_shift); - MemoryCommit new_commit( - runtime->memory_allocator.Commit(rescaled_image, MemoryUsage::DeviceLocal)); + has_backup ? std::move(backup_image) + : MakeImage(runtime->device, info, resolution.up_scale, resolution.down_shift); + MemoryCommit new_commit = has_backup ? std::move(backup_commit) + : MemoryCommit(runtime->memory_allocator.Commit( + rescaled_image, MemoryUsage::DeviceLocal)); + has_backup = false; + if (aspect_mask == 0) { + aspect_mask = ImageAspectMask(info.format); + } SCOPE_EXIT({ if (save_as_backup) { backup_image = std::move(image); @@ -1175,6 +1188,9 @@ bool Image::ScaleUp(bool save_as_backup) { } void Image::SwapBackup() { + if (!runtime->is_rescaling_on) { + return; + } ASSERT(has_backup); runtime->prescaled_images.Push(std::move(image)); runtime->prescaled_commits.Push(std::move(commit)); @@ -1190,16 +1206,18 @@ bool Image::ScaleDown(bool save_as_backup) { ASSERT(info.type != ImageType::Linear); flags &= ~ImageFlagBits::Rescaled; scaling_count++; - ASSERT(scaling_count < 10); - /*if (!runtime->is_rescaling_on) { - return false; - }*/ + if (!runtime->is_rescaling_on) { + return true; + } const auto& resolution = runtime->resolution; - vk::Image downscaled_image = MakeImage(runtime->device, info); - MemoryCommit new_commit( - runtime->memory_allocator.Commit(downscaled_image, MemoryUsage::DeviceLocal)); + vk::Image downscaled_image = + has_backup ? std::move(backup_image) : MakeImage(runtime->device, info); + MemoryCommit new_commit = has_backup ? 
std::move(backup_commit) + : MemoryCommit(runtime->memory_allocator.Commit( + downscaled_image, MemoryUsage::DeviceLocal)); + has_backup = false; if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } -- cgit v1.2.3 From a6b88e85bfb14c45345f6443b54d15a61e3975d5 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 17 Aug 2021 00:12:52 +0200 Subject: Renderer: Implement Bicubic and ScaleForce filters. --- src/common/settings.cpp | 10 +- src/common/settings.h | 17 ++- src/video_core/host_shaders/CMakeLists.txt | 4 + .../host_shaders/opengl_present_bicubic.frag | 56 +++++++++ .../host_shaders/opengl_present_scaleforce.frag | 135 ++++++++++++++++++++ .../host_shaders/vulkan_present_bicubic.frag | 56 +++++++++ .../host_shaders/vulkan_present_scaleforce.frag | 137 +++++++++++++++++++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 27 +++- src/video_core/renderer_opengl/renderer_opengl.h | 4 +- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 123 ++++++++++++++++-- src/video_core/renderer_vulkan/vk_blit_screen.h | 8 +- src/yuzu/configuration/config.cpp | 5 + src/yuzu/configuration/config.h | 1 + src/yuzu/configuration/configure_graphics.cpp | 25 ++++ src/yuzu/configuration/configure_graphics.ui | 46 ++++++- 15 files changed, 620 insertions(+), 34 deletions(-) create mode 100644 src/video_core/host_shaders/opengl_present_bicubic.frag create mode 100644 src/video_core/host_shaders/opengl_present_scaleforce.frag create mode 100644 src/video_core/host_shaders/vulkan_present_bicubic.frag create mode 100644 src/video_core/host_shaders/vulkan_present_scaleforce.frag (limited to 'src') diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 4b7fa4b82..f0686a7c5 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -48,6 +48,7 @@ void LogSettings() { log_setting("Core_UseMultiCore", values.use_multi_core.GetValue()); log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue()); log_setting("Renderer_UseResolutionScaling", 
values.resolution_setup.GetValue()); + log_setting("Renderer_ScalingFilter", values.scaling_filter.GetValue()); log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue()); log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue()); log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue()); @@ -113,19 +114,10 @@ void UpdateRescalingInfo() { info.up_scale = 1; info.down_shift = 1; break; - case ResolutionSetup::Res3_4X: - info.up_scale = 3; - info.down_shift = 2; - break; case ResolutionSetup::Res1X: info.up_scale = 1; info.down_shift = 0; break; - case ResolutionSetup::Res3_2X: { - info.up_scale = 3; - info.down_shift = 1; - break; - } case ResolutionSetup::Res2X: info.up_scale = 2; info.down_shift = 0; diff --git a/src/common/settings.h b/src/common/settings.h index ca88c086b..f629c7c56 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -54,12 +54,16 @@ enum class NvdecEmulation : u32 { enum class ResolutionSetup : u32 { Res1_2X = 0, - Res3_4X = 1, - Res1X = 2, - Res3_2X = 3, - Res2X = 4, - Res3X = 5, - Res4X = 6, + Res1X = 1, + Res2X = 2, + Res3X = 3, + Res4X = 4, +}; + +enum class ScalingFilter : u32 { + Bilinear = 0, + Bicubic = 1, + ScaleForce = 2, }; struct ResolutionScalingInfo { @@ -471,6 +475,7 @@ struct Values { ResolutionScalingInfo resolution_info{}; Setting resolution_setup{ResolutionSetup::Res1X, "resolution_setup"}; + Setting scaling_filter{ScalingFilter::Bilinear, "scaling_filter"}; // *nix platforms may have issues with the borderless windowed fullscreen mode. // Default to exclusive fullscreen on these platforms for now. 
RangedSetting fullscreen_mode{ diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 20d748c12..835b37944 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -6,11 +6,15 @@ set(SHADER_FILES convert_float_to_depth.frag full_screen_triangle.vert opengl_copy_bc4.comp + opengl_present_scaleforce.frag + opengl_present_bicubic.frag opengl_present.frag opengl_present.vert pitch_unswizzle.comp vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag + vulkan_present_bicubic.frag + vulkan_present_scaleforce.frag vulkan_present.frag vulkan_present.vert vulkan_quad_indexed.comp diff --git a/src/video_core/host_shaders/opengl_present_bicubic.frag b/src/video_core/host_shaders/opengl_present_bicubic.frag new file mode 100644 index 000000000..17772095a --- /dev/null +++ b/src/video_core/host_shaders/opengl_present_bicubic.frag @@ -0,0 +1,56 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 460 core + +layout (location = 0) in vec2 frag_tex_coord; + +layout (location = 0) out vec4 color; + +layout (binding = 1) uniform sampler2D color_texture; + +vec4 cubic(float v) { + vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; + vec4 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) * (1.0 / 6.0); +} + +vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { + + vec2 texSize = textureSize(textureSampler, 0); + vec2 invTexSize = 1.0 / texSize; + + texCoords = texCoords * texSize - 0.5; + + vec2 fxy = fract(texCoords); + texCoords -= fxy; + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= invTexSize.xxyy; + + vec4 sample0 = texture(textureSampler, offset.xz); + vec4 sample1 = texture(textureSampler, offset.yz); + vec4 sample2 = texture(textureSampler, offset.xw); + vec4 sample3 = texture(textureSampler, offset.yw); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +} + +void main() { + color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); +} diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag new file mode 100644 index 000000000..0153f62c0 --- /dev/null +++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag @@ -0,0 +1,135 @@ +// from https://github.com/BreadFish64/ScaleFish/tree/master/scale_force + +// MIT License +// +// Copyright (c) 2020 BreadFish64 +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, 
including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +precision mediump float; + +layout (location = 0) in vec2 tex_coord; + +layout (location = 0) out vec4 frag_color; + +layout (binding = 1) uniform sampler2D input_texture; + +vec2 tex_size; +vec2 inv_tex_size; + +vec4 cubic(float v) { + vec3 n = vec3(1.0, 2.0, 3.0) - v; + vec3 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) / 6.0; +} + +// Bicubic interpolation +vec4 textureBicubic(vec2 tex_coords) { + tex_coords = tex_coords * tex_size - 0.5; + + vec2 fxy = modf(tex_coords, tex_coords); + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= inv_tex_size.xxyy; + + vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); + vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); + vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); + vec4 sample3 = textureLod(input_texture, offset.yw, 0.0); + 
+ float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +} + +mat4x3 center_matrix; +vec4 center_alpha; + +// Finds the distance between four colors and cc in YCbCr space +vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { + // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion + const vec3 K = vec3(0.2627, 0.6780, 0.0593); + const float LUMINANCE_WEIGHT = .6; + const mat3 YCBCR_MATRIX = + mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, + -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); + + mat4x3 colors = mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; + mat4x3 YCbCr = YCBCR_MATRIX * colors; + vec4 color_dist = vec3(1.0) * YCbCr; + color_dist *= color_dist; + vec4 alpha = vec4(A.a, B.a, C.a, D.a); + + return sqrt((color_dist + abs(center_alpha - alpha)) * alpha * center_alpha); +} + +void main() { + vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); + vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); + vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); + vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); + vec4 cc = textureLod(input_texture, tex_coord, 0.0); + vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); + vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); + vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); + vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); + + + tex_size = vec2(textureSize(input_texture, 0)); + inv_tex_size = 1.0 / tex_size; + center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); + center_alpha = cc.aaaa; + + vec4 offset_tl = ColorDist(tl, tc, tr, cr); + vec4 offset_br = ColorDist(br, bc, bl, cl); + + // Calculate how different cc is from the texels around it + float total_dist = dot(offset_tl + offset_br, vec4(1.0)); + + // Add 
together all the distances with direction taken into account + vec4 tmp = offset_tl - offset_br; + vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); + + if (total_dist == 0.0) { + // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters + // and it doesn't really matter which filter is used when the colors aren't changing. + frag_color = vec4(cc.rgb, 1.0f); + } else { + // When the image has thin points, they tend to split apart. + // This is because the texels all around are different + // and total_offset reaches into clear areas. + // This works pretty well to keep the offset in bounds for these cases. + float clamp_val = length(total_offset) / total_dist; + vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; + + frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); + } +} diff --git a/src/video_core/host_shaders/vulkan_present_bicubic.frag b/src/video_core/host_shaders/vulkan_present_bicubic.frag new file mode 100644 index 000000000..17772095a --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_bicubic.frag @@ -0,0 +1,56 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 460 core + +layout (location = 0) in vec2 frag_tex_coord; + +layout (location = 0) out vec4 color; + +layout (binding = 1) uniform sampler2D color_texture; + +vec4 cubic(float v) { + vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; + vec4 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) * (1.0 / 6.0); +} + +vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { + + vec2 texSize = textureSize(textureSampler, 0); + vec2 invTexSize = 1.0 / texSize; + + texCoords = texCoords * texSize - 0.5; + + vec2 fxy = fract(texCoords); + texCoords -= fxy; + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= invTexSize.xxyy; + + vec4 sample0 = texture(textureSampler, offset.xz); + vec4 sample1 = texture(textureSampler, offset.yz); + vec4 sample2 = texture(textureSampler, offset.xw); + vec4 sample3 = texture(textureSampler, offset.yw); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +} + +void main() { + color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); +} diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce.frag b/src/video_core/host_shaders/vulkan_present_scaleforce.frag new file mode 100644 index 000000000..801c8eae9 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce.frag @@ -0,0 +1,137 @@ +#version 320 es + +// from https://github.com/BreadFish64/ScaleFish/tree/master/scale_force + +// MIT License +// +// Copyright (c) 2020 BreadFish64 +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software 
without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +precision mediump float; + +layout (location = 0) in vec2 tex_coord; + +layout (location = 0) out vec4 frag_color; + +layout (binding = 1) uniform sampler2D input_texture; + +vec2 tex_size; +vec2 inv_tex_size; + +vec4 cubic(float v) { + vec3 n = vec3(1.0, 2.0, 3.0) - v; + vec3 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) / 6.0; +} + +// Bicubic interpolation +vec4 textureBicubic(vec2 tex_coords) { + tex_coords = tex_coords * tex_size - 0.5; + + vec2 fxy = modf(tex_coords, tex_coords); + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= inv_tex_size.xxyy; + + vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); + vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); + vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); + vec4 sample3 = 
textureLod(input_texture, offset.yw, 0.0); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +} + +mat4x3 center_matrix; +vec4 center_alpha; + +// Finds the distance between four colors and cc in YCbCr space +vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { + // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion + const vec3 K = vec3(0.2627, 0.6780, 0.0593); + const float LUMINANCE_WEIGHT = .6; + const mat3 YCBCR_MATRIX = + mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, + -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); + + mat4x3 colors = mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; + mat4x3 YCbCr = YCBCR_MATRIX * colors; + vec4 color_dist = vec3(1.0) * YCbCr; + color_dist *= color_dist; + vec4 alpha = vec4(A.a, B.a, C.a, D.a); + + return sqrt((color_dist + abs(center_alpha - alpha)) * alpha * center_alpha); +} + +void main() { + vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); + vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); + vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); + vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); + vec4 cc = textureLod(input_texture, tex_coord, 0.0); + vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); + vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); + vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); + vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); + + + tex_size = vec2(textureSize(input_texture, 0)); + inv_tex_size = 1.0 / tex_size; + center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); + center_alpha = cc.aaaa; + + vec4 offset_tl = ColorDist(tl, tc, tr, cr); + vec4 offset_br = ColorDist(br, bc, bl, cl); + + // Calculate how different cc is from the texels around it + float total_dist = 
dot(offset_tl + offset_br, vec4(1.0)); + + // Add together all the distances with direction taken into account + vec4 tmp = offset_tl - offset_br; + vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); + + if (total_dist == 0.0) { + // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters + // and it doesn't really matter which filter is used when the colors aren't changing. + frag_color = vec4(cc.rgb, 1.0f); + } else { + // When the image has thin points, they tend to split apart. + // This is because the texels all around are different + // and total_offset reaches into clear areas. + // This works pretty well to keep the offset in bounds for these cases. + float clamp_val = length(total_offset) / total_dist; + vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; + + frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); + } +} diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 0f7b69c6d..71a5e3adf 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -21,7 +21,9 @@ #include "core/memory.h" #include "core/perf_stats.h" #include "core/telemetry_session.h" +#include "video_core/host_shaders/opengl_present_bicubic_frag.h" #include "video_core/host_shaders/opengl_present_frag.h" +#include "video_core/host_shaders/opengl_present_scaleforce_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" @@ -252,7 +254,11 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); - present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, 
GL_FRAGMENT_SHADER); + present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); + present_bicubic_fragment = + CreateProgram(HostShaders::OPENGL_PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); + present_scaleforce_fragment = + CreateProgram(HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG, GL_FRAGMENT_SHADER); // Generate presentation sampler present_sampler.Create(); @@ -337,7 +343,24 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle); + + GLuint fragment_handle; + const auto filter = Settings::values.scaling_filter.GetValue(); + switch (filter) { + case Settings::ScalingFilter::Bilinear: + fragment_handle = present_bilinear_fragment.handle; + break; + case Settings::ScalingFilter::Bicubic: + fragment_handle = present_bicubic_fragment.handle; + break; + case Settings::ScalingFilter::ScaleForce: + fragment_handle = present_scaleforce_fragment.handle; + break; + default: + fragment_handle = present_bilinear_fragment.handle; + break; + } + program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle); glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index d455f572f..bf3d3502c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -111,7 +111,9 @@ private: OGLSampler present_sampler; OGLBuffer vertex_buffer; OGLProgram present_vertex; - OGLProgram present_fragment; + OGLProgram present_bilinear_fragment; + OGLProgram present_bicubic_fragment; + OGLProgram present_scaleforce_fragment; OGLFramebuffer screenshot_framebuffer; // GPU address of the 
vertex buffer diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 7051e6559..19d91ecfc 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -12,11 +12,14 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/math_util.h" +#include "common/settings.h" #include "core/core.h" #include "core/frontend/emu_window.h" #include "core/memory.h" #include "video_core/gpu.h" +#include "video_core/host_shaders/vulkan_present_bicubic_frag_spv.h" #include "video_core/host_shaders/vulkan_present_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_scaleforce_frag_spv.h" #include "video_core/host_shaders/vulkan_present_vert_spv.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" @@ -258,8 +261,22 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, .offset = {0, 0}, .extent = size, }; + const auto filter = Settings::values.scaling_filter.GetValue(); cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + switch (filter) { + case Settings::ScalingFilter::Bilinear: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); + break; + case Settings::ScalingFilter::Bicubic: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bicubic_pipeline); + break; + case Settings::ScalingFilter::ScaleForce: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *scaleforce_pipeline); + break; + default: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); + break; + } cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); @@ -324,7 +341,9 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) void VKBlitScreen::CreateShaders() { vertex_shader = BuildShader(device, 
VULKAN_PRESENT_VERT_SPV); - fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); + bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); + bicubic_fragment_shader = BuildShader(device, VULKAN_PRESENT_BICUBIC_FRAG_SPV); + scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FRAG_SPV); } void VKBlitScreen::CreateSemaphores() { @@ -468,7 +487,7 @@ void VKBlitScreen::CreatePipelineLayout() { } void VKBlitScreen::CreateGraphicsPipeline() { - const std::array shader_stages{{ + const std::array bilinear_shader_stages{{ { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = nullptr, @@ -483,7 +502,49 @@ void VKBlitScreen::CreateGraphicsPipeline() { .pNext = nullptr, .flags = 0, .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = *fragment_shader, + .module = *bilinear_fragment_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + }}; + + const std::array bicubic_shader_stages{{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = *vertex_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = *bicubic_fragment_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + }}; + + const std::array scaleforce_shader_stages{{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = *vertex_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = *scaleforce_fragment_shader, .pName = "main", .pSpecializationInfo = nullptr, }, @@ -583,12 +644,56 @@ void 
VKBlitScreen::CreateGraphicsPipeline() { .pDynamicStates = dynamic_states.data(), }; - const VkGraphicsPipelineCreateInfo pipeline_ci{ + const VkGraphicsPipelineCreateInfo bilinear_pipeline_ci{ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(bilinear_shader_stages.size()), + .pStages = bilinear_shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = nullptr, + .pViewportState = &viewport_state_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisampling_ci, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = *renderpass, + .subpass = 0, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }; + + const VkGraphicsPipelineCreateInfo bicubic_pipeline_ci{ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(bicubic_shader_stages.size()), + .pStages = bicubic_shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = nullptr, + .pViewportState = &viewport_state_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisampling_ci, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = *renderpass, + .subpass = 0, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }; + + const VkGraphicsPipelineCreateInfo scaleforce_pipeline_ci{ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stageCount = static_cast(shader_stages.size()), - .pStages = shader_stages.data(), + .stageCount = static_cast(scaleforce_shader_stages.size()), + .pStages = scaleforce_shader_stages.data(), .pVertexInputState = 
&vertex_input_ci, .pInputAssemblyState = &input_assembly_ci, .pTessellationState = nullptr, @@ -605,7 +710,9 @@ void VKBlitScreen::CreateGraphicsPipeline() { .basePipelineIndex = 0, }; - pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci); + bilinear_pipeline = device.GetLogical().CreateGraphicsPipeline(bilinear_pipeline_ci); + bicubic_pipeline = device.GetLogical().CreateGraphicsPipeline(bicubic_pipeline_ci); + scaleforce_pipeline = device.GetLogical().CreateGraphicsPipeline(scaleforce_pipeline_ci); } void VKBlitScreen::CreateSampler() { diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 430bcfbca..d3a16f0ba 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -107,11 +107,15 @@ private: const VKScreenInfo& screen_info; vk::ShaderModule vertex_shader; - vk::ShaderModule fragment_shader; + vk::ShaderModule bilinear_fragment_shader; + vk::ShaderModule bicubic_fragment_shader; + vk::ShaderModule scaleforce_fragment_shader; vk::DescriptorPool descriptor_pool; vk::DescriptorSetLayout descriptor_set_layout; vk::PipelineLayout pipeline_layout; - vk::Pipeline pipeline; + vk::Pipeline bilinear_pipeline; + vk::Pipeline bicubic_pipeline; + vk::Pipeline scaleforce_pipeline; vk::RenderPass renderpass; std::vector framebuffers; vk::DescriptorSets descriptor_sets; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 7ed833203..3803bf501 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -825,6 +825,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.fullscreen_mode); ReadGlobalSetting(Settings::values.aspect_ratio); ReadGlobalSetting(Settings::values.resolution_setup); + ReadGlobalSetting(Settings::values.scaling_filter); ReadGlobalSetting(Settings::values.max_anisotropy); ReadGlobalSetting(Settings::values.use_speed_limit); 
ReadGlobalSetting(Settings::values.speed_limit); @@ -1371,6 +1372,10 @@ void Config::SaveRendererValues() { static_cast(Settings::values.resolution_setup.GetValue(global)), static_cast(Settings::values.resolution_setup.GetDefault()), Settings::values.resolution_setup.UsingGlobal()); + WriteSetting(QString::fromStdString(Settings::values.scaling_filter.GetLabel()), + static_cast(Settings::values.scaling_filter.GetValue(global)), + static_cast(Settings::values.scaling_filter.GetDefault()), + Settings::values.scaling_filter.UsingGlobal()); WriteGlobalSetting(Settings::values.max_anisotropy); WriteGlobalSetting(Settings::values.use_speed_limit); WriteGlobalSetting(Settings::values.speed_limit); diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index fbb91d312..97dc1bb47 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -190,5 +190,6 @@ Q_DECLARE_METATYPE(Settings::GPUAccuracy); Q_DECLARE_METATYPE(Settings::FullscreenMode); Q_DECLARE_METATYPE(Settings::NvdecEmulation); Q_DECLARE_METATYPE(Settings::ResolutionSetup); +Q_DECLARE_METATYPE(Settings::ScalingFilter); Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 4f08ae3e0..e01efaeda 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -89,6 +89,7 @@ void ConfigureGraphics::SetConfiguration() { ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->nvdec_emulation_widget->setEnabled(runtime_lock); + ui->resolution_combobox->setEnabled(runtime_lock); ui->accelerate_astc->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); ui->use_asynchronous_gpu_emulation->setChecked( @@ -104,6 +105,8 @@ void 
ConfigureGraphics::SetConfiguration() { ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); ui->resolution_combobox->setCurrentIndex( static_cast(Settings::values.resolution_setup.GetValue())); + ui->scaling_filter_combobox->setCurrentIndex( + static_cast(Settings::values.scaling_filter.GetValue())); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); ConfigurationShared::SetHighlight(ui->api_widget, @@ -129,6 +132,11 @@ void ConfigureGraphics::SetConfiguration() { ConfigurationShared::SetHighlight(ui->resolution_label, !Settings::values.resolution_setup.UsingGlobal()); + ConfigurationShared::SetPerGameSetting(ui->scaling_filter_combobox, + &Settings::values.scaling_filter); + ConfigurationShared::SetHighlight(ui->scaling_filter_label, + !Settings::values.scaling_filter.UsingGlobal()); + ui->bg_combobox->setCurrentIndex(Settings::values.bg_red.UsingGlobal() ? 0 : 1); ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); @@ -144,6 +152,10 @@ void ConfigureGraphics::ApplyConfiguration() { ui->resolution_combobox->currentIndex() - ((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET)); + const auto scaling_filter = static_cast( + ui->scaling_filter_combobox->currentIndex() - + ((Settings::IsConfiguringGlobal()) ? 
0 : ConfigurationShared::USE_GLOBAL_OFFSET)); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.fullscreen_mode, ui->fullscreen_mode_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.aspect_ratio, @@ -178,6 +190,9 @@ void ConfigureGraphics::ApplyConfiguration() { if (Settings::values.resolution_setup.UsingGlobal()) { Settings::values.resolution_setup.SetValue(resolution_setup); } + if (Settings::values.scaling_filter.UsingGlobal()) { + Settings::values.scaling_filter.SetValue(scaling_filter); + } } else { if (ui->resolution_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.resolution_setup.SetGlobal(true); @@ -185,6 +200,12 @@ void ConfigureGraphics::ApplyConfiguration() { Settings::values.resolution_setup.SetGlobal(false); Settings::values.resolution_setup.SetValue(resolution_setup); } + if (ui->scaling_filter_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.scaling_filter.SetGlobal(true); + } else { + Settings::values.scaling_filter.SetGlobal(false); + Settings::values.scaling_filter.SetValue(scaling_filter); + } if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.renderer_backend.SetGlobal(true); Settings::values.shader_backend.SetGlobal(true); @@ -333,6 +354,7 @@ void ConfigureGraphics::SetupPerGameUI() { ui->fullscreen_mode_combobox->setEnabled(Settings::values.fullscreen_mode.UsingGlobal()); ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); ui->resolution_combobox->setEnabled(Settings::values.resolution_setup.UsingGlobal()); + ui->scaling_filter_combobox->setEnabled(Settings::values.scaling_filter.UsingGlobal()); ui->use_asynchronous_gpu_emulation->setEnabled( Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal()); @@ -364,6 +386,9 @@ void ConfigureGraphics::SetupPerGameUI() { 
ConfigurationShared::SetColoredComboBox( ui->resolution_combobox, ui->resolution_label, static_cast(Settings::values.resolution_setup.GetValue(true))); + ConfigurationShared::SetColoredComboBox( + ui->scaling_filter_combobox, ui->scaling_filter_label, + static_cast(Settings::values.scaling_filter.GetValue(true))); ConfigurationShared::InsertGlobalItem( ui->api, static_cast(Settings::values.renderer_backend.GetValue(true))); ConfigurationShared::InsertGlobalItem( diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 1b6ac3cbb..d5e0d4e89 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -340,32 +340,66 @@ - 0.75X (540p/810p) + 1X (720p/1080p) - 1X (720p/1080p) + 2X (1440p/2160[4K]p) - 1.5X (1080p/1620p) + 3X (2160p[4K]/3240p[6K]) - 2X (1440p/2160[4K]p) + 4X (2880p/4320p[8K]) + + + + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Window Adapting Filter: + + + + + - 3X (2160p[4K]/3240p[6K]) + Bilinear - 4X (2880p/4320p[8K]) + Bicubic + + + + + ScaleForce -- cgit v1.2.3 From 29710f3250413bac75eddb613b7f7d2c079021c2 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 9 Sep 2021 19:31:20 -0400 Subject: gl_texture_cache: fix scaling on upload --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 6aea375f1..edb8503cb 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -693,6 +693,10 @@ Image::~Image() = default; void Image::UploadMemory(const ImageBufferMap& map, std::span copies) { + const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ScaleDown(); + } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); 
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); @@ -712,6 +716,9 @@ void Image::UploadMemory(const ImageBufferMap& map, } CopyBufferToImage(copy, map.offset); } + if (is_rescaled) { + ScaleUp(); + } } void Image::DownloadMemory(ImageBufferMap& map, -- cgit v1.2.3 From ae8d19d17eb5448207ece99ae07507c542c6ddae Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 9 Sep 2021 23:01:39 -0400 Subject: Renderers: Unify post processing filter shaders --- src/video_core/host_shaders/CMakeLists.txt | 6 +- .../host_shaders/opengl_present_bicubic.frag | 56 -------- .../host_shaders/opengl_present_scaleforce.frag | 135 ------------------- src/video_core/host_shaders/present_bicubic.frag | 67 ++++++++++ .../host_shaders/present_scaleforce.frag | 145 +++++++++++++++++++++ .../host_shaders/vulkan_present_bicubic.frag | 56 -------- .../host_shaders/vulkan_present_scaleforce.frag | 137 ------------------- src/video_core/renderer_opengl/renderer_opengl.cpp | 9 +- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 8 +- 9 files changed, 222 insertions(+), 397 deletions(-) delete mode 100644 src/video_core/host_shaders/opengl_present_bicubic.frag delete mode 100644 src/video_core/host_shaders/opengl_present_scaleforce.frag create mode 100644 src/video_core/host_shaders/present_bicubic.frag create mode 100644 src/video_core/host_shaders/present_scaleforce.frag delete mode 100644 src/video_core/host_shaders/vulkan_present_bicubic.frag delete mode 100644 src/video_core/host_shaders/vulkan_present_scaleforce.frag (limited to 'src') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 835b37944..664d6ce5d 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -6,15 +6,13 @@ set(SHADER_FILES convert_float_to_depth.frag full_screen_triangle.vert opengl_copy_bc4.comp - opengl_present_scaleforce.frag - 
opengl_present_bicubic.frag opengl_present.frag opengl_present.vert pitch_unswizzle.comp + present_scaleforce.frag + present_bicubic.frag vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag - vulkan_present_bicubic.frag - vulkan_present_scaleforce.frag vulkan_present.frag vulkan_present.vert vulkan_quad_indexed.comp diff --git a/src/video_core/host_shaders/opengl_present_bicubic.frag b/src/video_core/host_shaders/opengl_present_bicubic.frag deleted file mode 100644 index 17772095a..000000000 --- a/src/video_core/host_shaders/opengl_present_bicubic.frag +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#version 460 core - -layout (location = 0) in vec2 frag_tex_coord; - -layout (location = 0) out vec4 color; - -layout (binding = 1) uniform sampler2D color_texture; - -vec4 cubic(float v) { - vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; - vec4 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = 6.0 - x - y - z; - return vec4(x, y, z, w) * (1.0 / 6.0); -} - -vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { - - vec2 texSize = textureSize(textureSampler, 0); - vec2 invTexSize = 1.0 / texSize; - - texCoords = texCoords * texSize - 0.5; - - vec2 fxy = fract(texCoords); - texCoords -= fxy; - - vec4 xcubic = cubic(fxy.x); - vec4 ycubic = cubic(fxy.y); - - vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy; - - vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); - vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; - - offset *= invTexSize.xxyy; - - vec4 sample0 = texture(textureSampler, offset.xz); - vec4 sample1 = texture(textureSampler, offset.yz); - vec4 sample2 = texture(textureSampler, offset.xw); - vec4 sample3 = texture(textureSampler, offset.yw); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return mix(mix(sample3, sample2, sx), mix(sample1, 
sample0, sx), sy); -} - -void main() { - color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); -} diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag deleted file mode 100644 index 0153f62c0..000000000 --- a/src/video_core/host_shaders/opengl_present_scaleforce.frag +++ /dev/null @@ -1,135 +0,0 @@ -// from https://github.com/BreadFish64/ScaleFish/tree/master/scale_force - -// MIT License -// -// Copyright (c) 2020 BreadFish64 -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -precision mediump float; - -layout (location = 0) in vec2 tex_coord; - -layout (location = 0) out vec4 frag_color; - -layout (binding = 1) uniform sampler2D input_texture; - -vec2 tex_size; -vec2 inv_tex_size; - -vec4 cubic(float v) { - vec3 n = vec3(1.0, 2.0, 3.0) - v; - vec3 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = 6.0 - x - y - z; - return vec4(x, y, z, w) / 6.0; -} - -// Bicubic interpolation -vec4 textureBicubic(vec2 tex_coords) { - tex_coords = tex_coords * tex_size - 0.5; - - vec2 fxy = modf(tex_coords, tex_coords); - - vec4 xcubic = cubic(fxy.x); - vec4 ycubic = cubic(fxy.y); - - vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; - - vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); - vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; - - offset *= inv_tex_size.xxyy; - - vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); - vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); - vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); - vec4 sample3 = textureLod(input_texture, offset.yw, 0.0); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); -} - -mat4x3 center_matrix; -vec4 center_alpha; - -// Finds the distance between four colors and cc in YCbCr space -vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { - // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion - const vec3 K = vec3(0.2627, 0.6780, 0.0593); - const float LUMINANCE_WEIGHT = .6; - const mat3 YCBCR_MATRIX = - mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, - -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); - - mat4x3 colors = mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; - mat4x3 YCbCr = YCBCR_MATRIX * colors; - vec4 color_dist = vec3(1.0) * YCbCr; - color_dist *= color_dist; - vec4 alpha = vec4(A.a, B.a, C.a, D.a); - - return sqrt((color_dist + 
abs(center_alpha - alpha)) * alpha * center_alpha); -} - -void main() { - vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); - vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); - vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); - vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); - vec4 cc = textureLod(input_texture, tex_coord, 0.0); - vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); - vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); - vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); - vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); - - - tex_size = vec2(textureSize(input_texture, 0)); - inv_tex_size = 1.0 / tex_size; - center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); - center_alpha = cc.aaaa; - - vec4 offset_tl = ColorDist(tl, tc, tr, cr); - vec4 offset_br = ColorDist(br, bc, bl, cl); - - // Calculate how different cc is from the texels around it - float total_dist = dot(offset_tl + offset_br, vec4(1.0)); - - // Add together all the distances with direction taken into account - vec4 tmp = offset_tl - offset_br; - vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); - - if (total_dist == 0.0) { - // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters - // and it doesn't really matter which filter is used when the colors aren't changing. - frag_color = vec4(cc.rgb, 1.0f); - } else { - // When the image has thin points, they tend to split apart. - // This is because the texels all around are different - // and total_offset reaches into clear areas. - // This works pretty well to keep the offset in bounds for these cases. 
- float clamp_val = length(total_offset) / total_dist; - vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; - - frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); - } -} diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag new file mode 100644 index 000000000..f3e5410e7 --- /dev/null +++ b/src/video_core/host_shaders/present_bicubic.frag @@ -0,0 +1,67 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + + +layout (location = 0) in vec2 frag_tex_coord; + +layout (location = 0) out vec4 color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; + +vec4 cubic(float v) { + vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; + vec4 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) * (1.0 / 6.0); +} + +vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { + + vec2 texSize = textureSize(textureSampler, 0); + vec2 invTexSize = 1.0 / texSize; + + texCoords = texCoords * texSize - 0.5; + + vec2 fxy = fract(texCoords); + texCoords -= fxy; + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= invTexSize.xxyy; + + vec4 sample0 = texture(textureSampler, offset.xz); + vec4 sample1 = texture(textureSampler, offset.yz); + vec4 sample2 = texture(textureSampler, offset.xw); + vec4 sample3 = texture(textureSampler, offset.yw); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, 
sample2, sx), mix(sample1, sample0, sx), sy); +} + +void main() { + color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); +} diff --git a/src/video_core/host_shaders/present_scaleforce.frag b/src/video_core/host_shaders/present_scaleforce.frag new file mode 100644 index 000000000..1829a9be8 --- /dev/null +++ b/src/video_core/host_shaders/present_scaleforce.frag @@ -0,0 +1,145 @@ +// MIT License +// +// Copyright (c) 2020 BreadFish64 +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce + +#version 460 + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout (location = 0) in vec2 tex_coord; + +layout (location = 0) out vec4 frag_color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; + +vec2 tex_size; +vec2 inv_tex_size; + +vec4 cubic(float v) { + vec3 n = vec3(1.0, 2.0, 3.0) - v; + vec3 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) / 6.0; +} + +// Bicubic interpolation +vec4 textureBicubic(vec2 tex_coords) { + tex_coords = tex_coords * tex_size - 0.5; + + vec2 fxy = modf(tex_coords, tex_coords); + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= inv_tex_size.xxyy; + + vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); + vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); + vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); + vec4 sample3 = textureLod(input_texture, offset.yw, 0.0); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +} + +mat4x3 center_matrix; +vec4 center_alpha; + +// Finds the distance between four colors and cc in YCbCr space +vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { + // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion + const vec3 K = vec3(0.2627, 0.6780, 0.0593); + const float LUMINANCE_WEIGHT = .6; + const mat3 YCBCR_MATRIX = + mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, + -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); + + mat4x3 colors = 
mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; + mat4x3 YCbCr = YCBCR_MATRIX * colors; + vec4 color_dist = vec3(1.0) * YCbCr; + color_dist *= color_dist; + vec4 alpha = vec4(A.a, B.a, C.a, D.a); + + return sqrt((color_dist + abs(center_alpha - alpha)) * alpha * center_alpha); +} + +void main() { + vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); + vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); + vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); + vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); + vec4 cc = textureLod(input_texture, tex_coord, 0.0); + vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); + vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); + vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); + vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); + + + tex_size = vec2(textureSize(input_texture, 0)); + inv_tex_size = 1.0 / tex_size; + center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); + center_alpha = cc.aaaa; + + vec4 offset_tl = ColorDist(tl, tc, tr, cr); + vec4 offset_br = ColorDist(br, bc, bl, cl); + + // Calculate how different cc is from the texels around it + float total_dist = dot(offset_tl + offset_br, vec4(1.0)); + + // Add together all the distances with direction taken into account + vec4 tmp = offset_tl - offset_br; + vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); + + if (total_dist == 0.0) { + // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters + // and it doesn't really matter which filter is used when the colors aren't changing. + frag_color = vec4(cc.rgb, 1.0f); + } else { + // When the image has thin points, they tend to split apart. + // This is because the texels all around are different + // and total_offset reaches into clear areas. 
+ // This works pretty well to keep the offset in bounds for these cases. + float clamp_val = length(total_offset) / total_dist; + vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; + + frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); + } +} diff --git a/src/video_core/host_shaders/vulkan_present_bicubic.frag b/src/video_core/host_shaders/vulkan_present_bicubic.frag deleted file mode 100644 index 17772095a..000000000 --- a/src/video_core/host_shaders/vulkan_present_bicubic.frag +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#version 460 core - -layout (location = 0) in vec2 frag_tex_coord; - -layout (location = 0) out vec4 color; - -layout (binding = 1) uniform sampler2D color_texture; - -vec4 cubic(float v) { - vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; - vec4 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = 6.0 - x - y - z; - return vec4(x, y, z, w) * (1.0 / 6.0); -} - -vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { - - vec2 texSize = textureSize(textureSampler, 0); - vec2 invTexSize = 1.0 / texSize; - - texCoords = texCoords * texSize - 0.5; - - vec2 fxy = fract(texCoords); - texCoords -= fxy; - - vec4 xcubic = cubic(fxy.x); - vec4 ycubic = cubic(fxy.y); - - vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy; - - vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); - vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; - - offset *= invTexSize.xxyy; - - vec4 sample0 = texture(textureSampler, offset.xz); - vec4 sample1 = texture(textureSampler, offset.yz); - vec4 sample2 = texture(textureSampler, offset.xw); - vec4 sample3 = texture(textureSampler, offset.yw); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); -} - -void main() { 
- color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); -} diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce.frag b/src/video_core/host_shaders/vulkan_present_scaleforce.frag deleted file mode 100644 index 801c8eae9..000000000 --- a/src/video_core/host_shaders/vulkan_present_scaleforce.frag +++ /dev/null @@ -1,137 +0,0 @@ -#version 320 es - -// from https://github.com/BreadFish64/ScaleFish/tree/master/scale_force - -// MIT License -// -// Copyright (c) 2020 BreadFish64 -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -precision mediump float; - -layout (location = 0) in vec2 tex_coord; - -layout (location = 0) out vec4 frag_color; - -layout (binding = 1) uniform sampler2D input_texture; - -vec2 tex_size; -vec2 inv_tex_size; - -vec4 cubic(float v) { - vec3 n = vec3(1.0, 2.0, 3.0) - v; - vec3 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = 6.0 - x - y - z; - return vec4(x, y, z, w) / 6.0; -} - -// Bicubic interpolation -vec4 textureBicubic(vec2 tex_coords) { - tex_coords = tex_coords * tex_size - 0.5; - - vec2 fxy = modf(tex_coords, tex_coords); - - vec4 xcubic = cubic(fxy.x); - vec4 ycubic = cubic(fxy.y); - - vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; - - vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); - vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; - - offset *= inv_tex_size.xxyy; - - vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); - vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); - vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); - vec4 sample3 = textureLod(input_texture, offset.yw, 0.0); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); -} - -mat4x3 center_matrix; -vec4 center_alpha; - -// Finds the distance between four colors and cc in YCbCr space -vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { - // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion - const vec3 K = vec3(0.2627, 0.6780, 0.0593); - const float LUMINANCE_WEIGHT = .6; - const mat3 YCBCR_MATRIX = - mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, - -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); - - mat4x3 colors = mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; - mat4x3 YCbCr = YCBCR_MATRIX * colors; - vec4 color_dist = vec3(1.0) * YCbCr; - color_dist *= color_dist; - vec4 alpha = vec4(A.a, B.a, C.a, D.a); - - return sqrt((color_dist + 
abs(center_alpha - alpha)) * alpha * center_alpha); -} - -void main() { - vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); - vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); - vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); - vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); - vec4 cc = textureLod(input_texture, tex_coord, 0.0); - vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); - vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); - vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); - vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); - - - tex_size = vec2(textureSize(input_texture, 0)); - inv_tex_size = 1.0 / tex_size; - center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); - center_alpha = cc.aaaa; - - vec4 offset_tl = ColorDist(tl, tc, tr, cr); - vec4 offset_br = ColorDist(br, bc, bl, cl); - - // Calculate how different cc is from the texels around it - float total_dist = dot(offset_tl + offset_br, vec4(1.0)); - - // Add together all the distances with direction taken into account - vec4 tmp = offset_tl - offset_br; - vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); - - if (total_dist == 0.0) { - // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters - // and it doesn't really matter which filter is used when the colors aren't changing. - frag_color = vec4(cc.rgb, 1.0f); - } else { - // When the image has thin points, they tend to split apart. - // This is because the texels all around are different - // and total_offset reaches into clear areas. - // This works pretty well to keep the offset in bounds for these cases. 
- float clamp_val = length(total_offset) / total_dist; - vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; - - frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); - } -} diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 71a5e3adf..955dbc744 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -21,10 +21,10 @@ #include "core/memory.h" #include "core/perf_stats.h" #include "core/telemetry_session.h" -#include "video_core/host_shaders/opengl_present_bicubic_frag.h" #include "video_core/host_shaders/opengl_present_frag.h" -#include "video_core/host_shaders/opengl_present_scaleforce_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" +#include "video_core/host_shaders/present_bicubic_frag.h" +#include "video_core/host_shaders/present_scaleforce_frag.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -255,10 +255,9 @@ void RendererOpenGL::InitOpenGLObjects() { // Create shader programs present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - present_bicubic_fragment = - CreateProgram(HostShaders::OPENGL_PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); + present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); present_scaleforce_fragment = - CreateProgram(HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG, GL_FRAGMENT_SHADER); + CreateProgram(HostShaders::PRESENT_SCALEFORCE_FRAG, GL_FRAGMENT_SHADER); // Generate presentation sampler present_sampler.Create(); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 19d91ecfc..c91b24e3a 
100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -17,9 +17,9 @@ #include "core/frontend/emu_window.h" #include "core/memory.h" #include "video_core/gpu.h" -#include "video_core/host_shaders/vulkan_present_bicubic_frag_spv.h" +#include "video_core/host_shaders/present_bicubic_frag_spv.h" +#include "video_core/host_shaders/present_scaleforce_frag_spv.h" #include "video_core/host_shaders/vulkan_present_frag_spv.h" -#include "video_core/host_shaders/vulkan_present_scaleforce_frag_spv.h" #include "video_core/host_shaders/vulkan_present_vert_spv.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" @@ -342,8 +342,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) void VKBlitScreen::CreateShaders() { vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); - bicubic_fragment_shader = BuildShader(device, VULKAN_PRESENT_BICUBIC_FRAG_SPV); - scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FRAG_SPV); + bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); + scaleforce_fragment_shader = BuildShader(device, PRESENT_SCALEFORCE_FRAG_SPV); } void VKBlitScreen::CreateSemaphores() { -- cgit v1.2.3 From fcf2b2c78a3c45ddcda6594b2fd3df733ceb951c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 10 Sep 2021 01:10:59 -0400 Subject: gl_texture_cache: Simplify scaling We don't need to reconstruct new textures every time we ScaleUp/ScaleDown. We can scale up once, and revert to the original texture whenever scaling down. 
Fixes memory leaks due to glDeleteTextures being deferred for later handling on some drivers --- .../renderer_opengl/gl_texture_cache.cpp | 67 ++++++++++++---------- src/video_core/renderer_opengl/gl_texture_cache.h | 3 +- 2 files changed, 39 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index edb8503cb..22fffb19b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -696,6 +696,7 @@ void Image::UploadMemory(const ImageBufferMap& map, const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { ScaleDown(); + scale_backup.Release(); } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); @@ -727,6 +728,7 @@ void Image::DownloadMemory(ImageBufferMap& map, const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { ScaleDown(); + scale_backup.Release(); } glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); glPixelStorei(GL_PACK_ALIGNMENT, 1); @@ -881,17 +883,11 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } } -bool Image::Scale(bool scale_src, bool scale_dst) { - if (!runtime->is_rescaling_on) { - return false; - } - if (gl_format == 0 && gl_type == 0) { - // compressed textures - return false; - } - if (info.type == ImageType::Linear) { - UNIMPLEMENTED(); - return false; +bool Image::Scale() { + if (scale_backup.handle) { + // This was a texture which was scaled previously, no need to repeat scaling + std::swap(texture, scale_backup); + return true; } const GLenum attachment = [this] { switch (GetFormatType(info.format)) { @@ -924,41 +920,36 @@ bool Image::Scale(bool scale_src, bool scale_dst) { const auto& resolution = runtime->resolution; const u32 up = resolution.up_scale; const u32 down = resolution.down_shift; + 
const auto scale = [&](u32 value) { return std::max((value * up) >> down, 1U); }; - const auto scale_up = [&](u32 value) { return std::max((value * up) >> down, 1U); }; - const u32 scaled_width = scale_up(info.size.width); - const u32 scaled_height = is_2d ? scale_up(info.size.height) : info.size.height; + const u32 scaled_width = scale(info.size.width); + const u32 scaled_height = is_2d ? scale(info.size.height) : info.size.height; const u32 original_width = info.size.width; const u32 original_height = info.size.height; - const u32 src_width = scale_src ? scaled_width : original_width; - const u32 src_height = scale_src ? scaled_height : original_height; - const u32 dst_width = scale_dst ? scaled_width : original_width; - const u32 dst_height = scale_dst ? scaled_height : original_height; - auto dst_info = info; - dst_info.size.width = dst_width; - dst_info.size.height = dst_height; - auto dst_texture = MakeImage(dst_info, gl_internal_format); + dst_info.size.width = scaled_width; + dst_info.size.height = scaled_height; + scale_backup = MakeImage(dst_info, gl_internal_format); const GLuint read_fbo = runtime->rescale_read_fbo.handle; const GLuint draw_fbo = runtime->rescale_draw_fbo.handle; for (s32 layer = 0; layer < info.resources.layers; ++layer) { for (s32 level = 0; level < info.resources.levels; ++level) { - const u32 src_level_width = std::max(1u, src_width >> level); - const u32 src_level_height = std::max(1u, src_height >> level); - const u32 dst_level_width = std::max(1u, dst_width >> level); - const u32 dst_level_height = std::max(1u, dst_height >> level); + const u32 src_level_width = std::max(1u, original_width >> level); + const u32 src_level_height = std::max(1u, original_height >> level); + const u32 dst_level_width = std::max(1u, scaled_width >> level); + const u32 dst_level_height = std::max(1u, scaled_height >> level); glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer); - glNamedFramebufferTextureLayer(draw_fbo, 
attachment, dst_texture.handle, level, layer); + glNamedFramebufferTextureLayer(draw_fbo, attachment, scale_backup.handle, level, layer); glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0, 0, dst_level_width, dst_level_height, mask, filter); glNamedFramebufferTextureLayer(read_fbo, attachment, 0, level, layer); glNamedFramebufferTextureLayer(draw_fbo, attachment, 0, level, layer); } } - texture = std::move(dst_texture); + std::swap(texture, scale_backup); return true; } @@ -966,16 +957,32 @@ bool Image::ScaleUp() { if (True(flags & ImageFlagBits::Rescaled)) { return false; } + if (!runtime->is_rescaling_on) { + return false; + } + if (gl_format == 0 && gl_type == 0) { + // compressed textures + return false; + } + if (info.type == ImageType::Linear) { + UNIMPLEMENTED(); + return false; + } flags |= ImageFlagBits::Rescaled; - return Scale(false, true); + return Scale(); } bool Image::ScaleDown() { if (False(flags & ImageFlagBits::Rescaled)) { return false; } + if (!scale_backup.handle) { + LOG_ERROR(Render_OpenGL, "Downscaling an upscaled texture that didn't backup original"); + return false; + } flags &= ~ImageFlagBits::Rescaled; - return Scale(true, false); + std::swap(texture, scale_backup); + return true; } ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 28c91b368..f4dcc6f9b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -203,9 +203,10 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); - bool Scale(bool scale_src, bool scale_dst); + bool Scale(); OGLTexture texture; + OGLTexture scale_backup; OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; -- cgit v1.2.3 From 80f8d4989eca127c7ca8c7bd63134127d6fd5edc 
Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 10 Sep 2021 01:28:02 -0400 Subject: bootmanager: Fix screenshot resolution factor usage Fixes screenshots at non integer scaling --- src/core/frontend/framebuffer_layout.cpp | 15 ++++++--------- src/core/frontend/framebuffer_layout.h | 2 +- src/yuzu/bootmanager.cpp | 8 +++----- src/yuzu/bootmanager.h | 2 +- src/yuzu/debugger/profiler.cpp | 2 +- src/yuzu/main.cpp | 3 +-- src/yuzu/uisettings.h | 1 - 7 files changed, 13 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index 0832463d6..4b58b672a 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -44,16 +44,13 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) { return res; } -FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) { - u32 width, height; +FramebufferLayout FrameLayoutFromResolutionScale(f32 res_scale) { + const bool is_docked = Settings::values.use_docked_mode.GetValue(); + const u32 screen_width = is_docked ? ScreenDocked::Width : ScreenUndocked::Width; + const u32 screen_height = is_docked ? 
ScreenDocked::Height : ScreenUndocked::Height; - if (Settings::values.use_docked_mode.GetValue()) { - width = ScreenDocked::Width * res_scale; - height = ScreenDocked::Height * res_scale; - } else { - width = ScreenUndocked::Width * res_scale; - height = ScreenUndocked::Height * res_scale; - } + const u32 width = static_cast<u32>(static_cast<f32>(screen_width) * res_scale); + const u32 height = static_cast<u32>(static_cast<f32>(screen_height) * res_scale); return DefaultFrameLayout(width, height); } diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index e2e3bbbb3..2e36c0163 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -60,7 +60,7 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height); * Convenience method to get frame layout by resolution scale * @param res_scale resolution scale factor */ -FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale); +FramebufferLayout FrameLayoutFromResolutionScale(f32 res_scale); /** * Convenience method to determine emulation aspect ratio diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 46ab0603d..976acd176 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -628,11 +628,9 @@ void GRenderWindow::ReleaseRenderTarget() { main_context.reset(); } -void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) { - VideoCore::RendererBase& renderer = system.Renderer(); - if (res_scale == 0) { - res_scale = VideoCore::GetResolutionScaleFactor(renderer); - } +void GRenderWindow::CaptureScreenshot(const QString& screenshot_path) { + auto& renderer = system.Renderer(); + const f32 res_scale = VideoCore::GetResolutionScaleFactor(renderer); const Layout::FramebufferLayout layout{Layout::FrameLayoutFromResolutionScale(res_scale)}; screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32); diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index
e6a0666e9..40fd4a9d6 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -178,7 +178,7 @@ public: bool IsLoadingComplete() const; - void CaptureScreenshot(u32 res_scale, const QString& screenshot_path); + void CaptureScreenshot(const QString& screenshot_path); std::pair<u32, u32> ScaleTouch(const QPointF& pos) const; diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp index 33110685a..a8b254199 100644 --- a/src/yuzu/debugger/profiler.cpp +++ b/src/yuzu/debugger/profiler.cpp @@ -163,7 +163,7 @@ void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) { } void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { - const auto wheel_position = ev->position().toPoint(); + const auto wheel_position = ev->pos(); MicroProfileMousePosition(wheel_position.x() / x_scale, wheel_position.y() / y_scale, ev->angleDelta().y() / 120); ev->accept(); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index a246f6bb3..3cb146982 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -2892,8 +2892,7 @@ void GMainWindow::OnCaptureScreenshot() { } } #endif - render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor.GetValue(), - filename); + render_window->CaptureScreenshot(filename); } // TODO: Written 2020-10-01: Remove per-game config migration code when it is irrelevant diff --git a/src/yuzu/uisettings.h b/src/yuzu/uisettings.h index cac19452f..936914ef3 100644 --- a/src/yuzu/uisettings.h +++ b/src/yuzu/uisettings.h @@ -68,7 +68,6 @@ struct Values { Settings::BasicSetting<bool> enable_discord_presence{true, "enable_discord_presence"}; Settings::BasicSetting<bool> enable_screenshot_save_as{true, "enable_screenshot_save_as"}; - Settings::BasicSetting<u32> screenshot_resolution_factor{0, "screenshot_resolution_factor"}; QString roms_path; QString symbols_path; -- cgit v1.2.3 From 6000fe69a4b2805a48ce045d9a383fda27d5e57b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 14 Sep 2021 00:45:50 -0400 Subject:
image_info: Mark MSAA textures as non-rescalable Blitting or resolving multisampled images requires the dimensions of the src and dst to be equal for valid usage, making them difficult for resolution scaling using the current implementation. --- src/video_core/texture_cache/image_info.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 7fa8fd4fe..bdf306bf9 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -101,7 +101,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { // FIXME: Call this without passing *this layer_stride = CalculateLayerStride(*this); maybe_unaligned_layer_stride = CalculateLayerSize(*this); - rescaleable &= (block.depth == 0) && resources.levels == 1; + rescaleable &= (block.depth == 0) && resources.levels == 1 && num_samples == 1; } } @@ -134,7 +134,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) type = ImageType::e3D; size.depth = rt.depth; } else { - rescaleable = block.depth == 0 && size.height > 256; + rescaleable = block.depth == 0 && size.height > 256 && num_samples == 1; type = ImageType::e2D; resources.layers = rt.depth; } -- cgit v1.2.3 From b3a9c8f108d90234c7e5e88b41f8e4bc9c163d96 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 18 Sep 2021 02:26:33 +0200 Subject: Shader: Don't rescale FragCoord if used by Shuffle --- .../frontend/maxwell/translate_program.cpp | 4 +- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 53 +++++++++++++++++++++- 2 files changed, 55 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 795f5cf08..743fb2420 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -178,10 +178,12 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPoolArg(0).Attribute()}; + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + if (is_fragment_shader) { + op->SetFlags(0xDEADBEEF); + } + break; + default: + break; + } + }; + const IR::Value param_1{inst.Arg(0)}; + if (param_1.IsImmediate()) { + break; + } + IR::Inst* op_a{param_1.InstRecursive()}; + if (op_a->GetOpcode() == IR::Opcode::GetAttribute) { + try_mark(op_a); + break; + } + if (op_a->GetOpcode() != IR::Opcode::BitCastF32U32) { + break; + } + const IR::Value param_2{op_a->Arg(0)}; + if (param_2.IsImmediate()) { + break; + } + IR::Inst* op_b{param_2.InstRecursive()}; + if (op_b->GetOpcode() == IR::Opcode::GetAttribute) { + try_mark(op_b); + } + break; + } + default: + break; + } +} void PatchFragCoord(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::F32 down_factor{ir.ResolutionDownFactor()}; @@ -219,7 +265,7 @@ void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { switch (attr) { case IR::Attribute::PositionX: case IR::Attribute::PositionY: - if (is_fragment_shader) { + if (is_fragment_shader && inst.Flags() != 0xDEADBEEF) { PatchFragCoord(block, inst); } break; @@ -254,6 +300,11 @@ void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { } // Anonymous namespace void RescalingPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + VisitMark(program, inst); + } + } for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { Visit(program, *block, inst); -- cgit v1.2.3 From edb5844240c339846d505735d2c2e1ad731f8be7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 17 Sep 2021 21:31:29 -0400 Subject: 
rescaling_pass: Fix and simplify shuffle/fragcoord pass --- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 46 +++++++++++-------------- 1 file changed, 20 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index 4d23b60c8..8bbaa55e4 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -14,45 +14,39 @@ namespace Shader::Optimization { namespace { -void VisitMark(const IR::Program& program, IR::Inst& inst) { +void VisitMark(const IR::Program& program, const IR::Inst& inst) { const bool is_fragment_shader{program.stage == Stage::Fragment}; + if (!is_fragment_shader) { + return; + } switch (inst.GetOpcode()) { case IR::Opcode::ShuffleIndex: case IR::Opcode::ShuffleUp: case IR::Opcode::ShuffleDown: case IR::Opcode::ShuffleButterfly: { - const auto try_mark = [is_fragment_shader](IR::Inst* op) { - const IR::Attribute attr{op->Arg(0).Attribute()}; + const IR::Value shfl_arg{inst.Arg(0)}; + if (shfl_arg.IsImmediate()) { + break; + } + const IR::Inst* const arg_inst{shfl_arg.InstRecursive()}; + if (arg_inst->GetOpcode() != IR::Opcode::BitCastU32F32) { + break; + } + const IR::Value bitcast_arg{arg_inst->Arg(0)}; + if (bitcast_arg.IsImmediate()) { + break; + } + IR::Inst* const bitcast_inst{bitcast_arg.InstRecursive()}; + if (bitcast_inst->GetOpcode() == IR::Opcode::GetAttribute) { + const IR::Attribute attr{bitcast_inst->Arg(0).Attribute()}; switch (attr) { case IR::Attribute::PositionX: case IR::Attribute::PositionY: - if (is_fragment_shader) { - op->SetFlags(0xDEADBEEF); - } + bitcast_inst->SetFlags(0xDEADBEEF); break; default: break; } - }; - const IR::Value param_1{inst.Arg(0)}; - if (param_1.IsImmediate()) { - break; - } - IR::Inst* op_a{param_1.InstRecursive()}; - if (op_a->GetOpcode() == IR::Opcode::GetAttribute) { - try_mark(op_a); - break; - } - if (op_a->GetOpcode() != 
IR::Opcode::BitCastF32U32) { - break; - } - const IR::Value param_2{op_a->Arg(0)}; - if (param_2.IsImmediate()) { - break; - } - IR::Inst* op_b{param_2.InstRecursive()}; - if (op_b->GetOpcode() == IR::Opcode::GetAttribute) { - try_mark(op_b); } break; } -- cgit v1.2.3 From c8a971be919158a265ec4c0f934ba368b8a3f315 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 17 Sep 2021 21:31:50 -0400 Subject: vk_texture_cache: Minor cleanup --- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 18 ++++++++---------- src/video_core/renderer_vulkan/vk_texture_cache.h | 1 - 2 files changed, 8 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 9afe49387..855f0a5d7 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -739,8 +739,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, VKScheduler& sch : device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_}, staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_}, astc_decoder_pass{astc_decoder_pass_}, render_pass_cache{render_pass_cache_}, - resolution{Settings::values.resolution_info}, - is_rescaling_on(resolution.up_scale != 1 || resolution.down_shift != 0) {} + resolution{Settings::values.resolution_info} {} void TextureCacheRuntime::Finish() { scheduler.Finish(); @@ -1141,11 +1140,11 @@ bool Image::ScaleUp(bool save_as_backup) { ASSERT(info.type != ImageType::Linear); scaling_count++; flags |= ImageFlagBits::Rescaled; - if (!runtime->is_rescaling_on) { - return true; - } const auto& resolution = runtime->resolution; + if (!resolution.active) { + return true; + } vk::Image rescaled_image = has_backup ? 
std::move(backup_image) : MakeImage(runtime->device, info, resolution.up_scale, resolution.down_shift); @@ -1188,7 +1187,7 @@ bool Image::ScaleUp(bool save_as_backup) { } void Image::SwapBackup() { - if (!runtime->is_rescaling_on) { + if (!runtime->resolution.active) { return; } ASSERT(has_backup); @@ -1206,17 +1205,16 @@ bool Image::ScaleDown(bool save_as_backup) { ASSERT(info.type != ImageType::Linear); flags &= ~ImageFlagBits::Rescaled; scaling_count++; - if (!runtime->is_rescaling_on) { - return true; - } const auto& resolution = runtime->resolution; + if (!resolution.active) { + return true; + } vk::Image downscaled_image = has_backup ? std::move(backup_image) : MakeImage(runtime->device, info); MemoryCommit new_commit = has_backup ? std::move(backup_commit) : MemoryCommit(runtime->memory_allocator.Commit( downscaled_image, MemoryUsage::DeviceLocal)); - has_backup = false; if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 9c39a6d99..84194b833 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -94,7 +94,6 @@ public: DelayedDestructionRing prescaled_images; DelayedDestructionRing prescaled_commits; Settings::ResolutionScalingInfo resolution; - bool is_rescaling_on{}; }; class Image : public VideoCommon::ImageBase { -- cgit v1.2.3 From b027fac7945184d644aa00940e528a20edcf0d06 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 18 Sep 2021 00:43:41 -0400 Subject: gl_texture_cache/rescaling_pass: minor cleanup --- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 20 ++++++++------------ src/video_core/renderer_opengl/gl_texture_cache.cpp | 5 ++--- src/video_core/renderer_opengl/gl_texture_cache.h | 1 - 3 files changed, 10 insertions(+), 16 deletions(-) (limited to 'src') diff --git 
a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index 8bbaa55e4..357e41f2b 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -82,18 +82,14 @@ void PatchFragCoord(IR::Block& block, IR::Inst& inst) { [[nodiscard]] IR::U32 SubScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value, const IR::Attribute attrib) { - if (Settings::values.resolution_info.active) { - const IR::F32 opt1{ir.Imm32(Settings::values.resolution_info.up_factor)}; - const IR::F32 base{ir.FPMul(ir.ConvertUToF(32, 32, value), opt1)}; - const IR::F32 frag_coord{ir.GetAttribute(attrib)}; - const IR::F32 opt2{ir.Imm32(Settings::values.resolution_info.down_factor)}; - const IR::F32 floor{ir.FPMul(opt1, ir.FPFloor(ir.FPMul(frag_coord, opt2)))}; - const IR::U32 deviation{ - ir.ConvertFToU(32, ir.FPAdd(base, ir.FPAdd(frag_coord, ir.FPNeg(floor))))}; - return IR::U32{ir.Select(is_scaled, deviation, value)}; - } else { - return value; - } + const IR::F32 opt1{ir.Imm32(Settings::values.resolution_info.up_factor)}; + const IR::F32 base{ir.FPMul(ir.ConvertUToF(32, 32, value), opt1)}; + const IR::F32 frag_coord{ir.GetAttribute(attrib)}; + const IR::F32 opt2{ir.Imm32(Settings::values.resolution_info.down_factor)}; + const IR::F32 floor{ir.FPMul(opt1, ir.FPFloor(ir.FPMul(frag_coord, opt2)))}; + const IR::U32 deviation{ + ir.ConvertFToU(32, ir.FPAdd(base, ir.FPAdd(frag_coord, ir.FPNeg(floor))))}; + return IR::U32{ir.Select(is_scaled, deviation, value)}; } [[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, const IR::U1& is_scaled, IR::U32 value) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 22fffb19b..64bd88c3b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -474,8 +474,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& 
device_, ProgramManager& set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); resolution = Settings::values.resolution_info; - is_rescaling_on = resolution.up_scale != 1 || resolution.down_shift != 0; - if (is_rescaling_on) { + if (resolution.active) { rescale_draw_fbo.Create(); rescale_read_fbo.Create(); @@ -957,7 +956,7 @@ bool Image::ScaleUp() { if (True(flags & ImageFlagBits::Rescaled)) { return false; } - if (!runtime->is_rescaling_on) { + if (!runtime->resolution.active) { return false; } if (gl_format == 0 && gl_type == 0) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index f4dcc6f9b..6c8033003 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -156,7 +156,6 @@ private: OGLFramebuffer rescale_draw_fbo; OGLFramebuffer rescale_read_fbo; Settings::ResolutionScalingInfo resolution; - bool is_rescaling_on{}; }; class Image : public VideoCommon::ImageBase { -- cgit v1.2.3 From 27af298e78ffa976bf126d7f276fa95c4f4f1363 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 18 Sep 2021 19:15:10 -0400 Subject: gl_texture_cache: Fix depth and integer format scaling blits --- .../renderer_opengl/gl_texture_cache.cpp | 73 +++++++++++++++++----- src/video_core/renderer_opengl/gl_texture_cache.h | 4 +- 2 files changed, 61 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 64bd88c3b..dc850e7b0 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -395,6 +395,33 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form UNREACHABLE_MSG("Invalid image format={}", format); return GL_R32UI; } + +[[nodiscard]] bool IsPixelFormatInteger(PixelFormat format) { + 
switch (format) { + case PixelFormat::A8B8G8R8_SINT: + case PixelFormat::A8B8G8R8_UINT: + case PixelFormat::A2B10G10R10_UINT: + case PixelFormat::R8_SINT: + case PixelFormat::R8_UINT: + case PixelFormat::R16G16B16A16_SINT: + case PixelFormat::R16G16B16A16_UINT: + case PixelFormat::R32G32B32A32_UINT: + case PixelFormat::R32G32B32A32_SINT: + case PixelFormat::R32G32_SINT: + case PixelFormat::R16_UINT: + case PixelFormat::R16_SINT: + case PixelFormat::R16G16_UINT: + case PixelFormat::R16G16_SINT: + case PixelFormat::R8G8_SINT: + case PixelFormat::R8G8_UINT: + case PixelFormat::R32G32_UINT: + case PixelFormat::R32_UINT: + case PixelFormat::R32_SINT: + return true; + default: + return false; + } +} } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { @@ -475,12 +502,14 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& resolution = Settings::values.resolution_info; if (resolution.active) { - rescale_draw_fbo.Create(); - rescale_read_fbo.Create(); + for (size_t i = 0; i < rescale_draw_fbos.size(); ++i) { + rescale_draw_fbos[i].Create(); + rescale_read_fbos[i].Create(); - // Make sure the framebuffer is created without DSA - glBindFramebuffer(GL_READ_FRAMEBUFFER, rescale_draw_fbo.handle); - glBindFramebuffer(GL_READ_FRAMEBUFFER, rescale_read_fbo.handle); + // Make sure the framebuffer is created without DSA + glBindFramebuffer(GL_READ_FRAMEBUFFER, rescale_draw_fbos[i].handle); + glBindFramebuffer(GL_READ_FRAMEBUFFER, rescale_read_fbos[i].handle); + } } } @@ -888,8 +917,9 @@ bool Image::Scale() { std::swap(texture, scale_backup); return true; } - const GLenum attachment = [this] { - switch (GetFormatType(info.format)) { + const auto format_type = GetFormatType(info.format); + const GLenum attachment = [format_type] { + switch (format_type) { case SurfaceType::ColorTexture: return GL_COLOR_ATTACHMENT0; case SurfaceType::Depth: @@ -901,8 +931,8 @@ bool Image::Scale() { return GL_COLOR_ATTACHMENT0; } }(); - const GLenum mask = [this] { 
- switch (GetFormatType(info.format)) { + const GLenum mask = [format_type] { + switch (format_type) { case SurfaceType::ColorTexture: return GL_COLOR_BUFFER_BIT; case SurfaceType::Depth: @@ -914,8 +944,25 @@ bool Image::Scale() { return GL_COLOR_BUFFER_BIT; } }(); - const GLenum filter = (mask & GL_COLOR_BUFFER_BIT) != 0 ? GL_LINEAR : GL_NEAREST; + const size_t fbo_index = [format_type] { + switch (format_type) { + case SurfaceType::ColorTexture: + return 0; + case SurfaceType::Depth: + return 1; + case SurfaceType::DepthStencil: + return 2; + default: + UNREACHABLE(); + return 0; + } + }(); const bool is_2d = info.type == ImageType::e2D; + const bool is_color{(mask & GL_COLOR_BUFFER_BIT) != 0}; + // Integer formats must use NEAREST filter + const bool linear_color_format{is_color && !IsPixelFormatInteger(info.format)}; + const GLenum filter = linear_color_format ? GL_LINEAR : GL_NEAREST; + const auto& resolution = runtime->resolution; const u32 up = resolution.up_scale; const u32 down = resolution.down_shift; @@ -931,8 +978,8 @@ bool Image::Scale() { dst_info.size.height = scaled_height; scale_backup = MakeImage(dst_info, gl_internal_format); - const GLuint read_fbo = runtime->rescale_read_fbo.handle; - const GLuint draw_fbo = runtime->rescale_draw_fbo.handle; + const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle; + const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle; for (s32 layer = 0; layer < info.resources.layers; ++layer) { for (s32 level = 0; level < info.resources.levels; ++level) { const u32 src_level_width = std::max(1u, original_width >> level); @@ -944,8 +991,6 @@ bool Image::Scale() { glNamedFramebufferTextureLayer(draw_fbo, attachment, scale_backup.handle, level, layer); glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0, 0, dst_level_width, dst_level_height, mask, filter); - glNamedFramebufferTextureLayer(read_fbo, attachment, 0, level, layer); - 
glNamedFramebufferTextureLayer(draw_fbo, attachment, 0, level, layer); } } std::swap(texture, scale_backup); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 6c8033003..79448f670 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -153,8 +153,8 @@ private: std::array null_image_views{}; - OGLFramebuffer rescale_draw_fbo; - OGLFramebuffer rescale_read_fbo; + std::array rescale_draw_fbos; + std::array rescale_read_fbos; Settings::ResolutionScalingInfo resolution; }; -- cgit v1.2.3 From 16017ac4503603bcf8189583120ad8888242b0e1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 18 Sep 2021 20:50:00 -0400 Subject: vk_texture_cache: Use nearest neighbor scaling when available --- .../renderer_opengl/gl_texture_cache.cpp | 27 ---------------------- .../renderer_vulkan/vk_texture_cache.cpp | 9 ++++++-- src/video_core/surface.cpp | 27 ++++++++++++++++++++++ src/video_core/surface.h | 2 ++ 4 files changed, 36 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index dc850e7b0..c75386e37 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -395,33 +395,6 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form UNREACHABLE_MSG("Invalid image format={}", format); return GL_R32UI; } - -[[nodiscard]] bool IsPixelFormatInteger(PixelFormat format) { - switch (format) { - case PixelFormat::A8B8G8R8_SINT: - case PixelFormat::A8B8G8R8_UINT: - case PixelFormat::A2B10G10R10_UINT: - case PixelFormat::R8_SINT: - case PixelFormat::R8_UINT: - case PixelFormat::R16G16B16A16_SINT: - case PixelFormat::R16G16B16A16_UINT: - case PixelFormat::R32G32B32A32_UINT: - case PixelFormat::R32G32B32A32_SINT: - 
case PixelFormat::R32G32_SINT: - case PixelFormat::R16_UINT: - case PixelFormat::R16_SINT: - case PixelFormat::R16G16_UINT: - case PixelFormat::R16G16_SINT: - case PixelFormat::R8G8_SINT: - case PixelFormat::R8G8_UINT: - case PixelFormat::R32G32_UINT: - case PixelFormat::R32_UINT: - case PixelFormat::R32_SINT: - return true; - default: - return false; - } -} } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 855f0a5d7..a34cd31f0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -37,6 +37,7 @@ using VideoCommon::ImageInfo; using VideoCommon::ImageType; using VideoCommon::SubresourceRange; using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::IsPixelFormatInteger; namespace { constexpr VkBorderColor ConvertBorderColor(const std::array& color) { @@ -603,9 +604,13 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con .width = info.size.width, .height = info.size.height, }; + const bool is_zeta = (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0; + const bool is_int_format = IsPixelFormatInteger(info.format); + const VkFilter vk_filter = (is_zeta || is_int_format) ? 
VK_FILTER_NEAREST : VK_FILTER_LINEAR; + scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, type, - scaling](vk::CommandBuffer cmdbuf) { + scaling, vk_filter](vk::CommandBuffer cmdbuf) { const auto scale_up = [&](u32 value) { return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); }; @@ -723,7 +728,7 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, nullptr, nullptr, read_barriers); cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, VK_FILTER_NEAREST); + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, vk_filter); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr, write_barriers); }); diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index eb1746265..64941a486 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -279,6 +279,33 @@ bool IsPixelFormatSRGB(PixelFormat format) { } } +bool IsPixelFormatInteger(PixelFormat format) { + switch (format) { + case PixelFormat::A8B8G8R8_SINT: + case PixelFormat::A8B8G8R8_UINT: + case PixelFormat::A2B10G10R10_UINT: + case PixelFormat::R8_SINT: + case PixelFormat::R8_UINT: + case PixelFormat::R16G16B16A16_SINT: + case PixelFormat::R16G16B16A16_UINT: + case PixelFormat::R32G32B32A32_UINT: + case PixelFormat::R32G32B32A32_SINT: + case PixelFormat::R32G32_SINT: + case PixelFormat::R16_UINT: + case PixelFormat::R16_SINT: + case PixelFormat::R16G16_UINT: + case PixelFormat::R16G16_SINT: + case PixelFormat::R8G8_SINT: + case PixelFormat::R8G8_UINT: + case PixelFormat::R32G32_UINT: + case PixelFormat::R32_UINT: + case PixelFormat::R32_SINT: + return true; + default: + return false; + } +} + std::pair GetASTCBlockSize(PixelFormat 
format) { return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; } diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 1503db81f..3bb24abb7 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -460,6 +460,8 @@ bool IsPixelFormatASTC(PixelFormat format); bool IsPixelFormatSRGB(PixelFormat format); +bool IsPixelFormatInteger(PixelFormat format); + std::pair GetASTCBlockSize(PixelFormat format); u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format); -- cgit v1.2.3 From 122ddeb7ff948a607b0bee9bae968dc4d9c72188 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 19 Sep 2021 00:03:14 -0400 Subject: vk_rasterizer: Fix scaling on Y_NEGATE --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 20bb05e7d..87f265e09 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -60,12 +60,18 @@ struct DrawParams { VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) { const auto& src = regs.viewport_transform[index]; + const float x = (src.translate_x - src.scale_x) * scale; const float width = src.scale_x * 2.0f * scale; - const float height = src.scale_y * 2.0f * scale; + float y = (src.translate_y - src.scale_y) * scale; + float height = src.scale_y * 2.0f * scale; + if (regs.screen_y_control.y_negate) { + y += height; + height = -height; + } const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; VkViewport viewport{ - .x = (src.translate_x - src.scale_x) * scale, - .y = (src.translate_y - src.scale_y) * scale, + .x = x, + .y = y, .width = width != 0.0f ? width : 1.0f, .height = height != 0.0f ? 
height : 1.0f, .minDepth = src.translate_z - src.scale_z * reduce_z, -- cgit v1.2.3 From 8183142cd4c70355e6275eaba3d2939211b4b9c9 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 21 Sep 2021 20:28:22 -0400 Subject: gl_texture_cache: Fix scaling backup logic --- .../renderer_opengl/gl_texture_cache.cpp | 33 +++++++++------------- src/video_core/renderer_opengl/gl_texture_cache.h | 3 +- 2 files changed, 16 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index c75386e37..7ded7415d 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -681,6 +681,7 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, gl_type = tuple.type; } texture = MakeImage(info, gl_internal_format); + original_backup = texture.handle; if (runtime->device.HasDebuggingToolAttached()) { const std::string name = VideoCommon::Name(*this); glObjectLabel(ImageTarget(info) == GL_TEXTURE_BUFFER ? 
GL_BUFFER : GL_TEXTURE, @@ -697,7 +698,6 @@ void Image::UploadMemory(const ImageBufferMap& map, const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { ScaleDown(); - scale_backup.Release(); } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); @@ -729,7 +729,6 @@ void Image::DownloadMemory(ImageBufferMap& map, const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { ScaleDown(); - scale_backup.Release(); } glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); glPixelStorei(GL_PACK_ALIGNMENT, 1); @@ -749,7 +748,7 @@ void Image::DownloadMemory(ImageBufferMap& map, CopyImageToBuffer(copy, map.offset); } if (is_rescaled) { - ScaleUp(); + texture.handle = upscaled_backup.handle; } } @@ -885,11 +884,6 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } bool Image::Scale() { - if (scale_backup.handle) { - // This was a texture which was scaled previously, no need to repeat scaling - std::swap(texture, scale_backup); - return true; - } const auto format_type = GetFormatType(info.format); const GLenum attachment = [format_type] { switch (format_type) { @@ -949,8 +943,9 @@ bool Image::Scale() { auto dst_info = info; dst_info.size.width = scaled_width; dst_info.size.height = scaled_height; - scale_backup = MakeImage(dst_info, gl_internal_format); - + if (!upscaled_backup.handle) { + upscaled_backup = MakeImage(dst_info, gl_internal_format); + } const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle; const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle; for (s32 layer = 0; layer < info.resources.layers; ++layer) { @@ -960,13 +955,14 @@ bool Image::Scale() { const u32 dst_level_width = std::max(1u, scaled_width >> level); const u32 dst_level_height = std::max(1u, scaled_height >> level); - glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer); - 
glNamedFramebufferTextureLayer(draw_fbo, attachment, scale_backup.handle, level, layer); + glNamedFramebufferTextureLayer(read_fbo, attachment, original_backup, level, layer); + glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level, + layer); glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0, 0, dst_level_width, dst_level_height, mask, filter); } } - std::swap(texture, scale_backup); + texture.handle = upscaled_backup.handle; return true; } @@ -985,20 +981,19 @@ bool Image::ScaleUp() { UNIMPLEMENTED(); return false; } + if (!Scale()) { + return false; + } flags |= ImageFlagBits::Rescaled; - return Scale(); + return true; } bool Image::ScaleDown() { if (False(flags & ImageFlagBits::Rescaled)) { return false; } - if (!scale_backup.handle) { - LOG_ERROR(Render_OpenGL, "Downscaling an upscaled texture that didn't backup original"); - return false; - } flags &= ~ImageFlagBits::Rescaled; - std::swap(texture, scale_backup); + texture.handle = original_backup; return true; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 79448f670..61f9b0259 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -205,12 +205,13 @@ private: bool Scale(); OGLTexture texture; - OGLTexture scale_backup; + OGLTexture upscaled_backup; OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; GLenum gl_type = GL_NONE; TextureCacheRuntime* runtime{}; + GLuint original_backup{}; }; class ImageView : public VideoCommon::ImageViewBase { -- cgit v1.2.3 From 36f261edefd2e16d34f2726f0a0295e089ed1c17 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 21 Sep 2021 22:22:24 -0400 Subject: vk_texture_cache: Simplify scaled image management --- .../renderer_vulkan/vk_texture_cache.cpp | 111 +++++---------------- 
src/video_core/renderer_vulkan/vk_texture_cache.h | 30 ++---- 2 files changed, 34 insertions(+), 107 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index a34cd31f0..5b4f51a31 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -996,17 +996,14 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { return device.GetDeviceLocalMemory(); } -void TextureCacheRuntime::TickFrame() { - prescaled_images.Tick(); - prescaled_commits.Tick(); -} +void TextureCacheRuntime::TickFrame() {} Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler}, - image(MakeImage(runtime_.device, info)), - commit(runtime_.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), - aspect_mask(ImageAspectMask(info.format)), runtime{&runtime_} { + runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)), + commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)), + aspect_mask(ImageAspectMask(info.format)) { if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { if (Settings::values.accelerate_astc.GetValue()) { flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; @@ -1015,13 +1012,14 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu } } if (runtime->device.HasDebuggingToolAttached()) { - image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); + original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, .pNext = nullptr, .usage = VK_IMAGE_USAGE_STORAGE_BIT, }; + current_image = *original_image; if (IsPixelFormatASTC(info.format) && 
!runtime->device.IsOptimalAstcSupported()) { const auto& device = runtime->device.GetLogical(); storage_image_views.reserve(info.resources.levels); @@ -1030,7 +1028,7 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = &storage_image_view_usage_create_info, .flags = 0, - .image = *image, + .image = *original_image, .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY, .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, .components{ @@ -1059,12 +1057,12 @@ void Image::UploadMemory(const StagingBufferRef& map, std::spanRequestOutsideRenderPassOperationContext(); std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); const VkBuffer src_buffer = map.buffer; - const VkImage vk_image = *image; + const VkImage vk_image = *original_image; const VkImageAspectFlags vk_aspect_mask = aspect_mask; const bool is_initialized = std::exchange(initialized, true); scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized, @@ -1072,18 +1070,14 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { - const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); - if (is_rescaled) { - ScaleDown(true); - } std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); - scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, + scheduler->Record([buffer = map.buffer, image = *original_image, aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { const VkImageMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, @@ -1133,51 +1127,31 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::spanresolution; if (!resolution.active) { return true; } - vk::Image rescaled_image = - has_backup ? 
std::move(backup_image) - : MakeImage(runtime->device, info, resolution.up_scale, resolution.down_shift); - MemoryCommit new_commit = has_backup ? std::move(backup_commit) - : MemoryCommit(runtime->memory_allocator.Commit( - rescaled_image, MemoryUsage::DeviceLocal)); - has_backup = false; - + const auto& device = runtime->device; + if (!scaled_image) { + scaled_image = MakeImage(device, info, resolution.up_scale, resolution.down_shift); + auto& allocator = runtime->memory_allocator; + scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal)); + } if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } - SCOPE_EXIT({ - if (save_as_backup) { - backup_image = std::move(image); - backup_commit = std::move(commit); - has_backup = true; - } else { - runtime->prescaled_images.Push(std::move(image)); - runtime->prescaled_commits.Push(std::move(commit)); - } - image = std::move(rescaled_image); - commit = std::move(new_commit); - }); - const PixelFormat format = StorageFormat(info.format); - const auto format_info = - MaxwellToVK::SurfaceFormat(runtime->device, FormatType::Optimal, false, format); - const auto similar = runtime->device.GetSupportedFormat( + const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, format); + const auto similar = device.GetSupportedFormat( format_info.format, (VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT), FormatType::Optimal); @@ -1187,55 +1161,18 @@ bool Image::ScaleUp(bool save_as_backup) { if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } - BlitScale(*scheduler, *image, *rescaled_image, info, aspect_mask, resolution, true); + BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution, true); + current_image = *scaled_image; return true; } -void Image::SwapBackup() { - if (!runtime->resolution.active) { - return; - } - ASSERT(has_backup); - runtime->prescaled_images.Push(std::move(image)); - 
runtime->prescaled_commits.Push(std::move(commit)); - image = std::move(backup_image); - commit = std::move(backup_commit); - has_backup = false; -} - -bool Image::ScaleDown(bool save_as_backup) { +bool Image::ScaleDown() { if (False(flags & ImageFlagBits::Rescaled)) { return false; } ASSERT(info.type != ImageType::Linear); flags &= ~ImageFlagBits::Rescaled; - scaling_count++; - - const auto& resolution = runtime->resolution; - if (!resolution.active) { - return true; - } - vk::Image downscaled_image = - has_backup ? std::move(backup_image) : MakeImage(runtime->device, info); - MemoryCommit new_commit = has_backup ? std::move(backup_commit) - : MemoryCommit(runtime->memory_allocator.Commit( - downscaled_image, MemoryUsage::DeviceLocal)); - has_backup = false; - if (aspect_mask == 0) { - aspect_mask = ImageAspectMask(info.format); - } - BlitScale(*scheduler, *image, *downscaled_image, info, aspect_mask, resolution, false); - - if (save_as_backup) { - backup_image = std::move(image); - backup_commit = std::move(commit); - has_backup = true; - } else { - runtime->prescaled_images.Push(std::move(image)); - runtime->prescaled_commits.Push(std::move(commit)); - } - image = std::move(downscaled_image); - commit = std::move(new_commit); + current_image = *original_image; return true; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 84194b833..e5060e3f1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -8,7 +8,6 @@ #include "common/settings.h" #include "shader_recompiler/shader_info.h" -#include "video_core/delayed_destruction_ring.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -17,7 +16,6 @@ namespace Vulkan { -using VideoCommon::DelayedDestructionRing; using VideoCommon::ImageId; using 
VideoCommon::NUM_RT; using VideoCommon::Region2D; @@ -36,8 +34,6 @@ class VKScheduler; class TextureCacheRuntime { public: - static constexpr size_t TICKS_TO_DESTROY = 6; - explicit TextureCacheRuntime(const Device& device_, VKScheduler& scheduler_, MemoryAllocator& memory_allocator_, StagingBufferPool& staging_buffer_pool_, @@ -90,9 +86,6 @@ public: BlitImageHelper& blit_image_helper; ASTCDecoderPass& astc_decoder_pass; RenderPassCache& render_pass_cache; - - DelayedDestructionRing prescaled_images; - DelayedDestructionRing prescaled_commits; Settings::ResolutionScalingInfo resolution; }; @@ -117,7 +110,7 @@ public: std::span copies); [[nodiscard]] VkImage Handle() const noexcept { - return *image; + return current_image; } [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { @@ -133,25 +126,22 @@ public: return std::exchange(initialized, true); } - bool ScaleUp(bool save_as_backup = false); + bool ScaleUp(); - bool ScaleDown(bool save_as_backup = false); - - void SwapBackup(); + bool ScaleDown(); private: - VKScheduler* scheduler; - vk::Image image; + VKScheduler* scheduler{}; + TextureCacheRuntime* runtime{}; + + vk::Image original_image; MemoryCommit commit; - vk::ImageView image_view; std::vector storage_image_views; VkImageAspectFlags aspect_mask = 0; bool initialized = false; - TextureCacheRuntime* runtime; - u32 scaling_count{}; - vk::Image backup_image{}; - MemoryCommit backup_commit{}; - bool has_backup{}; + vk::Image scaled_image{}; + MemoryCommit scaled_commit{}; + VkImage current_image{}; }; class ImageView : public VideoCommon::ImageViewBase { -- cgit v1.2.3 From dd663844513c82a24456dbc68b9ad6665506bea9 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 26 Sep 2021 22:43:06 -0400 Subject: rescaling_pass: Enable PatchImageQueryDimensions on fragment stages --- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src') diff --git 
a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index 357e41f2b..51125f45a 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -249,6 +249,7 @@ void PatchImageRead(IR::Block& block, IR::Inst& inst) { void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { const bool is_fragment_shader{program.stage == Stage::Fragment}; + const bool is_compute_shader{program.stage == Stage::Compute}; switch (inst.GetOpcode()) { case IR::Opcode::GetAttribute: { const IR::Attribute attr{inst.Arg(0).Attribute()}; @@ -265,21 +266,19 @@ void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { break; } case IR::Opcode::ImageQueryDimensions: - if (program.stage == Stage::Compute) { - PatchImageQueryDimensions(block, inst); - } + PatchImageQueryDimensions(block, inst); break; case IR::Opcode::ImageFetch: if (is_fragment_shader) { SubScaleImageFetch(block, inst); - } else if (program.stage == Stage::Compute) { + } else if (is_compute_shader) { PatchImageFetch(block, inst); } break; case IR::Opcode::ImageRead: if (is_fragment_shader) { SubScaleImageRead(block, inst); - } else if (program.stage == Stage::Compute) { + } else if (is_compute_shader) { PatchImageRead(block, inst); } break; -- cgit v1.2.3 From 276565973f373b2dbf7f19a68a6d0304803dc2c3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 28 Sep 2021 21:29:17 -0400 Subject: rescaling_pass: Scale ImageFetch offset if it exists Plus some code deduplication --- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 96 ++++++++++--------------- 1 file changed, 37 insertions(+), 59 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index 51125f45a..2aa9c31dc 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ 
b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -137,21 +137,22 @@ void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) { } } -void ScaleIntegerCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { +void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled, + size_t index) { + const IR::Value composite{inst.Arg(index)}; + if (composite.IsEmpty()) { + return; + } const auto info{inst.Flags()}; - const IR::Value coord{inst.Arg(1)}; + const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})}; + const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})}; switch (info.type) { - case TextureType::Color2D: { - const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)})}; - const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)})}; - inst.SetArg(1, ir.CompositeConstruct(x, y)); + case TextureType::Color2D: + inst.SetArg(index, ir.CompositeConstruct(x, y)); break; - } case TextureType::ColorArray2D: { - const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)})}; - const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)})}; - const IR::U32 z{ir.CompositeExtract(coord, 2)}; - inst.SetArg(1, ir.CompositeConstruct(x, y, z)); + const IR::U32 z{ir.CompositeExtract(composite, 2)}; + inst.SetArg(index, ir.CompositeConstruct(x, y, z)); break; } case TextureType::Color1D: @@ -165,27 +166,21 @@ void ScaleIntegerCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scale } } -void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; +void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { const auto info{inst.Flags()}; - const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; const IR::Value coord{inst.Arg(1)}; + const IR::U32 coord_x{ir.CompositeExtract(coord, 0)}; + const IR::U32 
coord_y{ir.CompositeExtract(coord, 1)}; + + const IR::U32 scaled_x{SubScale(ir, is_scaled, coord_x, IR::Attribute::PositionX)}; + const IR::U32 scaled_y{SubScale(ir, is_scaled, coord_y, IR::Attribute::PositionY)}; switch (info.type) { - case TextureType::Color2D: { - const IR::U32 x{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)}, - IR::Attribute::PositionX)}; - const IR::U32 y{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)}, - IR::Attribute::PositionY)}; - inst.SetArg(1, ir.CompositeConstruct(x, y)); + case TextureType::Color2D: + inst.SetArg(1, ir.CompositeConstruct(scaled_x, scaled_y)); break; - } case TextureType::ColorArray2D: { - const IR::U32 x{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)}, - IR::Attribute::PositionX)}; - const IR::U32 y{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)}, - IR::Attribute::PositionY)}; const IR::U32 z{ir.CompositeExtract(coord, 2)}; - inst.SetArg(1, ir.CompositeConstruct(x, y, z)); + inst.SetArg(1, ir.CompositeConstruct(scaled_x, scaled_y, z)); break; } case TextureType::Color1D: @@ -199,57 +194,40 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { } } +void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto info{inst.Flags()}; + const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; + SubScaleCoord(ir, inst, is_scaled); + // Scale ImageFetch offset + ScaleIntegerComposite(ir, inst, is_scaled, 2); +} + void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const auto info{inst.Flags()}; const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))}; - const IR::Value coord{inst.Arg(1)}; - switch (info.type) { - case TextureType::Color2D: { - const IR::U32 x{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)}, - IR::Attribute::PositionX)}; - 
const IR::U32 y{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)}, - IR::Attribute::PositionY)}; - inst.SetArg(1, ir.CompositeConstruct(x, y)); - break; - } - case TextureType::ColorArray2D: { - const IR::U32 x{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 0)}, - IR::Attribute::PositionX)}; - const IR::U32 y{SubScale(ir, is_scaled, IR::U32{ir.CompositeExtract(coord, 1)}, - IR::Attribute::PositionY)}; - const IR::U32 z{ir.CompositeExtract(coord, 2)}; - inst.SetArg(1, ir.CompositeConstruct(x, y, z)); - break; - } - case TextureType::Color1D: - case TextureType::ColorArray1D: - case TextureType::Color3D: - case TextureType::ColorCube: - case TextureType::ColorArrayCube: - case TextureType::Buffer: - // Nothing to patch here - break; - } + SubScaleCoord(ir, inst, is_scaled); } void PatchImageFetch(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const auto info{inst.Flags()}; const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; - ScaleIntegerCoord(ir, inst, is_scaled); + ScaleIntegerComposite(ir, inst, is_scaled, 1); + // Scale ImageFetch offset + ScaleIntegerComposite(ir, inst, is_scaled, 2); } void PatchImageRead(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const auto info{inst.Flags()}; const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))}; - ScaleIntegerCoord(ir, inst, is_scaled); + ScaleIntegerComposite(ir, inst, is_scaled, 1); } void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { const bool is_fragment_shader{program.stage == Stage::Fragment}; - const bool is_compute_shader{program.stage == Stage::Compute}; switch (inst.GetOpcode()) { case IR::Opcode::GetAttribute: { const IR::Attribute attr{inst.Arg(0).Attribute()}; @@ -271,14 +249,14 @@ void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { case IR::Opcode::ImageFetch: if 
(is_fragment_shader) { SubScaleImageFetch(block, inst); - } else if (is_compute_shader) { + } else { PatchImageFetch(block, inst); } break; case IR::Opcode::ImageRead: if (is_fragment_shader) { SubScaleImageRead(block, inst); - } else if (is_compute_shader) { + } else { PatchImageRead(block, inst); } break; -- cgit v1.2.3 From 99eec162da567ce08a7ab6ce4d1f4b5fa8b5af5e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 28 Sep 2021 21:37:54 -0400 Subject: rescaling_pass: Logic simplification and minor style cleanup --- .../frontend/maxwell/translate_program.cpp | 1 - src/shader_recompiler/ir_opt/rescaling_pass.cpp | 49 ++++++++-------------- 2 files changed, 17 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 743fb2420..267ebe4af 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -183,7 +183,6 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPoolInstructions()) { - VisitMark(program, inst); + const bool is_fragment_shader{program.stage == Stage::Fragment}; + if (is_fragment_shader) { + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + VisitMark(inst); + } } } for (IR::Block* const block : program.post_order_blocks) { -- cgit v1.2.3 From 19ca0c9ab5cbaa86e30743ea760e0aab5c40c1d6 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 20 Sep 2021 19:11:03 +0200 Subject: TextureCache: Base fixes on rescaling. 
--- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 3 ++- src/video_core/texture_cache/texture_cache.h | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 87f265e09..1ceffa718 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -207,7 +207,7 @@ void RasterizerVulkan::Clear() { query_cache.UpdateCounters(); - const auto& regs = maxwell3d.regs; + auto& regs = maxwell3d.regs; const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A; const bool use_depth = regs.clear_buffers.Z; @@ -228,6 +228,7 @@ void RasterizerVulkan::Clear() { up_scale = Settings::values.resolution_info.up_scale; down_shift = Settings::values.resolution_info.down_shift; } + UpdateViewportsState(regs); VkClearRect clear_rect{ .rect = GetScissorState(regs, 0, up_scale, down_shift), diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 764984546..a543776fd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -205,8 +205,8 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { return; } - u32 scale_rating; - bool rescaled; + u32 scale_rating = 0; + bool rescaled = false; std::array tmp_color_images{}; ImageId tmp_depth_image{}; do { @@ -223,7 +223,7 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { bool can_rescale = true; bool any_blacklisted = false; const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) { - if (view_id) { + if (view_id != NULL_IMAGE_VIEW_ID && view_id != ImageViewId{}) { const auto& view = slot_image_views[view_id]; const auto image_id = view.image_id; id_save = image_id; @@ -265,6 +265,7 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { scale_up(tmp_color_images[index]); } scale_up(tmp_depth_image); + scale_rating = 2; } } else { rescaled = false; -- cgit v1.2.3 From ea82bd4b7e4c4f23a40f8a35858d8b74950fc347 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 20 Sep 2021 22:18:15 +0200 Subject: Texture Cache: Fix Rescaling on Multisample --- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 8 ++++++-- src/video_core/texture_cache/image_info.cpp | 4 ++-- src/video_core/texture_cache/texture_cache.h | 17 +++++++++++++---- 3 files changed, 21 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 5b4f51a31..4f0bab274 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -860,9 +860,10 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, nullptr, nullptr, read_barriers); if (is_resolve) { + VkImageResolve resolve_info = + MakeImageResolve(dst_region, src_region, dst_layers, src_layers); cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - MakeImageResolve(dst_region, src_region, dst_layers, src_layers)); + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, resolve_info); } else { const bool is_linear = filter == Fermi2D::Filter::Bilinear; const VkFilter vk_filter = is_linear ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST; @@ -1149,6 +1150,9 @@ bool Image::ScaleUp() { if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } + if (info.num_samples > 1) { + return true; + } const PixelFormat format = StorageFormat(info.format); const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, format); const auto similar = device.GetSupportedFormat( diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index bdf306bf9..7fa8fd4fe 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -101,7 +101,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { // FIXME: Call this without passing *this layer_stride = CalculateLayerStride(*this); maybe_unaligned_layer_stride = CalculateLayerSize(*this); - rescaleable &= (block.depth == 0) && resources.levels == 1 && num_samples == 1; + rescaleable &= (block.depth == 0) && resources.levels == 1; } } @@ -134,7 +134,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) type = ImageType::e3D; size.depth = rt.depth; } else { - rescaleable = block.depth == 0 && size.height > 256 && num_samples == 1; + rescaleable = block.depth == 0 && size.height > 256; type = ImageType::e2D; resources.layers = rt.depth; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a543776fd..b60f840c1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -476,17 +476,26 @@ void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, Image& dst_image = slot_images[dst_id]; Image& src_image = slot_images[src_id]; + bool is_resolve = src_image.info.num_samples != 1 && dst_image.info.num_samples == 1; + bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); bool is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); if (is_src_rescaled != is_dst_rescaled) { - if (ImageCanRescale(dst_image)) { - ScaleUp(dst_image); - is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); - } if (ImageCanRescale(src_image)) { ScaleUp(src_image); is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); + if (is_resolve) { + dst_image.info.rescaleable = true; + for (const auto& alias : dst_image.aliased_images) { + Image& other_image = slot_images[alias.id]; + other_image.info.rescaleable = true; + } + } + } + if (ImageCanRescale(dst_image)) { + ScaleUp(dst_image); + is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); } } -- cgit v1.2.3 From 581ea900627b398c2fa06b70facd5dcd8bbb7d68 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 29 Sep 2021 20:53:30 -0400 Subject: rescaling_pass: Fix IR errors when unscalable texture types are encountered --- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'src') diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index 0d642dd0d..a5fa4ee83 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -14,6 +14,22 @@ namespace Shader::Optimization { namespace { +[[nodiscard]] bool IsTextureTypeRescalable(TextureType type) { + switch (type) { + case TextureType::Color2D: + case TextureType::ColorArray2D: + return true; + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color3D: + case 
TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + break; + } + return false; +} + void VisitMark(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::ShuffleIndex: @@ -179,6 +195,9 @@ void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const auto info{inst.Flags()}; + if (!IsTextureTypeRescalable(info.type)) { + return; + } const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; SubScaleCoord(ir, inst, is_scaled); // Scale ImageFetch offset @@ -188,6 +207,9 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const auto info{inst.Flags()}; + if (!IsTextureTypeRescalable(info.type)) { + return; + } const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))}; SubScaleCoord(ir, inst, is_scaled); } @@ -195,6 +217,9 @@ void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { void PatchImageFetch(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const auto info{inst.Flags()}; + if (!IsTextureTypeRescalable(info.type)) { + return; + } const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; ScaleIntegerComposite(ir, inst, is_scaled, 1); // Scale ImageFetch offset @@ -204,6 +229,9 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) { void PatchImageRead(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const auto info{inst.Flags()}; + if (!IsTextureTypeRescalable(info.type)) { + return; + } const IR::U1 is_scaled{ir.IsImageScaled(ir.Imm32(info.descriptor_index))}; ScaleIntegerComposite(ir, inst, is_scaled, 1); } -- cgit v1.2.3 From 
e0a383085598afd43d2960e40ff60288d25c7d0e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 29 Sep 2021 21:34:56 -0400 Subject: gl_texture_cache: Fix BGR pbo size for scaled textures --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 7ded7415d..fafee62ee 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1264,25 +1264,24 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span copies) { static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; - const u32 requested_pbo_size = - std::max(src_image.unswizzled_size_bytes, dst_image.unswizzled_size_bytes); - - if (bgr_pbo_size < requested_pbo_size) { - bgr_pbo.Create(); - bgr_pbo_size = requested_pbo_size; - glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY); - } + const u32 img_bpp = BytesPerBlock(src_image.info.format); for (const ImageCopy& copy : copies) { ASSERT(copy.src_offset == zero_offset); ASSERT(copy.dst_offset == zero_offset); - + const u32 num_src_layers = static_cast(copy.src_subresource.num_layers); + const u32 copy_size = copy.extent.width * copy.extent.height * num_src_layers * img_bpp; + if (bgr_pbo_size < copy_size) { + bgr_pbo.Create(); + bgr_pbo_size = copy_size; + glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY); + } // Copy from source to PBO glPixelStorei(GL_PACK_ALIGNMENT, 1); glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle); glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, - copy.src_subresource.num_layers, src_image.GlFormat(), - src_image.GlType(), static_cast(bgr_pbo_size), nullptr); + 
num_src_layers, src_image.GlFormat(), src_image.GlType(), + static_cast(bgr_pbo_size), nullptr); // Copy from PBO to destination in desired GL format glPixelStorei(GL_UNPACK_ALIGNMENT, 1); -- cgit v1.2.3 From 237a43004fb27a273495a0b44515cf7389dea553 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 3 Oct 2021 22:42:29 +0200 Subject: Texture Cache: Fix calculations when scaling. --- src/video_core/texture_cache/texture_cache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b60f840c1..691198853 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -858,6 +858,12 @@ bool TextureCache

::ScaleUp(Image& image) { if (!rescaled) { return false; } + const auto& add_to_size = Settings::values.resolution_info.up_factor - 1.0f; + const auto sign = std::signbit(add_to_size); + const u64 tentative_size = static_cast( + std::max(image.guest_size_bytes, image.unswizzled_size_bytes) * std::abs(add_to_size)); + const u64 fitted_size = Common::AlignUp(tentative_size, 1024); + total_used_memory += sign ? -fitted_size : fitted_size; InvalidateScale(image); return true; } @@ -868,6 +874,12 @@ bool TextureCache

::ScaleDown(Image& image) { if (!rescaled) { return false; } + const auto& add_to_size = Settings::values.resolution_info.up_factor - 1.0f; + const auto sign = std::signbit(add_to_size); + const u64 tentative_size = static_cast( + std::max(image.guest_size_bytes, image.unswizzled_size_bytes) * std::abs(add_to_size)); + const u64 fitted_size = Common::AlignUp(tentative_size, 1024); + total_used_memory += sign ? fitted_size : -fitted_size; InvalidateScale(image); return true; } -- cgit v1.2.3 From 88ef04dbaf26ab83ec85bfa3c68434c283c66e50 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 5 Oct 2021 00:07:51 -0400 Subject: texture_cache: Refactor scaled image size calculation --- src/video_core/texture_cache/texture_cache.h | 24 +++++++++++------------ src/video_core/texture_cache/texture_cache_base.h | 1 + 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 691198853..b708e41b5 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -852,18 +852,23 @@ void TextureCache

::InvalidateScale(Image& image) { has_deleted_images = true; } +template +u64 TextureCache

::GetScaledImageSizeBytes(Image& image) { + const f32 add_to_size = Settings::values.resolution_info.up_factor - 1.0f; + const bool sign = std::signbit(add_to_size); + const u32 image_size_bytes = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + const u64 tentative_size = static_cast(image_size_bytes * std::abs(add_to_size)); + const u64 fitted_size = Common::AlignUp(tentative_size, 1024); + return sign ? -fitted_size : fitted_size; +} + template bool TextureCache

::ScaleUp(Image& image) { const bool rescaled = image.ScaleUp(); if (!rescaled) { return false; } - const auto& add_to_size = Settings::values.resolution_info.up_factor - 1.0f; - const auto sign = std::signbit(add_to_size); - const u64 tentative_size = static_cast( - std::max(image.guest_size_bytes, image.unswizzled_size_bytes) * std::abs(add_to_size)); - const u64 fitted_size = Common::AlignUp(tentative_size, 1024); - total_used_memory += sign ? -fitted_size : fitted_size; + total_used_memory += GetScaledImageSizeBytes(image); InvalidateScale(image); return true; } @@ -874,12 +879,7 @@ bool TextureCache

::ScaleDown(Image& image) { if (!rescaled) { return false; } - const auto& add_to_size = Settings::values.resolution_info.up_factor - 1.0f; - const auto sign = std::signbit(add_to_size); - const u64 tentative_size = static_cast( - std::max(image.guest_size_bytes, image.unswizzled_size_bytes) * std::abs(add_to_size)); - const u64 fitted_size = Common::AlignUp(tentative_size, 1024); - total_used_memory += sign ? fitted_size : -fitted_size; + total_used_memory += GetScaledImageSizeBytes(image); InvalidateScale(image); return true; } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 517a4c224..40e003b60 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -339,6 +339,7 @@ private: void InvalidateScale(Image& image); bool ScaleUp(Image& image); bool ScaleDown(Image& image); + u64 GetScaledImageSizeBytes(Image& image); Runtime& runtime; VideoCore::RasterizerInterface& rasterizer; -- cgit v1.2.3 From 31478c6c1b841b9a820742830b136775fafe270f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 6 Oct 2021 01:18:00 -0400 Subject: video_core: Misc resolution scaling related refactoring --- src/common/settings.cpp | 2 +- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 +-- .../renderer_opengl/gl_texture_cache.cpp | 31 +++++++++------------- src/video_core/renderer_opengl/gl_texture_cache.h | 11 +++++--- src/video_core/renderer_vulkan/vk_state_tracker.h | 10 ++++--- .../renderer_vulkan/vk_texture_cache.cpp | 31 +++++++++++++--------- src/video_core/renderer_vulkan/vk_texture_cache.h | 7 +++-- src/video_core/texture_cache/texture_cache.h | 2 -- 8 files changed, 51 insertions(+), 47 deletions(-) (limited to 'src') diff --git a/src/common/settings.cpp b/src/common/settings.cpp index f0686a7c5..12fdb0f9b 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -107,7 +107,7 @@ float 
Volume() { } void UpdateRescalingInfo() { - auto setup = values.resolution_setup.GetValue(); + const auto setup = values.resolution_setup.GetValue(); auto& info = values.resolution_info; switch (setup) { case ResolutionSetup::Res1_2X: diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d94f1e89f..bb24a0656 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -554,7 +554,7 @@ void RasterizerOpenGL::SyncViewport() { } glFrontFace(mode); } - if (dirty_viewport || flags[Dirty::ClipControl]) { + if (dirty_viewport || dirty_clip_control) { flags[Dirty::ClipControl] = false; bool flip_y = false; @@ -925,7 +925,7 @@ void RasterizerOpenGL::SyncScissorTest() { const auto& regs = maxwell3d.regs; const auto& resolution = Settings::values.resolution_info; - const auto scale_up = [&](u32 value) -> u32 { + const auto scale_up = [resolution](u32 value) -> u32 { if (value == 0) { return 0U; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index fafee62ee..c68a51ebb 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -405,7 +405,8 @@ ImageBufferMap::~ImageBufferMap() { TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, StateTracker& state_tracker_) - : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) { + : device{device_}, state_tracker{state_tracker_}, + util_shaders(program_manager), resolution{Settings::values.resolution_info} { static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; @@ -473,7 +474,6 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& 
set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); - resolution = Settings::values.resolution_info; if (resolution.active) { for (size_t i = 0; i < rescale_draw_fbos.size(); ++i) { rescale_draw_fbos[i].Create(); @@ -681,7 +681,7 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, gl_type = tuple.type; } texture = MakeImage(info, gl_internal_format); - original_backup = texture.handle; + current_texture = texture.handle; if (runtime->device.HasDebuggingToolAttached()) { const std::string name = VideoCommon::Name(*this); glObjectLabel(ImageTarget(info) == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, @@ -726,10 +726,6 @@ void Image::UploadMemory(const ImageBufferMap& map, void Image::DownloadMemory(ImageBufferMap& map, std::span copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API - const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); - if (is_rescaled) { - ScaleDown(); - } glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); glPixelStorei(GL_PACK_ALIGNMENT, 1); @@ -747,9 +743,6 @@ void Image::DownloadMemory(ImageBufferMap& map, } CopyImageToBuffer(copy, map.offset); } - if (is_rescaled) { - texture.handle = upscaled_backup.handle; - } } GLuint Image::StorageHandle() noexcept { @@ -775,11 +768,11 @@ GLuint Image::StorageHandle() noexcept { return store_view.handle; } store_view.Create(); - glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0, + glTextureView(store_view.handle, ImageTarget(info), current_texture, GL_RGBA8, 0, info.resources.levels, 0, info.resources.layers); return store_view.handle; default: - return texture.handle; + return current_texture; } } @@ -940,10 +933,10 @@ bool Image::Scale() { const u32 original_width = info.size.width; const u32 original_height = info.size.height; - auto dst_info = info; - dst_info.size.width = scaled_width; - 
dst_info.size.height = scaled_height; if (!upscaled_backup.handle) { + auto dst_info = info; + dst_info.size.width = scaled_width; + dst_info.size.height = scaled_height; upscaled_backup = MakeImage(dst_info, gl_internal_format); } const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle; @@ -955,14 +948,14 @@ bool Image::Scale() { const u32 dst_level_width = std::max(1u, scaled_width >> level); const u32 dst_level_height = std::max(1u, scaled_height >> level); - glNamedFramebufferTextureLayer(read_fbo, attachment, original_backup, level, layer); + glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer); glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level, layer); glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0, 0, dst_level_width, dst_level_height, mask, filter); } } - texture.handle = upscaled_backup.handle; + current_texture = upscaled_backup.handle; return true; } @@ -993,7 +986,7 @@ bool Image::ScaleDown() { return false; } flags &= ~ImageFlagBits::Rescaled; - texture.handle = original_backup; + current_texture = texture.handle; return true; } @@ -1010,7 +1003,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI flat_range = info.range; set_object_label = device.HasDebuggingToolAttached(); is_render_target = info.IsRenderTarget(); - original_texture = image.texture.handle; + original_texture = image.Handle(); num_samples = image.info.num_samples; if (!is_render_target) { swizzle[0] = info.x_source; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 61f9b0259..cf7f37a16 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -9,13 +9,16 @@ #include -#include "common/settings.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include 
"video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/texture_cache_base.h" +namespace Settings { +struct ResolutionScalingInfo; +} + namespace OpenGL { class Device; @@ -155,7 +158,7 @@ private: std::array rescale_draw_fbos; std::array rescale_read_fbos; - Settings::ResolutionScalingInfo resolution; + const Settings::ResolutionScalingInfo& resolution; }; class Image : public VideoCommon::ImageBase { @@ -182,7 +185,7 @@ public: GLuint StorageHandle() noexcept; GLuint Handle() const noexcept { - return texture.handle; + return current_texture; } GLuint GlFormat() const noexcept { @@ -211,7 +214,7 @@ private: GLenum gl_format = GL_NONE; GLenum gl_type = GL_NONE; TextureCacheRuntime* runtime{}; - GLuint original_backup{}; + GLuint current_texture{}; }; class ImageView : public VideoCommon::ImageViewBase { diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index ac2bbebe0..40a149832 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -71,13 +71,15 @@ public: } bool TouchViewports() { - return Exchange(Dirty::Viewports, false) || - Exchange(VideoCommon::Dirty::RescaleViewports, false); + const bool dirty_viewports = Exchange(Dirty::Viewports, false); + const bool rescale_viewports = Exchange(VideoCommon::Dirty::RescaleViewports, false); + return dirty_viewports || rescale_viewports; } bool TouchScissors() { - return Exchange(Dirty::Scissors, false) || - Exchange(VideoCommon::Dirty::RescaleScissors, false); + const bool dirty_scissors = Exchange(Dirty::Scissors, false); + const bool rescale_scissors = Exchange(VideoCommon::Dirty::RescaleScissors, false); + return dirty_scissors || rescale_scissors; } bool TouchDepthBias() { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 
4f0bab274..930c7d569 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -125,8 +125,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { } } -[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const Device& device, const ImageInfo& info, - u32 up, u32 down) { +[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const Device& device, const ImageInfo& info) { const PixelFormat format = StorageFormat(info.format); const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, format); VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; @@ -137,9 +136,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { if (info.type == ImageType::e3D) { flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; } - const auto scale_up = [&](u32 value) { return std::max((value * up) >> down, 1U); }; const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples); - const bool is_2d = info.type == ImageType::e2D; return VkImageCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, @@ -147,8 +144,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { .imageType = ConvertImageType(info.type), .format = format_info.format, .extent{ - .width = scale_up(info.size.width) >> samples_x, - .height = (is_2d ? 
scale_up(info.size.height) : info.size.height) >> samples_y, + .width = info.size.width >> samples_x, + .height = info.size.height >> samples_y, .depth = info.size.depth, }, .mipLevels = static_cast(info.resources.levels), @@ -163,12 +160,11 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } -[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info, u32 up = 1, - u32 down = 0) { +[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) { if (info.type == ImageType::Buffer) { return vk::Image{}; } - return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info, up, down)); + return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); } [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { @@ -860,10 +856,9 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, nullptr, nullptr, read_barriers); if (is_resolve) { - VkImageResolve resolve_info = - MakeImageResolve(dst_region, src_region, dst_layers, src_layers); cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, resolve_info); + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + MakeImageResolve(dst_region, src_region, dst_layers, src_layers)); } else { const bool is_linear = filter == Fermi2D::Filter::Bilinear; const VkFilter vk_filter = is_linear ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST; @@ -1143,7 +1138,17 @@ bool Image::ScaleUp() { } const auto& device = runtime->device; if (!scaled_image) { - scaled_image = MakeImage(device, info, resolution.up_scale, resolution.down_shift); + const u32 up = resolution.up_scale; + const u32 down = resolution.down_shift; + const auto scale = [&](u32 value) { return std::max((value * up) >> down, 1U); }; + + const bool is_2d = info.type == ImageType::e2D; + const u32 scaled_width = scale(info.size.width); + const u32 scaled_height = is_2d ? scale(info.size.height) : info.size.height; + auto scaled_info = info; + scaled_info.size.width = scaled_width; + scaled_info.size.height = scaled_height; + scaled_image = MakeImage(device, scaled_info); auto& allocator = runtime->memory_allocator; scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal)); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index e5060e3f1..5381343e9 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -6,7 +6,6 @@ #include -#include "common/settings.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/texture_cache/image_view_base.h" @@ -14,6 +13,10 @@ #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace Settings { +struct ResolutionScalingInfo; +} + namespace Vulkan { using VideoCommon::ImageId; @@ -86,7 +89,7 @@ public: BlitImageHelper& blit_image_helper; ASTCDecoderPass& astc_decoder_pass; RenderPassCache& render_pass_cache; - Settings::ResolutionScalingInfo resolution; + const Settings::ResolutionScalingInfo& resolution; }; class Image : public VideoCommon::ImageBase { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 
b708e41b5..630c73005 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1726,9 +1726,7 @@ void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::vector Date: Wed, 6 Oct 2021 02:02:05 -0400 Subject: vk_texture_cache: Fix early returns on unsupported scales --- .../renderer_vulkan/vk_texture_cache.cpp | 28 ++++++++-------------- src/video_core/texture_cache/texture_cache.h | 2 +- 2 files changed, 11 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 930c7d569..1ab2b1fe9 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1130,13 +1130,19 @@ bool Image::ScaleUp() { return false; } ASSERT(info.type != ImageType::Linear); - flags |= ImageFlagBits::Rescaled; - const auto& resolution = runtime->resolution; if (!resolution.active) { - return true; + return false; } const auto& device = runtime->device; + const PixelFormat format = StorageFormat(info.format); + const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, format); + const auto similar = device.GetSupportedFormat( + format_info.format, (VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT), + FormatType::Optimal); + if (similar != format_info.format) { + return true; + } if (!scaled_image) { const u32 up = resolution.up_scale; const u32 down = resolution.down_shift; @@ -1155,23 +1161,9 @@ bool Image::ScaleUp() { if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } - if (info.num_samples > 1) { - return true; - } - const PixelFormat format = StorageFormat(info.format); - const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, format); - const auto similar = device.GetSupportedFormat( - format_info.format, (VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT), - FormatType::Optimal); - - if (similar != format_info.format) { - return true; - } - if (aspect_mask == 0) { - aspect_mask = ImageAspectMask(info.format); 
- } BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution, true); current_image = *scaled_image; + flags |= ImageFlagBits::Rescaled; return true; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 630c73005..de522cc43 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -857,7 +857,7 @@ u64 TextureCache

::GetScaledImageSizeBytes(Image& image) { const f32 add_to_size = Settings::values.resolution_info.up_factor - 1.0f; const bool sign = std::signbit(add_to_size); const u32 image_size_bytes = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - const u64 tentative_size = static_cast(image_size_bytes * std::abs(add_to_size)); + const u64 tentative_size = image_size_bytes * static_cast(std::abs(add_to_size)); const u64 fitted_size = Common::AlignUp(tentative_size, 1024); return sign ? -fitted_size : fitted_size; } -- cgit v1.2.3 From 89a7e566c7a101d688e96641cc2a485f2da54d4b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 7 Oct 2021 02:15:16 -0400 Subject: vk_texture_cache: Fix unsupported blit format error checking --- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 10 +++++----- src/video_core/vulkan_common/vulkan_device.h | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1ab2b1fe9..65506f75e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1137,11 +1137,11 @@ bool Image::ScaleUp() { const auto& device = runtime->device; const PixelFormat format = StorageFormat(info.format); const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, format); - const auto similar = device.GetSupportedFormat( - format_info.format, (VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT), - FormatType::Optimal); - if (similar != format_info.format) { - return true; + const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + if (!device.IsFormatSupported(format_info.format, blit_usage, FormatType::Optimal)) { + LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", format); + // TODO: Use helper blits where 
applicable + return false; } if (!scaled_image) { const u32 up = resolution.up_scale; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 2d5daf6cd..10653ac6b 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -40,6 +40,10 @@ public: VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; + /// Returns true if a format is supported. + bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, + FormatType format_type) const; + /// Reports a device loss. void ReportLoss() const; @@ -370,10 +374,6 @@ private: /// Returns true if the device natively supports blitting depth stencil images. bool TestDepthStencilBlits() const; - /// Returns true if a format is supported. - bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, - FormatType format_type) const; - VkInstance instance; ///< Vulkan instance. vk::DeviceDispatch dld; ///< Device function pointers. vk::PhysicalDevice physical; ///< Physical device. 
-- cgit v1.2.3 From 3233fa5dc8780975497dc8ce70d10d0186e50b62 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 7 Oct 2021 23:55:40 -0400 Subject: gl_texture_cache: Disable scissor test when scaling textures Fixes a bug on BOTW where some objects were no longer being rendered after blitting --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index c68a51ebb..3dfd13d6a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -939,6 +939,11 @@ bool Image::Scale() { dst_info.size.height = scaled_height; upscaled_backup = MakeImage(dst_info, gl_internal_format); } + // TODO (ameerj): Investigate other GL states that affect blitting. + GLboolean scissor_test; + glGetBooleani_v(GL_SCISSOR_TEST, 0, &scissor_test); + glDisablei(GL_SCISSOR_TEST, 0); + const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle; const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle; for (s32 layer = 0; layer < info.resources.layers; ++layer) { @@ -955,6 +960,9 @@ bool Image::Scale() { 0, dst_level_width, dst_level_height, mask, filter); } } + if (scissor_test != GL_FALSE) { + glEnablei(GL_SCISSOR_TEST, 0); + } current_texture = upscaled_backup.handle; return true; } -- cgit v1.2.3 From 1c93476a803f8cb93de341a43a013d5cc302b05b Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Fri, 8 Oct 2021 20:47:14 -0400 Subject: video_core,yuzu: Move UpdateRescalingInfo call to video_core This only needs to happen once per game boot, so we can just call it during CreateGPU and be done with it, avoiding the need to call it in the frontends. 
--- src/video_core/video_core.cpp | 2 ++ src/yuzu/configuration/config.cpp | 4 ---- src/yuzu/configuration/configure_graphics.cpp | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 508173db3..e852c817e 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -37,6 +37,8 @@ std::unique_ptr CreateRenderer( namespace VideoCore { std::unique_ptr CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { + Settings::UpdateRescalingInfo(); + const auto nvdec_value = Settings::values.nvdec_emulation.GetValue(); const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off; const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 3803bf501..4c296a94d 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -850,8 +850,6 @@ void Config::ReadRendererValues() { ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks); } - Settings::UpdateRescalingInfo(); - qt_config->endGroup(); } @@ -1409,8 +1407,6 @@ void Config::SaveRendererValues() { WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks); } - Settings::UpdateRescalingInfo(); - qt_config->endGroup(); } diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index e01efaeda..02498fad7 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -247,7 +247,6 @@ void ConfigureGraphics::ApplyConfiguration() { Settings::values.bg_blue.SetValue(static_cast(bg_color.blue())); } } - Settings::UpdateRescalingInfo(); } void ConfigureGraphics::changeEvent(QEvent* event) { -- cgit v1.2.3 From 49c0c7efd2485e01cd014928f7987533ce62509b Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> 
Date: Fri, 8 Oct 2021 20:48:10 -0400 Subject: yuzu_cmd: Read resolution_setup and scaling_filter from config Also adds descriptions and the settings to the default config. --- src/yuzu_cmd/config.cpp | 2 ++ src/yuzu_cmd/default_ini.h | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'src') diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 0b8fde691..3c888d84e 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -451,6 +451,8 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks); ReadSetting("Renderer", Settings::values.vulkan_device); + ReadSetting("Renderer", Settings::values.resolution_setup); + ReadSetting("Renderer", Settings::values.scaling_filter); ReadSetting("Renderer", Settings::values.fullscreen_mode); ReadSetting("Renderer", Settings::values.aspect_ratio); ReadSetting("Renderer", Settings::values.max_anisotropy); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 339dca766..ecdc271a8 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -236,6 +236,29 @@ disable_shader_loop_safety_checks = # Which Vulkan physical device to use (defaults to 0) vulkan_device = +# 0: 0.5x (360p/540p) [EXPERIMENTAL] +# 1: 0.75x (540p/810p) [EXPERIMENTAL] +# 2 (default): 1x (720p/1080p) +# 3: 2x (1440p/2160p) +# 4: 3x (2160p/3240p) +# 5: 4x (2880p/4320p) +# 6: 5x (3600p/5400p) +# 7: 6x (4320p/6480p) +resolution_setup = + +# Pixel filter to use when up- or down-sampling rendered frames. 
+# 0: Nearest Neighbor +# 1 (default): Bilinear +# 2: Bicubic +# 3: Gaussian +# 4: ScaleForce +# 5: AMD FidelityFX™️ Super Resolution [Vulkan Only] +scaling_filter = + +# Anti-Aliasing (AA) +# 0 (default): None, 1: FXAA +anti_aliasing = + # Whether to use fullscreen or borderless window mode # 0 (Windows default): Borderless window, 1 (All other default): Exclusive fullscreen fullscreen_mode = -- cgit v1.2.3 From b14f2c7c826b8bbea02c1f2674ab024a5ae0695e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 11 Oct 2021 23:55:53 -0400 Subject: texture_cache: Fix image resolves when src/dst are not both scaled --- src/video_core/texture_cache/texture_cache.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index de522cc43..38895c2e9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -475,12 +475,10 @@ void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, Image& dst_image = slot_images[dst_id]; Image& src_image = slot_images[src_id]; - - bool is_resolve = src_image.info.num_samples != 1 && dst_image.info.num_samples == 1; - bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); bool is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); + const bool is_resolve = src_image.info.num_samples != 1 && dst_image.info.num_samples == 1; if (is_src_rescaled != is_dst_rescaled) { if (ImageCanRescale(src_image)) { ScaleUp(src_image); @@ -498,7 +496,13 @@ void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); } } - + if (is_resolve && (is_src_rescaled != is_dst_rescaled)) { + // A resolve requires both images to be the same dimensions. Resize down if needed. + ScaleDown(src_image); + ScaleDown(dst_image); + is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); + is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); + } const auto& resolution = Settings::values.resolution_info; const auto scale_up = [&](u32 value) -> u32 { if (value == 0) { @@ -506,7 +510,6 @@ void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, } return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); }; - const auto scale_region = [&](Region2D& region) { region.start.x = scale_up(region.start.x); region.start.y = scale_up(region.start.y); -- cgit v1.2.3 From abd07e41582b6d8f7efdedb936cdd7a7fddf9912 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 12 Oct 2021 00:35:01 -0400 Subject: video_core: Refactor resolution scale function --- src/common/settings.h | 14 ++++++++ .../renderer_opengl/gl_texture_cache.cpp | 8 ++--- .../renderer_vulkan/vk_texture_cache.cpp | 19 ++++------- src/video_core/texture_cache/texture_cache.h | 39 +++++++--------------- 4 files changed, 34 insertions(+), 46 deletions(-) (limited to 'src') diff --git a/src/common/settings.h b/src/common/settings.h index f629c7c56..09f7cdd84 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -72,6 +72,20 @@ struct ResolutionScalingInfo { f32 up_factor{1.0f}; f32 down_factor{1.0f}; bool active{}; + + s32 ScaleUp(s32 value) const { + if (value == 0) { + return 0; + } + return std::max((value * static_cast(up_scale)) >> static_cast(down_shift), 1); + } + + u32 ScaleUp(u32 value) const { + if (value == 0U) { + return 0U; + } + return std::max((value * up_scale) >> down_shift, 1U); + } }; /** The BasicSetting class is a simple resource manager. It defines a label and default value diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 3dfd13d6a..ec1afd31a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -924,12 +924,8 @@ bool Image::Scale() { const GLenum filter = linear_color_format ? 
GL_LINEAR : GL_NEAREST; const auto& resolution = runtime->resolution; - const u32 up = resolution.up_scale; - const u32 down = resolution.down_shift; - const auto scale = [&](u32 value) { return std::max((value * up) >> down, 1U); }; - - const u32 scaled_width = scale(info.size.width); - const u32 scaled_height = is_2d ? scale(info.size.height) : info.size.height; + const u32 scaled_width = resolution.ScaleUp(info.size.width); + const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; const u32 original_width = info.size.width; const u32 original_height = info.size.height; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 65506f75e..caefce5fc 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -607,16 +607,13 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, type, scaling, vk_filter](vk::CommandBuffer cmdbuf) { - const auto scale_up = [&](u32 value) { - return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); - }; const VkOffset2D src_size{ - .x = static_cast(scaling ? extent.width : scale_up(extent.width)), - .y = static_cast(scaling ? extent.height : scale_up(extent.height)), + .x = static_cast(scaling ? extent.width : resolution.ScaleUp(extent.width)), + .y = static_cast(scaling ? extent.height : resolution.ScaleUp(extent.height)), }; const VkOffset2D dst_size{ - .x = static_cast(scaling ? scale_up(extent.width) : extent.width), - .y = static_cast(scaling ? scale_up(extent.height) : extent.height), + .x = static_cast(scaling ? resolution.ScaleUp(extent.width) : extent.width), + .y = static_cast(scaling ? 
resolution.ScaleUp(extent.height) : extent.height), }; boost::container::small_vector regions; regions.reserve(resources.levels); @@ -1144,13 +1141,9 @@ bool Image::ScaleUp() { return false; } if (!scaled_image) { - const u32 up = resolution.up_scale; - const u32 down = resolution.down_shift; - const auto scale = [&](u32 value) { return std::max((value * up) >> down, 1U); }; - const bool is_2d = info.type == ImageType::e2D; - const u32 scaled_width = scale(info.size.width); - const u32 scaled_height = is_2d ? scale(info.size.height) : info.size.height; + const u32 scaled_width = resolution.ScaleUp(info.size.width); + const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; auto scaled_info = info; scaled_info.size.width = scaled_width; scaled_info.size.height = scaled_height; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 38895c2e9..c77332b46 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -504,17 +504,11 @@ void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); } const auto& resolution = Settings::values.resolution_info; - const auto scale_up = [&](u32 value) -> u32 { - if (value == 0) { - return 0U; - } - return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); - }; const auto scale_region = [&](Region2D& region) { - region.start.x = scale_up(region.start.x); - region.start.y = scale_up(region.start.y); - region.end.x = scale_up(region.end.x); - region.end.y = scale_up(region.end.y); + region.start.x = resolution.ScaleUp(region.start.x); + region.start.y = resolution.ScaleUp(region.start.y); + region.end.x = resolution.ScaleUp(region.end.x); + region.end.y = resolution.ScaleUp(region.end.y); }; // TODO: Deduplicate @@ -1721,20 +1715,14 @@ void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::vector u32 { - if (value == 0) { - return 0U; - } - return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); - }; for (auto& copy : copies) { - copy.src_offset.x = scale_up(copy.src_offset.x); - copy.dst_offset.x = scale_up(copy.dst_offset.x); - copy.extent.width = scale_up(copy.extent.width); + copy.src_offset.x = resolution.ScaleUp(copy.src_offset.x); + copy.dst_offset.x = resolution.ScaleUp(copy.dst_offset.x); + copy.extent.width = resolution.ScaleUp(copy.extent.width); if (both_2d) { - copy.src_offset.y = scale_up(copy.src_offset.y); - copy.dst_offset.y = scale_up(copy.dst_offset.y); - copy.extent.height = scale_up(copy.extent.height); + copy.src_offset.y = resolution.ScaleUp(copy.src_offset.y); + copy.dst_offset.y = resolution.ScaleUp(copy.dst_offset.y); + copy.extent.height = resolution.ScaleUp(copy.extent.height); } } } @@ -1812,12 +1800,9 @@ std::pair TextureCache

::RenderTargetFromImage( Extent3D extent = MipSize(image.info.size, view_info.range.base.level); if (is_rescaled) { const auto& resolution = Settings::values.resolution_info; - const auto scale_up = [&](u32 value) { - return std::max((value * resolution.up_scale) >> resolution.down_shift, 1U); - }; - extent.width = scale_up(extent.width); + extent.width = resolution.ScaleUp(extent.width); if (image.info.type == ImageType::e2D) { - extent.height = scale_up(extent.height); + extent.height = resolution.ScaleUp(extent.height); } } const u32 num_samples = image.info.num_samples; -- cgit v1.2.3 From b1ae935f114e1011b19d4ada352c401e6655279a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 12 Oct 2021 00:54:28 -0400 Subject: vk_texture_cache: Fix BlitScale of non-2D images --- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index caefce5fc..ccfdf64ea 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -592,9 +592,8 @@ struct RangedBarrierRange { } void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info, - VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution, - bool scaling) { - const auto type = info.type; + VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution) { + const bool is_2d = info.type == ImageType::e2D; const auto resources = info.resources; const VkExtent2D extent{ .width = info.size.width, @@ -605,15 +604,15 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con const VkFilter vk_filter = (is_zeta || is_int_format) ? 
VK_FILTER_NEAREST : VK_FILTER_LINEAR; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, type, - scaling, vk_filter](vk::CommandBuffer cmdbuf) { + scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d, + vk_filter](vk::CommandBuffer cmdbuf) { const VkOffset2D src_size{ - .x = static_cast(scaling ? extent.width : resolution.ScaleUp(extent.width)), - .y = static_cast(scaling ? extent.height : resolution.ScaleUp(extent.height)), + .x = static_cast(extent.width), + .y = static_cast(extent.height), }; const VkOffset2D dst_size{ - .x = static_cast(scaling ? resolution.ScaleUp(extent.width) : extent.width), - .y = static_cast(scaling ? resolution.ScaleUp(extent.height) : extent.height), + .x = static_cast(resolution.ScaleUp(extent.width)), + .y = static_cast(is_2d ? resolution.ScaleUp(extent.height) : extent.height), }; boost::container::small_vector regions; regions.reserve(resources.levels); @@ -1154,7 +1153,7 @@ bool Image::ScaleUp() { if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } - BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution, true); + BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution); current_image = *scaled_image; flags |= ImageFlagBits::Rescaled; return true; -- cgit v1.2.3 From 4de584005fe8ae00608f8c3267a78e7cf0eb52aa Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 12 Oct 2021 01:45:54 -0400 Subject: texture_cache: Fix infinitely recursive ImageCanRescale check --- src/video_core/texture_cache/image_base.cpp | 2 ++ src/video_core/texture_cache/image_base.h | 5 +++-- src/video_core/texture_cache/texture_cache.h | 16 ++++++++-------- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp 
index 25a211df8..1909c9ecb 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -256,6 +256,8 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i } lhs.aliased_images.push_back(std::move(lhs_alias)); rhs.aliased_images.push_back(std::move(rhs_alias)); + lhs.flags &= ~ImageFlagBits::IsRescalable; + rhs.flags &= ~ImageFlagBits::IsRescalable; } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 9c34687e0..bab290ac7 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -36,8 +36,9 @@ enum class ImageFlagBits : u32 { // Rescaler Rescaled = 1 << 12, - RescaleChecked = 1 << 13, - Blacklisted = 1 << 14, + CheckingRescalable = 1 << 13, + IsRescalable = 1 << 14, + Blacklisted = 1 << 15, }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c77332b46..c1fb12679 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -795,25 +795,25 @@ bool TextureCache

::BlackListImage(ImageId image_id) { template bool TextureCache

::ImageCanRescale(ImageBase& image) { - if (True(image.flags & ImageFlagBits::Blacklisted)) { + if (!image.info.rescaleable || True(image.flags & ImageFlagBits::Blacklisted)) { return false; } - if (True(image.flags & (ImageFlagBits::Rescaled | ImageFlagBits::RescaleChecked))) { + if (True(image.flags & (ImageFlagBits::Rescaled | ImageFlagBits::CheckingRescalable))) { return true; } - if (!image.info.rescaleable) { - image.flags &= ~ImageFlagBits::RescaleChecked; - return false; + if (True(image.flags & ImageFlagBits::IsRescalable)) { + return true; } - image.flags |= ImageFlagBits::RescaleChecked; + image.flags |= ImageFlagBits::CheckingRescalable; for (const auto& alias : image.aliased_images) { Image& other_image = slot_images[alias.id]; if (!ImageCanRescale(other_image)) { - image.flags &= ~ImageFlagBits::RescaleChecked; + image.flags &= ~ImageFlagBits::CheckingRescalable; return false; } } - image.flags &= ~ImageFlagBits::RescaleChecked; + image.flags &= ~ImageFlagBits::CheckingRescalable; + image.flags |= ImageFlagBits::IsRescalable; return true; } -- cgit v1.2.3 From ebf36f23dd781c06fd100de10cc2ec25d4bec215 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 13 Oct 2021 23:53:59 -0400 Subject: vk_texture_cache: Use 3D to scale images when blit is unsupported --- src/video_core/renderer_vulkan/blit_image.cpp | 3 +- src/video_core/renderer_vulkan/blit_image.h | 2 +- .../renderer_vulkan/vk_texture_cache.cpp | 100 +++++++++++++++------ src/video_core/renderer_vulkan/vk_texture_cache.h | 11 ++- 4 files changed, 87 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 6c1b2f063..b97aac550 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -363,7 +363,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, 
BlitImageHelper::~BlitImageHelper() = default; -void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation) { @@ -373,7 +373,6 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV .operation = operation, }; const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplacePipeline(key); scheduler.RequestRenderpass(dst_framebuffer); diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 33ee095c1..e11f8c214 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -34,7 +34,7 @@ public: StateTracker& state_tracker, DescriptorPool& descriptor_pool); ~BlitImageHelper(); - void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ccfdf64ea..9b90c7d9b 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -762,8 +762,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst return; } if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) { - blit_image_helper.BlitColor(dst_framebuffer, 
src, dst_region, src_region, filter, - operation); + blit_image_helper.BlitColor(dst_framebuffer, src.Handle(Shader::TextureType::Color2D), + dst_region, src_region, filter, operation); return; } if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { @@ -1131,18 +1131,10 @@ bool Image::ScaleUp() { return false; } const auto& device = runtime->device; - const PixelFormat format = StorageFormat(info.format); - const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, format); - const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - if (!device.IsFormatSupported(format_info.format, blit_usage, FormatType::Optimal)) { - LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", format); - // TODO: Use helper blits where applicable - return false; - } + const bool is_2d = info.type == ImageType::e2D; + const u32 scaled_width = resolution.ScaleUp(info.size.width); + const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; if (!scaled_image) { - const bool is_2d = info.type == ImageType::e2D; - const u32 scaled_width = resolution.ScaleUp(info.size.width); - const u32 scaled_height = is_2d ? 
resolution.ScaleUp(info.size.height) : info.size.height; auto scaled_info = info; scaled_info.size.width = scaled_width; scaled_info.size.height = scaled_height; @@ -1150,11 +1142,56 @@ bool Image::ScaleUp() { auto& allocator = runtime->memory_allocator; scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal)); } + current_image = *scaled_image; + if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } - BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution); - current_image = *scaled_image; + static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; + const PixelFormat format = StorageFormat(info.format); + const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; + const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { + BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution); + } else { + using namespace VideoCommon; + static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; + + const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); + scale_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); + auto* view_ptr = scale_view.get(); + + const Region2D src_region{ + .start = {0, 0}, + .end = {static_cast(info.size.width), static_cast(info.size.height)}, + }; + const Region2D dst_region{ + .start = {0, 0}, + .end = {static_cast(scaled_width), static_cast(scaled_height)}, + }; + const VkExtent2D extent{ + .width = scaled_width, + .height = scaled_height, + }; + if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { + scale_framebuffer = std::make_unique(*runtime, view_ptr, nullptr, extent); + const auto color_view = scale_view->Handle(Shader::TextureType::Color2D); + + runtime->blit_image_helper.BlitColor( + scale_framebuffer.get(), color_view, dst_region, src_region, + 
Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION); + } else if (!runtime->device.IsBlitDepthStencilSupported() && + aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + scale_framebuffer = std::make_unique(*runtime, nullptr, view_ptr, extent); + runtime->blit_image_helper.BlitDepthStencil( + scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(), + dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION); + } else { + // TODO: Use helper blits where applicable + LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", format); + return false; + } + } flags |= ImageFlagBits::Rescaled; return true; } @@ -1370,7 +1407,27 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t } Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, - ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { + ImageView* depth_buffer, const VideoCommon::RenderTargets& key) + : render_area{VkExtent2D{ + .width = key.size.width, + .height = key.size.height, + }} { + CreateFramebuffer(runtime, color_buffers, depth_buffer); + if (runtime.device.HasDebuggingToolAttached()) { + framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); + } +} + +Framebuffer::Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, + ImageView* depth_buffer, VkExtent2D extent) + : render_area{extent} { + std::array color_buffers{color_buffer}; + CreateFramebuffer(runtime, color_buffers, depth_buffer); +} + +void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, + std::span color_buffers, + ImageView* depth_buffer) { std::vector attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; @@ -1408,10 +1465,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span(num_colors); framebuffer = runtime.device.GetLogical().CreateFramebuffer({ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, @@ -1420,13 +1473,10 @@ 
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span(attachments.size()), .pAttachments = attachments.data(), - .width = key.size.width, - .height = key.size.height, + .width = render_area.width, + .height = render_area.height, .layers = static_cast(std::max(num_layers, 1)), }); - if (runtime.device.HasDebuggingToolAttached()) { - framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); - } } void TextureCacheRuntime::AccelerateImageUpload( diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 5381343e9..dc9175ee1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -145,6 +145,9 @@ private: vk::Image scaled_image{}; MemoryCommit scaled_commit{}; VkImage current_image{}; + + std::unique_ptr scale_framebuffer; + std::unique_ptr scale_view; }; class ImageView : public VideoCommon::ImageViewBase { @@ -221,9 +224,15 @@ private: class Framebuffer { public: - explicit Framebuffer(TextureCacheRuntime&, std::span color_buffers, + explicit Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key); + explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, + ImageView* depth_buffer, VkExtent2D extent); + + void CreateFramebuffer(TextureCacheRuntime& runtime, + std::span color_buffers, ImageView* depth_buffer); + [[nodiscard]] VkFramebuffer Handle() const noexcept { return *framebuffer; } -- cgit v1.2.3 From ca1db6311631df4945a223c556dba1b9db5b5484 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 14 Oct 2021 00:08:57 -0400 Subject: yuzu: Fix build errors --- src/yuzu/game_list.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 6bd0f9ee9..2af95dbe5 100644 --- a/src/yuzu/game_list.cpp +++ 
b/src/yuzu/game_list.cpp @@ -159,7 +159,7 @@ GameListSearchField::GameListSearchField(GameList* parent) : QWidget{parent} { * @return true if the haystack contains all words of userinput */ static bool ContainsAllWords(const QString& haystack, const QString& userinput) { - const QStringList userinput_split = userinput.split(QLatin1Char{' '}, Qt::SkipEmptyParts); + const QStringList userinput_split = userinput.split(QLatin1Char{' '}, QString::SkipEmptyParts); return std::all_of(userinput_split.begin(), userinput_split.end(), [&haystack](const QString& s) { return haystack.contains(s); }); -- cgit v1.2.3 From 0f14c9379eae9c3caf8f4b932eace0a84d728f94 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 14 Oct 2021 14:12:25 -0400 Subject: texture_cache_base: Remove unused function declarations --- src/video_core/texture_cache/texture_cache_base.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 40e003b60..4dbe050af 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -144,14 +144,6 @@ public: const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy); - /// Invalidate the contents of the color buffer index - /// These contents become unspecified, the cache can assume aggressive optimizations. - void InvalidateColorBuffer(size_t index); - - /// Invalidate the contents of the depth buffer - /// These contents become unspecified, the cache can assume aggressive optimizations. 
- void InvalidateDepthBuffer(); - /// Try to find a cached image view in the given CPU address [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); -- cgit v1.2.3 From b7ccc58f235d9e442677eb10259b7196a387c6bc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 15 Oct 2021 22:59:16 +0200 Subject: Texture Cache: Fix downscaling on SMO. --- src/common/settings.cpp | 2 ++ src/common/settings.h | 1 + src/video_core/texture_cache/image_info.cpp | 4 ++++ src/video_core/texture_cache/image_info.h | 1 + src/video_core/texture_cache/texture_cache.h | 3 +++ 5 files changed, 11 insertions(+) (limited to 'src') diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 12fdb0f9b..bc2c8c7d7 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -109,10 +109,12 @@ float Volume() { void UpdateRescalingInfo() { const auto setup = values.resolution_setup.GetValue(); auto& info = values.resolution_info; + info.downscale = false; switch (setup) { case ResolutionSetup::Res1_2X: info.up_scale = 1; info.down_shift = 1; + info.downscale = true; break; case ResolutionSetup::Res1X: info.up_scale = 1; diff --git a/src/common/settings.h b/src/common/settings.h index 09f7cdd84..a09db0822 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -72,6 +72,7 @@ struct ResolutionScalingInfo { f32 up_factor{1.0f}; f32 down_factor{1.0f}; bool active{}; + bool downscale{}; s32 ScaleUp(s32 value) const { if (value == 0) { diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 7fa8fd4fe..d8e414247 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -102,6 +102,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { layer_stride = CalculateLayerStride(*this); maybe_unaligned_layer_stride = CalculateLayerSize(*this); rescaleable &= (block.depth == 0) && resources.levels == 1; + downscaleable = size.height > 512; } } @@ -135,6 +136,7
@@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) size.depth = rt.depth; } else { rescaleable = block.depth == 0 && size.height > 256; + downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = rt.depth; } @@ -164,6 +166,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { size.depth = regs.zeta_depth; } else { rescaleable = block.depth == 0 && size.height > 256; + downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = regs.zeta_depth; } @@ -197,6 +200,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { .depth = 1, }; rescaleable = block.depth == 0 && size.height > 256; + downscaleable = size.height > 512; } } diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index e874d2870..5932dcaba 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -34,6 +34,7 @@ struct ImageInfo { u32 num_samples = 1; u32 tile_width_spacing = 0; bool rescaleable = false; + bool downscaleable = false; }; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c1fb12679..261cb6c48 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -798,6 +798,9 @@ bool TextureCache

::ImageCanRescale(ImageBase& image) { if (!image.info.rescaleable || True(image.flags & ImageFlagBits::Blacklisted)) { return false; } + if (Settings::values.resolution_info.downscale && !image.info.downscaleable) { + return false; + } if (True(image.flags & (ImageFlagBits::Rescaled | ImageFlagBits::CheckingRescalable))) { return true; } -- cgit v1.2.3 From 618de4e7871898f165c028293becd235ce3ccb09 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 16 Oct 2021 00:30:43 -0400 Subject: vulkan: Fix rescaling push constant usage --- .../backend/spirv/emit_context.cpp | 58 +++++++++++----------- src/shader_recompiler/backend/spirv/emit_context.h | 3 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 5 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 4 +- src/video_core/renderer_vulkan/pipeline_helper.h | 25 +++++----- .../renderer_vulkan/vk_compute_pipeline.cpp | 36 ++++++++------ .../renderer_vulkan/vk_compute_pipeline.h | 1 - .../renderer_vulkan/vk_graphics_pipeline.cpp | 15 +++--- 8 files changed, 78 insertions(+), 69 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 8646fe989..723455462 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1006,47 +1006,47 @@ void EmitContext::DefineRescalingInput(const Info& info) { return; } if (profile.unified_descriptor_binding) { - DefineRescalingInputPushConstant(info); + DefineRescalingInputPushConstant(); } else { DefineRescalingInputUniformConstant(); } } -void EmitContext::DefineRescalingInputPushConstant(const Info& info) { - boost::container::static_vector members{F32[1]}; +void EmitContext::DefineRescalingInputPushConstant() { + boost::container::static_vector members{}; u32 member_index{0}; - if (!info.texture_descriptors.empty()) { - rescaling_textures_type = TypeArray(U32[1], Const(4u)); - 
Decorate(rescaling_textures_type, spv::Decoration::ArrayStride, 4u); - members.push_back(rescaling_textures_type); - rescaling_textures_member_index = ++member_index; - } - if (!info.image_descriptors.empty()) { - rescaling_images_type = TypeArray(U32[1], Const(NUM_IMAGE_SCALING_WORDS)); - if (rescaling_textures_type.value != rescaling_images_type.value) { - Decorate(rescaling_images_type, spv::Decoration::ArrayStride, 4u); - } - members.push_back(rescaling_images_type); - rescaling_images_member_index = ++member_index; + + rescaling_textures_type = TypeArray(U32[1], Const(4u)); + Decorate(rescaling_textures_type, spv::Decoration::ArrayStride, 4u); + members.push_back(rescaling_textures_type); + rescaling_textures_member_index = member_index++; + + rescaling_images_type = TypeArray(U32[1], Const(NUM_IMAGE_SCALING_WORDS)); + Decorate(rescaling_images_type, spv::Decoration::ArrayStride, 4u); + members.push_back(rescaling_images_type); + rescaling_images_member_index = member_index++; + + if (stage != Stage::Compute) { + members.push_back(F32[1]); + rescaling_downfactor_member_index = member_index++; } const Id push_constant_struct{TypeStruct(std::span(members.data(), members.size()))}; Decorate(push_constant_struct, spv::Decoration::Block); Name(push_constant_struct, "ResolutionInfo"); - MemberDecorate(push_constant_struct, 0u, spv::Decoration::Offset, 0u); - MemberName(push_constant_struct, 0u, "down_factor"); + MemberDecorate(push_constant_struct, rescaling_textures_member_index, spv::Decoration::Offset, + static_cast(offsetof(RescalingLayout, rescaling_textures))); + MemberName(push_constant_struct, rescaling_textures_member_index, "rescaling_textures"); - const u32 offset_bias = stage == Stage::Compute ? 
sizeof(u32) : 0; - if (!info.texture_descriptors.empty()) { - MemberDecorate( - push_constant_struct, rescaling_textures_member_index, spv::Decoration::Offset, - static_cast(offsetof(RescalingLayout, rescaling_textures) - offset_bias)); - MemberName(push_constant_struct, rescaling_textures_member_index, "rescaling_textures"); - } - if (!info.image_descriptors.empty()) { - MemberDecorate(push_constant_struct, rescaling_images_member_index, spv::Decoration::Offset, - static_cast(offsetof(RescalingLayout, rescaling_images) - offset_bias)); - MemberName(push_constant_struct, rescaling_images_member_index, "rescaling_images"); + MemberDecorate(push_constant_struct, rescaling_images_member_index, spv::Decoration::Offset, + static_cast(offsetof(RescalingLayout, rescaling_images))); + MemberName(push_constant_struct, rescaling_images_member_index, "rescaling_images"); + + if (stage != Stage::Compute) { + MemberDecorate(push_constant_struct, rescaling_downfactor_member_index, + spv::Decoration::Offset, + static_cast(offsetof(RescalingLayout, down_factor))); + MemberName(push_constant_struct, rescaling_downfactor_member_index, "down_factor"); } const Id pointer_type{TypePointer(spv::StorageClass::PushConstant, push_constant_struct)}; rescaling_push_constants = AddGlobalVariable(pointer_type, spv::StorageClass::PushConstant); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index b67704baa..63f8185d9 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -244,6 +244,7 @@ public: Id rescaling_images_type{}; u32 rescaling_textures_member_index{}; u32 rescaling_images_member_index{}; + u32 rescaling_downfactor_member_index{}; u32 texture_rescaling_index{}; u32 image_rescaling_index{}; @@ -324,7 +325,7 @@ private: void DefineAttributeMemAccess(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); void DefineRescalingInput(const 
Info& info); - void DefineRescalingInputPushConstant(const Info& info); + void DefineRescalingInputPushConstant(); void DefineRescalingInputUniformConstant(); void DefineInputs(const IR::Program& program); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index cf59f2572..4b25534ce 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -22,11 +22,12 @@ constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS = NUM_TEXTURE_SCALING_WORDS + NUM_IMAGE_SCALING_WORDS; struct RescalingLayout { - u32 down_factor; alignas(16) std::array rescaling_textures; alignas(16) std::array rescaling_images; + alignas(16) u32 down_factor; }; -constexpr u32 RESCALING_PUSH_CONSTANT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures); +constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures); +constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor); [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index c0db7452f..bac683ae1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -529,8 +529,8 @@ Id EmitYDirection(EmitContext& ctx) { Id EmitResolutionDownFactor(EmitContext& ctx) { if (ctx.profile.unified_descriptor_binding) { const Id pointer_type{ctx.TypePointer(spv::StorageClass::PushConstant, ctx.F32[1])}; - const Id pointer{ - ctx.OpAccessChain(pointer_type, ctx.rescaling_push_constants, ctx.u32_zero_value)}; + const Id index{ctx.Const(ctx.rescaling_downfactor_member_index)}; + const Id pointer{ctx.OpAccessChain(pointer_type, ctx.rescaling_push_constants, 
index)}; return ctx.OpLoad(ctx.F32[1], pointer); } else { const Id composite{ctx.OpLoad(ctx.F32[4], ctx.rescaling_uniform_constant)}; diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 3612e8a18..ae5e66ef4 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -22,7 +22,6 @@ namespace Vulkan { using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS; -using Shader::Backend::SPIRV::RESCALING_PUSH_CONSTANT_WORDS_OFFSET; class DescriptorLayoutBuilder { public: @@ -73,12 +72,12 @@ public: vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { using Shader::Backend::SPIRV::RescalingLayout; - const u32 push_offset = is_compute ? RESCALING_PUSH_CONSTANT_WORDS_OFFSET : 0; + const u32 size_offset = is_compute ? sizeof(RescalingLayout::down_factor) : 0u; const VkPushConstantRange range{ .stageFlags = static_cast( is_compute ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS), .offset = 0, - .size = sizeof(RescalingLayout) - push_offset, + .size = sizeof(RescalingLayout) - size_offset, }; return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, @@ -139,21 +138,21 @@ private: class RescalingPushConstant { public: - explicit RescalingPushConstant(u32 num_textures) noexcept {} + explicit RescalingPushConstant() noexcept {} void PushTexture(bool is_rescaled) noexcept { - *texture_ptr |= is_rescaled ? texture_bit : 0; - texture_bit <<= 1; - if (texture_bit == 0) { + *texture_ptr |= is_rescaled ? texture_bit : 0u; + texture_bit <<= 1u; + if (texture_bit == 0u) { texture_bit = 1u; ++texture_ptr; } } void PushImage(bool is_rescaled) noexcept { - *image_ptr |= is_rescaled ? image_bit : 0; - image_bit <<= 1; - if (image_bit == 0) { + *image_ptr |= is_rescaled ? 
image_bit : 0u; + image_bit <<= 1u; + if (image_bit == 0u) { image_bit = 1u; ++image_ptr; } @@ -176,8 +175,10 @@ inline void PushImageDescriptors(TextureCache& texture_cache, const Shader::Info& info, RescalingPushConstant& rescaling, const VkSampler*& samplers, const VideoCommon::ImageViewInOut*& views) { - views += Shader::NumDescriptors(info.texture_buffer_descriptors); - views += Shader::NumDescriptors(info.image_buffer_descriptors); + const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); + const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); + views += num_texture_buffers; + views += num_image_buffers; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const VideoCommon::ImageViewId image_view_id{(views++)->id}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6dc52e399..de36bcdb7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -22,6 +22,7 @@ namespace Vulkan { using Shader::ImageBufferDescriptor; +using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; using Tegra::Texture::TexturePair; ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, @@ -185,7 +186,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, buffer_cache.UpdateComputeBuffers(); buffer_cache.BindHostComputeBuffers(); - RescalingPushConstant rescaling(num_textures); + RescalingPushConstant rescaling; const VkSampler* samplers_it{samplers.data()}; const VideoCommon::ImageViewInOut* views_it{views.data()}; PushImageDescriptors(texture_cache, update_descriptor_queue, info, rescaling, samplers_it, @@ -199,21 +200,24 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, }); } const void* const 
descriptor_data{update_descriptor_queue.UpdateData()}; - scheduler.Record( - [this, descriptor_data, rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - if (!descriptor_set_layout) { - return; - } - if (num_textures > 0) { - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, rescaling_data); - } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - const vk::Device& dev{device.GetLogical()}; - dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, - descriptor_set, nullptr); - }); + const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty(); + scheduler.Record([this, descriptor_data, is_rescaling, + rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + if (!descriptor_set_layout) { + return; + } + if (is_rescaling) { + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), + rescaling_data.data()); + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_set, nullptr); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index e79ce4d7c..8c4b0a301 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -59,7 +59,6 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; - u32 
num_textures{}; std::condition_variable build_condvar; std::mutex build_mutex; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f08e9e840..616a7b457 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -32,6 +32,8 @@ namespace { using boost::container::small_vector; using boost::container::static_vector; using Shader::ImageBufferDescriptor; +using Shader::Backend::SPIRV::RESCALING_LAYOUT_DOWN_FACTOR_OFFSET; +using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; using Tegra::Texture::TexturePair; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; @@ -431,7 +433,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { update_descriptor_queue.Acquire(); - RescalingPushConstant rescaling(num_textures); + RescalingPushConstant rescaling; const VkSampler* samplers_it{samplers.data()}; const VideoCommon::ImageViewInOut* views_it{views.data()}; const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { @@ -477,15 +479,16 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { if (bind_pipeline) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); } + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, + RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), + rescaling_data.data()); if (update_rescaling) { const f32 config_down_factor{Settings::values.resolution_info.down_factor}; const f32 scale_down_factor{is_rescaling ? 
config_down_factor : 1.0f}; - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, 0, - sizeof(scale_down_factor), &scale_down_factor); + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, + RESCALING_LAYOUT_DOWN_FACTOR_OFFSET, sizeof(scale_down_factor), + &scale_down_factor); } - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, - RESCALING_PUSH_CONSTANT_WORDS_OFFSET, sizeof(rescaling_data), - rescaling_data.data()); if (!descriptor_set_layout) { return; } -- cgit v1.2.3 From ef1dc4263586f5b81b53a5158db2c1cd2086ed4c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 17 Oct 2021 01:22:13 +0200 Subject: Texture cache: Fix memory consumption and ignore rating when a depth texture is rendered. --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 10 ++++++++-- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 9 +++++++-- src/video_core/texture_cache/texture_cache.h | 7 ++++--- 3 files changed, 19 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ec1afd31a..944a3aa65 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -967,21 +967,24 @@ bool Image::ScaleUp() { if (True(flags & ImageFlagBits::Rescaled)) { return false; } + flags |= ImageFlagBits::Rescaled; if (!runtime->resolution.active) { return false; } if (gl_format == 0 && gl_type == 0) { // compressed textures + flags &= ~ImageFlagBits::Rescaled; return false; } if (info.type == ImageType::Linear) { - UNIMPLEMENTED(); + UNREACHABLE(); + flags &= ~ImageFlagBits::Rescaled; return false; } if (!Scale()) { + flags &= ~ImageFlagBits::Rescaled; return false; } - flags |= ImageFlagBits::Rescaled; return true; } @@ -990,6 +993,9 @@ bool Image::ScaleDown() { return false; } flags &= ~ImageFlagBits::Rescaled; + if (!runtime->resolution.active) { + return false; 
+ } current_texture = texture.handle; return true; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 9b90c7d9b..a4fbbc735 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1126,6 +1126,7 @@ bool Image::ScaleUp() { return false; } ASSERT(info.type != ImageType::Linear); + flags |= ImageFlagBits::Rescaled; const auto& resolution = runtime->resolution; if (!resolution.active) { return false; @@ -1188,11 +1189,11 @@ bool Image::ScaleUp() { dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION); } else { // TODO: Use helper blits where applicable + flags &= ~ImageFlagBits::Rescaled; LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", format); return false; } } - flags |= ImageFlagBits::Rescaled; return true; } @@ -1200,8 +1201,12 @@ bool Image::ScaleDown() { if (False(flags & ImageFlagBits::Rescaled)) { return false; } - ASSERT(info.type != ImageType::Linear); flags &= ~ImageFlagBits::Rescaled; + const auto& resolution = runtime->resolution; + if (!resolution.active) { + return false; + } + ASSERT(info.type != ImageType::Linear); current_image = *original_image; return true; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 261cb6c48..c06cddae9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -230,7 +230,8 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { auto& image = slot_images[image_id]; can_rescale &= ImageCanRescale(image); any_blacklisted |= True(image.flags & ImageFlagBits::Blacklisted); - any_rescaled |= True(image.flags & ImageFlagBits::Rescaled); + any_rescaled |= True(image.flags & ImageFlagBits::Rescaled) || + GetFormatType(image.info.format) != SurfaceType::ColorTexture; scale_rating = std::max(scale_rating, image.scale_tick <= frame_tick ? image.scale_rating + 1U : image.scale_rating); @@ -857,7 +858,7 @@ u64 TextureCache

::GetScaledImageSizeBytes(Image& image) { const f32 add_to_size = Settings::values.resolution_info.up_factor - 1.0f; const bool sign = std::signbit(add_to_size); const u32 image_size_bytes = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - const u64 tentative_size = image_size_bytes * static_cast(std::abs(add_to_size)); + const u64 tentative_size = image_size_bytes * static_cast(std::abs(add_to_size)); const u64 fitted_size = Common::AlignUp(tentative_size, 1024); return sign ? -fitted_size : fitted_size; } @@ -879,7 +880,7 @@ bool TextureCache

::ScaleDown(Image& image) { if (!rescaled) { return false; } - total_used_memory += GetScaledImageSizeBytes(image); + total_used_memory -= GetScaledImageSizeBytes(image); InvalidateScale(image); return true; } -- cgit v1.2.3 From d4f5193bd308988a80f52941d9eefc4c857bfa99 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 17 Oct 2021 02:21:26 +0200 Subject: Texture Cache: Rescale conversions between depth and color --- src/video_core/renderer_opengl/gl_texture_cache.h | 2 +- src/video_core/renderer_vulkan/blit_image.cpp | 29 +++++++++++++--------- src/video_core/renderer_vulkan/blit_image.h | 14 +++++++---- .../renderer_vulkan/vk_texture_cache.cpp | 13 ++++++---- src/video_core/renderer_vulkan/vk_texture_cache.h | 2 +- src/video_core/texture_cache/texture_cache.h | 2 +- 6 files changed, 37 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index cf7f37a16..f90dbfe9e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -84,7 +84,7 @@ public: void CopyImage(Image& dst, Image& src, std::span copies); - void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { UNIMPLEMENTED(); } diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b97aac550..bc3e4b93d 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -418,40 +418,45 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, } void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, 
dst_framebuffer->RenderPass()); - Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view); + Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); + Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view); + Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass()); - Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view); + Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { + const ImageView& src_image_view, u32 up_scale, u32 down_shift) { const VkPipelineLayout layout = *one_texture_pipeline_layout; const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; const VkExtent2D extent{ - .width = src_image_view.size.width, - .height = 
src_image_view.size.height, + .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), + .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), }; scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift, + this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ .x = 0, .y = 0, diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index e11f8c214..c0f4a16a4 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -44,17 +44,21 @@ public: const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); - void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + u32 up_scale, u32 down_shift); - void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + u32 up_scale, u32 down_shift); - void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + u32 up_scale, u32 down_shift); - void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + u32 up_scale, u32 down_shift); private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, - const ImageView& src_image_view); + const ImageView& src_image_view, u32 up_scale, u32 down_shift); [[nodiscard]] VkPipeline FindOrEmplacePipeline(const 
BlitImagePipelineKey& key); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index a4fbbc735..17c62e27d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -867,26 +867,29 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst }); } -void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { +void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, + bool rescaled) { + const u32 up_scale = rescaled ? resolution.up_scale : 1; + const u32 down_shift = rescaled ? resolution.down_shift : 0; switch (dst_view.format) { case PixelFormat::R16_UNORM: if (src_view.format == PixelFormat::D16_UNORM) { - return blit_image_helper.ConvertD16ToR16(dst, src_view); + return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift); } break; case PixelFormat::R32_FLOAT: if (src_view.format == PixelFormat::D32_FLOAT) { - return blit_image_helper.ConvertD32ToR32(dst, src_view); + return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); } break; case PixelFormat::D16_UNORM: if (src_view.format == PixelFormat::R16_UNORM) { - return blit_image_helper.ConvertR16ToD16(dst, src_view); + return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift); } break; case PixelFormat::D32_FLOAT: if (src_view.format == PixelFormat::R32_FLOAT) { - return blit_image_helper.ConvertR32ToD32(dst, src_view); + return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift); } break; default: diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index dc9175ee1..6dc190632 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -61,7 +61,7 @@ public: void 
CopyImage(Image& dst, Image& src, std::span copies); - void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); bool CanAccelerateImageUpload(Image&) const noexcept { return false; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c06cddae9..a035d2b18 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1774,7 +1774,7 @@ void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::vector Date: Sat, 16 Oct 2021 20:33:58 -0500 Subject: vulkan: Implement FidelityFX Super Resolution --- src/common/settings.h | 1 + src/video_core/CMakeLists.txt | 2 + src/video_core/host_shaders/CMakeLists.txt | 17 +- src/video_core/host_shaders/fidelityfx_fsr.comp | 114 +++++++ .../host_shaders/vulkan_fidelityfx_fsr_easu.comp | 13 + .../host_shaders/vulkan_fidelityfx_fsr_rcas.comp | 13 + src/video_core/renderer_vulkan/vk_blit_screen.cpp | 62 +++- src/video_core/renderer_vulkan/vk_blit_screen.h | 4 + src/video_core/renderer_vulkan/vk_fsr.cpp | 375 +++++++++++++++++++++ src/video_core/renderer_vulkan/vk_fsr.h | 54 +++ src/yuzu/configuration/configure_graphics.ui | 5 + 11 files changed, 643 insertions(+), 17 deletions(-) create mode 100644 src/video_core/host_shaders/fidelityfx_fsr.comp create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp create mode 100644 src/video_core/renderer_vulkan/vk_fsr.cpp create mode 100644 src/video_core/renderer_vulkan/vk_fsr.h (limited to 'src') diff --git a/src/common/settings.h b/src/common/settings.h index a09db0822..9da447ce0 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -64,6 +64,7 @@ enum class ScalingFilter : u32 { Bilinear = 0, Bicubic = 1, ScaleForce = 2, + Fsr = 3, }; struct ResolutionScalingInfo { diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6aac7f305..91a30fef7 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -132,6 +132,8 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h + renderer_vulkan/vk_fsr.cpp + renderer_vulkan/vk_fsr.h renderer_vulkan/vk_graphics_pipeline.cpp renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp diff --git 
a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 664d6ce5d..32e2ab500 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,3 +1,11 @@ +set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr) + +set(GLSL_INCLUDES + fidelityfx_fsr.comp + ${FIDELITYFX_INCLUDE_DIR}/ffx_a.h + ${FIDELITYFX_INCLUDE_DIR}/ffx_fsr1.h +) + set(SHADER_FILES astc_decoder.comp block_linear_unswizzle_2d.comp @@ -13,6 +21,8 @@ set(SHADER_FILES present_bicubic.frag vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag + vulkan_fidelityfx_fsr_easu.comp + vulkan_fidelityfx_fsr_rcas.comp vulkan_present.frag vulkan_present.vert vulkan_quad_indexed.comp @@ -78,7 +88,7 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) OUTPUT ${SPIRV_HEADER_FILE} COMMAND - ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} + ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} MAIN_DEPENDENCY ${SOURCE_FILE} ) @@ -86,9 +96,12 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) endif() endforeach() +set(SHADER_SOURCES ${SHADER_FILES}) +list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) + add_custom_target(host_shaders DEPENDS ${SHADER_HEADERS} SOURCES - ${SHADER_FILES} + ${SHADER_SOURCES} ) diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp new file mode 100644 index 000000000..cbb601580 --- /dev/null +++ b/src/video_core/host_shaders/fidelityfx_fsr.comp @@ -0,0 +1,114 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +//!#version 460 core +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable +#extension GL_GOOGLE_include_directive : enable +#extension GL_EXT_shader_explicit_arithmetic_types : require + +// FidelityFX Super Resolution Sample +// +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +layout( push_constant ) uniform constants { + u32vec2 input_size; +}; + +uvec4 Const0; +uvec4 Const1; +uvec4 Const2; +uvec4 Const3; + +#define A_GPU 1 +#define A_GLSL 1 +#define A_HALF + +#include "ffx_a.h" + +f16vec4 LinearToSRGB(f16vec4 linear) { + bvec4 selector = greaterThan(linear, f16vec4(0.00313066844250063)); + f16vec4 low = linear * float16_t(12.92); + f16vec4 high = float16_t(1.055) * pow(linear, f16vec4(1 / 2.4)) - float16_t(0.055); + return mix(low, high, selector); +} + +f16vec4 SRGBToLinear(f16vec4 srgb) { + bvec4 selector = greaterThan(srgb, f16vec4(0.0404482362771082)); + f16vec4 low = srgb * float16_t(1.0 / 12.92); + f16vec4 high = pow((srgb + float16_t(0.055)) * float16_t(1.0 / 1.055), f16vec4(2.4)); + return mix(low, high, selector); +} + +#if USE_EASU + #define FSR_EASU_H 1 + f16vec4 FsrEasuRH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 0)); return res; } + f16vec4 FsrEasuGH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 1)); return res; } + f16vec4 FsrEasuBH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 2)); return res; } +#endif +#if USE_RCAS + #define FSR_RCAS_H 1 + f16vec4 FsrRcasLoadH(ASW2 p) { return f16vec4(texelFetch(InputTexture, ASU2(p), 0)); } + void FsrRcasInputH(inout float16_t r, inout float16_t g, inout float16_t b) {} +#endif + +#include "ffx_fsr1.h" + +void CurrFilter(u32vec2 pos) { + // For debugging +#if USE_BILINEAR + vec2 pp = (vec2(pos) * vec2_AU2(Const0.xy) + vec2_AU2(Const0.zw)) * vec2_AU2(Const1.xy) + vec2(0.5, -0.5) * vec2_AU2(Const1.zw); + imageStore(OutputTexture, ivec2(pos), textureLod(InputTexture, pp, 0.0)); +#endif +#if USE_EASU + f16vec3 c; + FsrEasuH(c, pos, Const0, Const1, Const2, Const3); + imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); +#endif +#if USE_RCAS + f16vec3 c; + FsrRcasH(c.r, c.g, c.b, pos, Const0); + imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); +#endif + +} + +layout(local_size_x=64) in; +void main() { + +#if USE_EASU || 
USE_BILINEAR + vec2 ires = vec2(input_size); + vec2 tres = textureSize(InputTexture, 0); + vec2 ores = imageSize(OutputTexture); + FsrEasuCon(Const0, Const1, Const2, Const3, ires.x, ires.y, tres.x, tres.y, ores.x, ores.y); +#endif +#if USE_RCAS + FsrRcasCon(Const0, 0.25f); +#endif + + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. + AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); + CurrFilter(gxy); + gxy.x += 8u; + CurrFilter(gxy); + gxy.y += 8u; + CurrFilter(gxy); + gxy.x -= 8u; + CurrFilter(gxy); +} diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp new file mode 100644 index 000000000..6525eeeb5 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +layout(set=0,binding=0) uniform sampler2D InputTexture; +layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; + +#define USE_EASU 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp new file mode 100644 index 000000000..9463ed842 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +layout(set=0,binding=0) uniform sampler2D InputTexture; +layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; + +#define USE_RCAS 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index c91b24e3a..8ce60e874 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -23,6 +23,7 @@ #include "video_core/host_shaders/vulkan_present_vert_spv.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" +#include "video_core/renderer_vulkan/vk_fsr.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" @@ -147,8 +148,12 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); - UpdateDescriptorSet(image_index, - use_accelerated ? screen_info.image_view : *raw_image_views[image_index]); + const VkImageView source_image_view = + use_accelerated ? 
screen_info.image_view : *raw_image_views[image_index]; + + if (!fsr) { + UpdateDescriptorSet(image_index, source_image_view); + } BufferData data; SetUniformData(data, layout); @@ -225,9 +230,26 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, read_barrier); cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, write_barrier); }); } + + if (fsr) { + auto crop_rect = framebuffer.crop_rect; + if (crop_rect.GetWidth() == 0) { + crop_rect.right = framebuffer.width; + } + if (crop_rect.GetHeight() == 0) { + crop_rect.bottom = framebuffer.height; + } + crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); + VkImageView fsr_image_view = + fsr->Draw(scheduler, image_index, source_image_view, crop_rect); + UpdateDescriptorSet(image_index, fsr_image_view); + } + scheduler.Record( [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) { const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; @@ -325,6 +347,13 @@ void VKBlitScreen::CreateDynamicResources() { CreateRenderPass(); CreateFramebuffers(); CreateGraphicsPipeline(); + fsr.reset(); + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + const auto& layout = render_window.GetFramebufferLayout(); + fsr = std::make_unique( + device, memory_allocator, image_count, + VkExtent2D{.width = layout.screen.GetWidth(), .height = layout.screen.GetHeight()}); + } } void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { @@ -716,13 +745,14 @@ void VKBlitScreen::CreateGraphicsPipeline() { } void VKBlitScreen::CreateSampler() { + bool linear = Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::Fsr; const VkSamplerCreateInfo ci{ .sType = 
VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .magFilter = VK_FILTER_LINEAR, - .minFilter = VK_FILTER_LINEAR, - .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, + .magFilter = linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, + .minFilter = linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, @@ -905,17 +935,19 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0); UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0); - // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering - // (e.g. handheld mode) on a 1920x1080 framebuffer. f32 scale_u = 1.0f; f32 scale_v = 1.0f; - if (framebuffer_crop_rect.GetWidth() > 0) { - scale_u = static_cast(framebuffer_crop_rect.GetWidth()) / - static_cast(screen_info.width); - } - if (framebuffer_crop_rect.GetHeight() > 0) { - scale_v = static_cast(framebuffer_crop_rect.GetHeight()) / - static_cast(screen_info.height); + // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering + // (e.g. handheld mode) on a 1920x1080 framebuffer. 
+ if (!fsr) { + if (framebuffer_crop_rect.GetWidth() > 0) { + scale_u = static_cast(framebuffer_crop_rect.GetWidth()) / + static_cast(screen_info.width); + } + if (framebuffer_crop_rect.GetHeight() > 0) { + scale_v = static_cast(framebuffer_crop_rect.GetHeight()) / + static_cast(screen_info.height); + } } const auto& screen = layout.screen; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index d3a16f0ba..337931468 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -38,6 +38,8 @@ class RasterizerVulkan; class VKScheduler; class VKSwapchain; +class FSR; + struct VKScreenInfo { VkImageView image_view{}; u32 width{}; @@ -132,6 +134,8 @@ private: std::vector raw_buffer_commits; u32 raw_width = 0; u32 raw_height = 0; + + std::unique_ptr fsr; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp new file mode 100644 index 000000000..fd0a4aa42 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -0,0 +1,375 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/div_ceil.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_comp_spv.h" +#include "video_core/renderer_vulkan/vk_fsr.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { + +FSR::FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, + VkExtent2D output_size) + : device{device}, memory_allocator{memory_allocator}, image_count{image_count}, + output_size{output_size} { + + CreateImages(); + CreateSampler(); + CreateShaders(); + CreateDescriptorPool(); + CreateDescriptorSetLayout(); + CreateDescriptorSets(); + CreatePipelineLayout(); + CreatePipeline(); +} + +VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, + const Common::Rectangle& crop_rect) { + + UpdateDescriptorSet(image_index, image_view); + + scheduler.Record([this, image_index, crop_rect](vk::CommandBuffer cmdbuf) { + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = {}, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + + // TODO: Support clear color + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + VkExtent2D{ + .width = static_cast(crop_rect.GetWidth()), + .height = static_cast(crop_rect.GetHeight()), + }); + + { + VkImageMemoryBarrier fsr_write_barrier = base_barrier; + fsr_write_barrier.image = 
*images[image_index], + fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, fsr_write_barrier); + } + + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_sets[image_index * 2], {}); + cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u), + Common::DivCeil(output_size.height, 16u), 1); + + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline); + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, output_size); + + { + std::array barriers; + auto& fsr_read_barrier = barriers[0]; + auto& blit_write_barrier = barriers[1]; + + fsr_read_barrier = base_barrier; + fsr_read_barrier.image = *images[image_index]; + fsr_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + fsr_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + blit_write_barrier = base_barrier; + blit_write_barrier.image = *images[image_count + image_index]; + blit_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + blit_write_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, {}, {}, barriers); + } + + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_sets[image_index * 2 + 1], {}); + cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u), + Common::DivCeil(output_size.height, 16u), 1); + + { + std::array barriers; + auto& blit_read_barrier = barriers[0]; + + blit_read_barrier = base_barrier; + blit_read_barrier.image = *images[image_count + image_index]; + blit_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, {}, {}, barriers); + } + }); + + return *image_views[image_count + image_index]; 
+} + +void FSR::CreateDescriptorPool() { + const std::array pool_sizes{{ + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = static_cast(image_count * 2), + }, + { + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = static_cast(image_count * 2), + }, + }}; + + const VkDescriptorPoolCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = static_cast(image_count * 2), + .poolSizeCount = static_cast(pool_sizes.size()), + .pPoolSizes = pool_sizes.data(), + }; + descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); +} + +void FSR::CreateDescriptorSetLayout() { + const std::array layout_bindings{{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = sampler.address(), + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = sampler.address(), + }, + }}; + + const VkDescriptorSetLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(layout_bindings.size()), + .pBindings = layout_bindings.data(), + }; + + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); +} + +void FSR::CreateDescriptorSets() { + const u32 sets = static_cast(image_count * 2); + const std::vector layouts(sets, *descriptor_set_layout); + + const VkDescriptorSetAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = *descriptor_pool, + .descriptorSetCount = sets, + .pSetLayouts = layouts.data(), + }; + + descriptor_sets = descriptor_pool.Allocate(ai); +} + +void FSR::CreateImages() { + images.resize(image_count * 2); + 
image_views.resize(image_count * 2); + buffer_commits.resize(image_count * 2); + + for (size_t i = 0; i < image_count * 2; ++i) { + images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R16G16B16A16_SFLOAT, + .extent = + { + .width = output_size.width, + .height = output_size.height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }); + buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal); + image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = *images[i], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_R16G16B16A16_SFLOAT, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); + } +} + +void FSR::CreatePipelineLayout() { + VkPushConstantRange push_const{ + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = sizeof(std::array), + }; + VkPipelineLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_const, + }; + + pipeline_layout = 
device.GetLogical().CreatePipelineLayout(ci); +} + +void FSR::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { + const auto fsr_image_view = *image_views[image_index]; + const auto blit_image_view = *image_views[image_count + image_index]; + + const VkDescriptorImageInfo image_info{ + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const VkDescriptorImageInfo fsr_image_info{ + .imageView = fsr_image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + const VkDescriptorImageInfo blit_image_info{ + .imageView = blit_image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + + VkWriteDescriptorSet sampler_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_sets[image_index * 2], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + + VkWriteDescriptorSet output_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_sets[image_index * 2], + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = &fsr_image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + + device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {}); + + sampler_write.dstSet = descriptor_sets[image_index * 2 + 1]; + sampler_write.pImageInfo = &fsr_image_info; + output_write.dstSet = descriptor_sets[image_index * 2 + 1]; + output_write.pImageInfo = &blit_image_info; + + device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {}); +} + +void FSR::CreateSampler() { + const VkSamplerCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .magFilter = VK_FILTER_LINEAR, + .minFilter = 
VK_FILTER_LINEAR, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, + .unnormalizedCoordinates = VK_FALSE, + }; + + sampler = device.GetLogical().CreateSampler(ci); +} + +void FSR::CreateShaders() { + easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_COMP_SPV); + rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_COMP_SPV); // RCAS pipeline needs its own SPIR-V module, distinct from the EASU one above +} + +void FSR::CreatePipeline() { + VkPipelineShaderStageCreateInfo shader_stage{ + + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .pName = "main", + .pSpecializationInfo = nullptr, + }; + + VkComputePipelineCreateInfo pipeline_ci{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .layout = *pipeline_layout, + .basePipelineIndex = 0, + }; + + shader_stage.module = *easu_shader; + pipeline_ci.stage = shader_stage; + easu_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci); + + shader_stage.module = *rcas_shader; + pipeline_ci.stage = shader_stage; + rcas_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h new file mode 100644 index 000000000..8391e2e58 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_fsr.h @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/math_util.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Device; +class VKScheduler; + +class FSR { +public: + explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, + VkExtent2D output_size); + VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, + const Common::Rectangle& crop_rect); + +private: + void CreateDescriptorPool(); + void CreateDescriptorSetLayout(); + void CreateDescriptorSets(); + void CreateImages(); + void CreateSampler(); + void CreateShaders(); + void CreatePipeline(); + void CreatePipelineLayout(); + + void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const; + + const Device& device; + MemoryAllocator& memory_allocator; + size_t image_count; + VkExtent2D output_size; + + vk::DescriptorPool descriptor_pool; + vk::DescriptorSetLayout descriptor_set_layout; + vk::DescriptorSets descriptor_sets; + vk::PipelineLayout pipeline_layout; + vk::ShaderModule easu_shader; + vk::ShaderModule rcas_shader; + vk::Pipeline easu_pipeline; + vk::Pipeline rcas_pipeline; + vk::Sampler sampler; + std::vector images; + std::vector image_views; + std::vector buffer_commits; +}; + +} // namespace Vulkan diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index d5e0d4e89..014ca6683 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -402,6 +402,11 @@ ScaleForce + + + FidelityFX Super Resolution + + -- cgit v1.2.3 From b60966041c5b1dccd9c5c5ca00fb02353c2151bb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 17 Oct 2021 17:22:16 +0200 Subject: Presentation: add Nearest Neighbor filter. 
--- src/common/settings.h | 9 ++--- src/video_core/renderer_opengl/renderer_opengl.cpp | 19 +++++++++- src/video_core/renderer_opengl/renderer_opengl.h | 1 + src/video_core/renderer_vulkan/vk_blit_screen.cpp | 41 ++++++++++++++++++---- src/video_core/renderer_vulkan/vk_blit_screen.h | 4 ++- src/yuzu/configuration/configure_graphics.ui | 7 +++- 6 files changed, 67 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/common/settings.h b/src/common/settings.h index 9da447ce0..84dab5217 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -61,10 +61,11 @@ enum class ResolutionSetup : u32 { }; enum class ScalingFilter : u32 { - Bilinear = 0, - Bicubic = 1, - ScaleForce = 2, - Fsr = 3, + NearestNeighbor = 0, + Bilinear = 1, + Bicubic = 2, + ScaleForce = 3, + Fsr = 4, }; struct ResolutionScalingInfo { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 955dbc744..68423601c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -264,6 +264,10 @@ void RendererOpenGL::InitOpenGLObjects() { glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + present_sampler_nn.Create(); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + // Generate VBO handle for drawing vertex_buffer.Create(); @@ -346,6 +350,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { GLuint fragment_handle; const auto filter = Settings::values.scaling_filter.GetValue(); switch (filter) { + case Settings::ScalingFilter::NearestNeighbor: + fragment_handle = present_bilinear_fragment.handle; + break; case Settings::ScalingFilter::Bilinear: fragment_handle = present_bilinear_fragment.handle; break; @@ 
-355,6 +362,12 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { case Settings::ScalingFilter::ScaleForce: fragment_handle = present_scaleforce_fragment.handle; break; + case Settings::ScalingFilter::Fsr: + LOG_WARNING( + Render_OpenGL, + "FidelityFX FSR Super Sampling is not supported in OpenGL, changing to ScaleForce"); + fragment_handle = present_scaleforce_fragment.handle; + break; default: fragment_handle = present_bilinear_fragment.handle; break; @@ -464,7 +477,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { } glBindTextureUnit(0, screen_info.display_texture); - glBindSampler(0, present_sampler.handle); + if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { + glBindSampler(0, present_sampler.handle); + } else { + glBindSampler(0, present_sampler_nn.handle); + } glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index bf3d3502c..504ddbe7b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -109,6 +109,7 @@ private: // OpenGL object IDs OGLSampler present_sampler; + OGLSampler present_sampler_nn; OGLBuffer vertex_buffer; OGLProgram present_vertex; OGLProgram present_bilinear_fragment; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 8ce60e874..334eeb92e 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -152,7 +152,9 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, use_accelerated ? 
screen_info.image_view : *raw_image_views[image_index]; if (!fsr) { - UpdateDescriptorSet(image_index, source_image_view); + const bool is_nn = + Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; + UpdateDescriptorSet(image_index, source_image_view, is_nn); } BufferData data; @@ -247,7 +249,7 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); VkImageView fsr_image_view = fsr->Draw(scheduler, image_index, source_image_view, crop_rect); - UpdateDescriptorSet(image_index, fsr_image_view); + UpdateDescriptorSet(image_index, fsr_image_view, true); } scheduler.Record( @@ -286,6 +288,9 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, const auto filter = Settings::values.scaling_filter.GetValue(); cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); switch (filter) { + case Settings::ScalingFilter::NearestNeighbor: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); + break; case Settings::ScalingFilter::Bilinear: cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bilinear_pipeline); break; @@ -745,13 +750,33 @@ void VKBlitScreen::CreateGraphicsPipeline() { } void VKBlitScreen::CreateSampler() { - bool linear = Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::Fsr; const VkSamplerCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .magFilter = linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, - .minFilter = linear ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST, + .magFilter = VK_FILTER_LINEAR, + .minFilter = VK_FILTER_LINEAR, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, + .unnormalizedCoordinates = VK_FALSE, + }; + + const VkSamplerCreateInfo ci_nn{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, @@ -768,6 +793,7 @@ void VKBlitScreen::CreateSampler() { }; sampler = device.GetLogical().CreateSampler(ci); + nn_sampler = device.GetLogical().CreateSampler(ci_nn); } void VKBlitScreen::CreateFramebuffers() { @@ -862,7 +888,8 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) } } -void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { +void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, + bool nn) const { const VkDescriptorBufferInfo buffer_info{ .buffer = *buffer, .offset = offsetof(BufferData, uniform), @@ -883,7 +910,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag }; const VkDescriptorImageInfo image_info{ - .sampler = *sampler, + .sampler = nn ? 
*nn_sampler : *sampler, .imageView = image_view, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 337931468..448a2fbe6 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -90,7 +90,7 @@ private: void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); - void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const; + void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, const Layout::FramebufferLayout layout) const; @@ -115,12 +115,14 @@ private: vk::DescriptorPool descriptor_pool; vk::DescriptorSetLayout descriptor_set_layout; vk::PipelineLayout pipeline_layout; + vk::Pipeline nearest_neightbor_pipeline; vk::Pipeline bilinear_pipeline; vk::Pipeline bicubic_pipeline; vk::Pipeline scaleforce_pipeline; vk::RenderPass renderpass; std::vector framebuffers; vk::DescriptorSets descriptor_sets; + vk::Sampler nn_sampler; vk::Sampler sampler; vk::Buffer buffer; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 014ca6683..fe2f6bb7f 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -387,6 +387,11 @@ + + + Nearest Neighbor + + Bilinear @@ -404,7 +409,7 @@ - FidelityFX Super Resolution + FidelityFX Super Resolution [Vulkan Only] -- cgit v1.2.3 From 425ab9ef4b982213f4ee0d53196f5474e255374f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 17 Oct 2021 18:01:18 +0200 Subject: Texture Cache: Fix downscaling and correct memory comsumption. 
--- .../renderer_opengl/gl_texture_cache.cpp | 43 +++++++--- src/video_core/renderer_opengl/gl_texture_cache.h | 2 +- .../renderer_vulkan/vk_texture_cache.cpp | 93 +++++++++++++++++++--- src/video_core/renderer_vulkan/vk_texture_cache.h | 3 + src/video_core/texture_cache/image_base.cpp | 4 +- src/video_core/texture_cache/image_base.h | 5 ++ src/video_core/texture_cache/texture_cache.h | 31 +++++--- src/video_core/texture_cache/texture_cache_base.h | 2 +- 8 files changed, 147 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 944a3aa65..34d3723e5 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -876,7 +876,7 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } } -bool Image::Scale() { +bool Image::Scale(bool up_scale) { const auto format_type = GetFormatType(info.format); const GLenum attachment = [format_type] { switch (format_type) { @@ -944,14 +944,25 @@ bool Image::Scale() { const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle; for (s32 layer = 0; layer < info.resources.layers; ++layer) { for (s32 level = 0; level < info.resources.levels; ++level) { - const u32 src_level_width = std::max(1u, original_width >> level); - const u32 src_level_height = std::max(1u, original_height >> level); - const u32 dst_level_width = std::max(1u, scaled_width >> level); - const u32 dst_level_height = std::max(1u, scaled_height >> level); - - glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer); - glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level, - layer); + const u32 src_level_width = + std::max(1u, (up_scale ? original_width : scaled_width) >> level); + const u32 src_level_height = + std::max(1u, (up_scale ? 
original_height : scaled_height) >> level); + const u32 dst_level_width = + std::max(1u, (up_scale ? scaled_width : original_width) >> level); + const u32 dst_level_height = + std::max(1u, (up_scale ? scaled_height : original_height) >> level); + + if (up_scale) { + glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer); + glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level, + layer); + } else { + glNamedFramebufferTextureLayer(read_fbo, attachment, upscaled_backup.handle, level, + layer); + glNamedFramebufferTextureLayer(draw_fbo, attachment, texture.handle, level, layer); + } + glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0, 0, dst_level_width, dst_level_height, mask, filter); } @@ -959,7 +970,12 @@ bool Image::Scale() { if (scissor_test != GL_FALSE) { glEnablei(GL_SCISSOR_TEST, 0); } - current_texture = upscaled_backup.handle; + if (up_scale) { + current_texture = upscaled_backup.handle; + } else { + current_texture = texture.handle; + } + return true; } @@ -981,6 +997,7 @@ bool Image::ScaleUp() { flags &= ~ImageFlagBits::Rescaled; return false; } + scale_count++; if (!Scale()) { flags &= ~ImageFlagBits::Rescaled; return false; @@ -996,7 +1013,11 @@ bool Image::ScaleDown() { if (!runtime->resolution.active) { return false; } - current_texture = texture.handle; + scale_count++; + if (!Scale(false)) { + flags &= ~ImageFlagBits::Rescaled; + return false; + } return true; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index f90dbfe9e..81aaef3da 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -205,7 +205,7 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); - bool Scale(); + bool Scale(bool up_scale = true); OGLTexture texture; OGLTexture upscaled_backup; diff --git 
a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 17c62e27d..51367c01d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -592,7 +592,8 @@ struct RangedBarrierRange { } void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info, - VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution) { + VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution, + bool up_scaling = true) { const bool is_2d = info.type == ImageType::e2D; const auto resources = info.resources; const VkExtent2D extent{ @@ -605,14 +606,16 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d, - vk_filter](vk::CommandBuffer cmdbuf) { + vk_filter, up_scaling](vk::CommandBuffer cmdbuf) { const VkOffset2D src_size{ - .x = static_cast(extent.width), - .y = static_cast(extent.height), + .x = static_cast(up_scaling ? extent.width : resolution.ScaleUp(extent.width)), + .y = static_cast(is_2d && up_scaling ? extent.height + : resolution.ScaleUp(extent.height)), }; const VkOffset2D dst_size{ - .x = static_cast(resolution.ScaleUp(extent.width)), - .y = static_cast(is_2d ? resolution.ScaleUp(extent.height) : extent.height), + .x = static_cast(up_scaling ? resolution.ScaleUp(extent.width) : extent.width), + .y = static_cast(is_2d && up_scaling ? 
resolution.ScaleUp(extent.height) + : extent.height), }; boost::container::small_vector regions; regions.reserve(resources.levels); @@ -1134,6 +1137,7 @@ bool Image::ScaleUp() { if (!resolution.active) { return false; } + scale_count++; const auto& device = runtime->device; const bool is_2d = info.type == ImageType::e2D; const u32 scaled_width = resolution.ScaleUp(info.size.width); @@ -1161,8 +1165,10 @@ bool Image::ScaleUp() { using namespace VideoCommon; static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; - const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); - scale_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); + if (!scale_view) { + const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); + scale_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); + } auto* view_ptr = scale_view.get(); const Region2D src_region{ @@ -1178,7 +1184,10 @@ bool Image::ScaleUp() { .height = scaled_height, }; if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { - scale_framebuffer = std::make_unique(*runtime, view_ptr, nullptr, extent); + if (!scale_framebuffer) { + scale_framebuffer = + std::make_unique(*runtime, view_ptr, nullptr, extent); + } const auto color_view = scale_view->Handle(Shader::TextureType::Color2D); runtime->blit_image_helper.BlitColor( @@ -1186,7 +1195,10 @@ bool Image::ScaleUp() { Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION); } else if (!runtime->device.IsBlitDepthStencilSupported() && aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - scale_framebuffer = std::make_unique(*runtime, nullptr, view_ptr, extent); + if (!scale_framebuffer) { + scale_framebuffer = + std::make_unique(*runtime, view_ptr, nullptr, extent); + } runtime->blit_image_helper.BlitDepthStencil( scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(), dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION); @@ -1209,6 
+1221,67 @@ bool Image::ScaleDown() { if (!resolution.active) { return false; } + const auto& device = runtime->device; + const bool is_2d = info.type == ImageType::e2D; + const u32 scaled_width = resolution.ScaleUp(info.size.width); + const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; + if (aspect_mask == 0) { + aspect_mask = ImageAspectMask(info.format); + } + static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; + const PixelFormat format = StorageFormat(info.format); + const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; + const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { + BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false); + } else { + using namespace VideoCommon; + static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; + + if (!normal_view) { + const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); + normal_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); + } + auto* view_ptr = normal_view.get(); + + const Region2D src_region{ + .start = {0, 0}, + .end = {static_cast(scaled_width), static_cast(scaled_height)}, + }; + const Region2D dst_region{ + .start = {0, 0}, + .end = {static_cast(info.size.width), static_cast(info.size.height)}, + }; + const VkExtent2D extent{ + .width = scaled_width, + .height = scaled_height, + }; + if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { + if (!normal_framebuffer) { + normal_framebuffer = + std::make_unique(*runtime, view_ptr, nullptr, extent); + } + const auto color_view = normal_view->Handle(Shader::TextureType::Color2D); + + runtime->blit_image_helper.BlitColor( + normal_framebuffer.get(), color_view, dst_region, src_region, + Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION); + } else if 
(!runtime->device.IsBlitDepthStencilSupported() && + aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + if (!normal_framebuffer) { + normal_framebuffer = + std::make_unique(*runtime, view_ptr, nullptr, extent); + } + runtime->blit_image_helper.BlitDepthStencil( + normal_framebuffer.get(), normal_view->DepthView(), normal_view->StencilView(), + dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION); + } else { + // TODO: Use helper blits where applicable + flags &= ~ImageFlagBits::Rescaled; + LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", format); + return false; + } + } ASSERT(info.type != ImageType::Linear); current_image = *original_image; return true; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 6dc190632..df854a20c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -148,6 +148,9 @@ private: std::unique_ptr scale_framebuffer; std::unique_ptr scale_view; + + std::unique_ptr normal_framebuffer; + std::unique_ptr normal_view; }; class ImageView : public VideoCommon::ImageViewBase { diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 1909c9ecb..3db2ec825 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -60,8 +60,8 @@ namespace { ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, - converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, - scale_tick{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, + converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{}, + scale_count{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + 
guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} { if (info.type == ImageType::e3D) { slice_offsets = CalculateSliceOffsets(info); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index bab290ac7..cd4b5f636 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -77,6 +77,10 @@ struct ImageBase { void CheckBadOverlapState(); void CheckAliasState(); + bool HasScaled() { + return scale_count > 0; + } + ImageInfo info; u32 guest_size_bytes = 0; @@ -84,6 +88,7 @@ struct ImageBase { u32 converted_size_bytes = 0; u32 scale_rating = 0; u64 scale_tick = 0; + u32 scale_count = 0; ImageFlagBits flags = ImageFlagBits::CpuModified; GPUVAddr gpu_addr = 0; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a035d2b18..cf0d33a45 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -854,8 +854,8 @@ void TextureCache

::InvalidateScale(Image& image) { } template -u64 TextureCache

::GetScaledImageSizeBytes(Image& image) { - const f32 add_to_size = Settings::values.resolution_info.up_factor - 1.0f; +u64 TextureCache

::GetScaledImageSizeBytes(ImageBase& image) { + const f32 add_to_size = Settings::values.resolution_info.up_factor; const bool sign = std::signbit(add_to_size); const u32 image_size_bytes = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); const u64 tentative_size = image_size_bytes * static_cast(std::abs(add_to_size)); @@ -865,11 +865,14 @@ u64 TextureCache

::GetScaledImageSizeBytes(Image& image) { template bool TextureCache

::ScaleUp(Image& image) { + const bool has_copy = image.HasScaled(); const bool rescaled = image.ScaleUp(); if (!rescaled) { return false; } - total_used_memory += GetScaledImageSizeBytes(image); + if (!has_copy) { + total_used_memory += GetScaledImageSizeBytes(image); + } InvalidateScale(image); return true; } @@ -880,7 +883,10 @@ bool TextureCache

::ScaleDown(Image& image) { if (!rescaled) { return false; } - total_used_memory -= GetScaledImageSizeBytes(image); + const bool has_copy = image.HasScaled(); + if (!has_copy) { + total_used_memory -= GetScaledImageSizeBytes(image); + } InvalidateScale(image); return true; } @@ -1391,13 +1397,6 @@ void TextureCache

::UnregisterImage(ImageId image_id) { "Trying to unregister an already registered image"); image.flags &= ~ImageFlagBits::Registered; image.flags &= ~ImageFlagBits::BadOverlap; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory -= Common::AlignUp(tentative_size, 1024); lru_cache.Free(image.lru_index); const auto& clear_page_table = [this, image_id]( @@ -1478,6 +1477,16 @@ template void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { ASSERT(False(image.flags & ImageFlagBits::Tracked)); image.flags |= ImageFlagBits::Tracked; + if (image.HasScaled()) { + total_used_memory -= GetScaledImageSizeBytes(image); + } + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); if (False(image.flags & ImageFlagBits::Sparse)) { rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); return; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4dbe050af..e210393ba 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -331,7 +331,7 @@ private: void InvalidateScale(Image& image); bool ScaleUp(Image& image); bool ScaleDown(Image& image); - u64 GetScaledImageSizeBytes(Image& image); + u64 GetScaledImageSizeBytes(ImageBase& image); Runtime& runtime; VideoCore::RasterizerInterface& rasterizer; -- cgit v1.2.3 From 50b4c774cb30380761936f4cb897c31fc2d49075 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 17 Oct 2021 20:08:39 +0200 Subject: Vulkan: Fix Blit Depth Stencil --- src/video_core/renderer_vulkan/blit_image.cpp | 27 ++++++++++++++++----------- src/video_core/renderer_vulkan/blit_image.h | 7 ++++--- 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index bc3e4b93d..239698423 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -374,7 +374,7 @@ void BlitImageHelper::BlitColor(const 
Framebuffer* dst_framebuffer, VkImageView }; const VkPipelineLayout layout = *one_texture_pipeline_layout; const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; - const VkPipeline pipeline = FindOrEmplacePipeline(key); + const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, src_view](vk::CommandBuffer cmdbuf) { @@ -397,10 +397,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, Tegra::Engines::Fermi2D::Operation operation) { ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); - + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = operation, + }; const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkSampler sampler = *nearest_sampler; - const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); + const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, src_stencil_view, this](vk::CommandBuffer cmdbuf) { @@ -492,7 +495,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb scheduler.InvalidateState(); } -VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) { +VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) { const auto it = std::ranges::find(blit_color_keys, key); if (it != blit_color_keys.end()) { return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)]; @@ -546,12 +549,14 @@ VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& ke return *blit_color_pipelines.back(); } -VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) { - if 
(blit_depth_stencil_pipeline) { - return *blit_depth_stencil_pipeline; +VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key) { + const auto it = std::ranges::find(blit_depth_stencil_keys, key); + if (it != blit_depth_stencil_keys.end()) { + return *blit_depth_stencil_pipelines[std::distance(blit_depth_stencil_keys.begin(), it)]; } + blit_depth_stencil_keys.push_back(key); const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag); - blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({ + blit_depth_stencil_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -567,12 +572,12 @@ VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) { .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, .layout = *two_textures_pipeline_layout, - .renderPass = renderpass, + .renderPass = key.renderpass, .subpass = 0, .basePipelineHandle = VK_NULL_HANDLE, .basePipelineIndex = 0, - }); - return *blit_depth_stencil_pipeline; + })); + return *blit_depth_stencil_pipelines.back(); } void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index c0f4a16a4..d77f76678 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -60,9 +60,9 @@ private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); - [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key); + [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key); - [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass); 
+ [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); @@ -88,7 +88,8 @@ private: std::vector blit_color_keys; std::vector blit_color_pipelines; - vk::Pipeline blit_depth_stencil_pipeline; + std::vector blit_depth_stencil_keys; + std::vector blit_depth_stencil_pipelines; vk::Pipeline convert_d32_to_r32_pipeline; vk::Pipeline convert_r32_to_d32_pipeline; vk::Pipeline convert_d16_to_r16_pipeline; -- cgit v1.2.3 From c2ca55c9d576940cfb37ba8569b1656b72c65569 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 18 Oct 2021 14:04:54 +0200 Subject: Texture Cache: ease the requirements of textures being blacklisted. --- src/video_core/texture_cache/texture_cache.h | 27 ++++++----------------- src/video_core/texture_cache/texture_cache_base.h | 2 -- 2 files changed, 7 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index cf0d33a45..c885586e8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -221,7 +221,6 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { scale_rating = 0; bool any_rescaled = false; bool can_rescale = true; - bool any_blacklisted = false; const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) { if (view_id != NULL_IMAGE_VIEW_ID && view_id != ImageViewId{}) { const auto& view = slot_image_views[view_id]; @@ -229,7 +228,6 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { id_save = image_id; auto& image = slot_images[image_id]; can_rescale &= ImageCanRescale(image); - any_blacklisted |= True(image.flags & ImageFlagBits::Blacklisted); any_rescaled |= True(image.flags & ImageFlagBits::Rescaled) || GetFormatType(image.info.format) != SurfaceType::ColorTexture; scale_rating = std::max(scale_rating, image.scale_tick <= frame_tick @@ -270,20 +268,17 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { } } else { rescaled = false; - const auto scale_down = [this, any_blacklisted](ImageId image_id) { + const auto scale_down = [this](ImageId image_id) { if (image_id != CORRUPT_ID) { Image& image = slot_images[image_id]; ScaleDown(image); - if (any_blacklisted) { - image.flags |= ImageFlagBits::Blacklisted; - } } }; for (size_t index = 0; index < NUM_RT; ++index) { scale_down(tmp_color_images[index]); } scale_down(tmp_depth_image); - scale_rating = 0; + scale_rating = 1; } } while (has_deleted_images); // Rescale End @@ -352,7 +347,10 @@ void TextureCache

::FillImageViews(DescriptorTable& table, if constexpr (has_blacklists) { if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { const ImageViewBase& image_view{slot_image_views[view.id]}; - has_blacklisted |= BlackListImage(image_view.image_id); + auto& image = slot_images[image_view.image_id]; + image.flags |= ImageFlagBits::Blacklisted; + has_blacklisted |= ScaleDown(image); + image.scale_rating = 0; } } } @@ -783,20 +781,9 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, return image_id; } -template -bool TextureCache

::BlackListImage(ImageId image_id) { - auto& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Blacklisted)) { - return false; - } - image.flags |= ImageFlagBits::Blacklisted; - ScaleDown(image); - return true; -} - template bool TextureCache

::ImageCanRescale(ImageBase& image) { - if (!image.info.rescaleable || True(image.flags & ImageFlagBits::Blacklisted)) { + if (!image.info.rescaleable) { return false; } if (Settings::values.resolution_info.downscale && !image.info.downscaleable) { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index e210393ba..4f876b2f4 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -166,8 +166,6 @@ public: [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; - [[nodiscard]] bool BlackListImage(ImageId image_id); - std::mutex mutex; private: -- cgit v1.2.3 From 3b61de74e6dc7526ffa8f03c21d81e2c3566ce90 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 18 Oct 2021 22:56:36 +0200 Subject: Texture Cache: fix memory managment and optimize scaled downloads, uploads. --- .../renderer_opengl/gl_texture_cache.cpp | 24 +++++++++++++++++----- src/video_core/renderer_opengl/gl_texture_cache.h | 4 ++-- .../renderer_vulkan/vk_texture_cache.cpp | 23 +++++++++++++++++---- src/video_core/renderer_vulkan/vk_texture_cache.h | 4 ++-- src/video_core/texture_cache/image_base.cpp | 2 +- src/video_core/texture_cache/image_base.h | 6 +++--- src/video_core/texture_cache/texture_cache.h | 22 ++++++++++---------- 7 files changed, 57 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 34d3723e5..a6e9eb60b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -697,7 +697,7 @@ void Image::UploadMemory(const ImageBufferMap& map, std::span copies) { const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { - ScaleDown(); + ScaleDown(true); } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); 
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); @@ -725,6 +725,10 @@ void Image::UploadMemory(const ImageBufferMap& map, void Image::DownloadMemory(ImageBufferMap& map, std::span copies) { + const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ScaleDown(); + } glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); glPixelStorei(GL_PACK_ALIGNMENT, 1); @@ -743,6 +747,9 @@ void Image::DownloadMemory(ImageBufferMap& map, } CopyImageToBuffer(copy, map.offset); } + if (is_rescaled) { + ScaleUp(true); + } } GLuint Image::StorageHandle() noexcept { @@ -979,7 +986,7 @@ bool Image::Scale(bool up_scale) { return true; } -bool Image::ScaleUp() { +bool Image::ScaleUp(bool ignore) { if (True(flags & ImageFlagBits::Rescaled)) { return false; } @@ -997,7 +1004,11 @@ bool Image::ScaleUp() { flags &= ~ImageFlagBits::Rescaled; return false; } - scale_count++; + has_scaled = true; + if (ignore) { + current_texture = upscaled_backup.handle; + return true; + } if (!Scale()) { flags &= ~ImageFlagBits::Rescaled; return false; @@ -1005,7 +1016,7 @@ bool Image::ScaleUp() { return true; } -bool Image::ScaleDown() { +bool Image::ScaleDown(bool ignore) { if (False(flags & ImageFlagBits::Rescaled)) { return false; } @@ -1013,7 +1024,10 @@ bool Image::ScaleDown() { if (!runtime->resolution.active) { return false; } - scale_count++; + if (ignore) { + current_texture = texture.handle; + return true; + } if (!Scale(false)) { flags &= ~ImageFlagBits::Rescaled; return false; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 81aaef3da..eeb5133d5 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -196,9 +196,9 @@ public: return gl_type; } - bool ScaleUp(); + bool ScaleUp(bool ignore = false); - bool ScaleDown(); + bool 
ScaleDown(bool ignore = false); private: void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 51367c01d..02aac3b98 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1055,7 +1055,7 @@ void Image::UploadMemory(const StagingBufferRef& map, std::spanRequestOutsideRenderPassOperationContext(); std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); @@ -1073,6 +1073,10 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { + const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ScaleDown(); + } std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); scheduler->Record([buffer = map.buffer, image = *original_image, aspect_mask = aspect_mask, @@ -1125,9 +1129,12 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::spandevice; const bool is_2d = info.type == ImageType::e2D; const u32 scaled_width = resolution.ScaleUp(info.size.width); @@ -1149,8 +1156,12 @@ bool Image::ScaleUp() { scaled_image = MakeImage(device, scaled_info); auto& allocator = runtime->memory_allocator; scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal)); + ignore = false; } current_image = *scaled_image; + if (ignore) { + return true; + } if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); @@ -1212,7 +1223,7 @@ bool Image::ScaleUp() { return true; } -bool Image::ScaleDown() { +bool Image::ScaleDown(bool ignore) { if (False(flags & ImageFlagBits::Rescaled)) { return false; } @@ -1221,6 +1232,10 @@ bool Image::ScaleDown() { if (!resolution.active) { return false; } + if (ignore) { + current_image = *original_image; + return true; + } const auto& 
device = runtime->device; const bool is_2d = info.type == ImageType::e2D; const u32 scaled_width = resolution.ScaleUp(info.size.width); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index df854a20c..8dbddfaf7 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -129,9 +129,9 @@ public: return std::exchange(initialized, true); } - bool ScaleUp(); + bool ScaleUp(bool ignore = false); - bool ScaleDown(); + bool ScaleDown(bool ignore = false); private: VKScheduler* scheduler{}; diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 3db2ec825..3db2fdf34 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -61,7 +61,7 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_ : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{}, - scale_count{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, + has_scaled{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} { if (info.type == ImageType::e3D) { slice_offsets = CalculateSliceOffsets(info); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index cd4b5f636..02c669766 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -77,8 +77,8 @@ struct ImageBase { void CheckBadOverlapState(); void CheckAliasState(); - bool HasScaled() { - return scale_count > 0; + bool HasScaled() const { + return has_scaled; } ImageInfo info; @@ -88,7 +88,7 @@ struct ImageBase { u32 converted_size_bytes = 0; u32 scale_rating = 0; u64 scale_tick = 0; - 
u32 scale_count = 0; + bool has_scaled = false; ImageFlagBits flags = ImageFlagBits::CpuModified; GPUVAddr gpu_addr = 0; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c885586e8..13914dc8b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -60,7 +60,7 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& // On OpenGL we can be more conservatives as the driver takes care. expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; - minimum_memory = expected_memory; + minimum_memory = 0; } } @@ -1464,16 +1464,6 @@ template void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { ASSERT(False(image.flags & ImageFlagBits::Tracked)); image.flags |= ImageFlagBits::Tracked; - if (image.HasScaled()) { - total_used_memory -= GetScaledImageSizeBytes(image); - } - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory -= Common::AlignUp(tentative_size, 1024); if (False(image.flags & ImageFlagBits::Sparse)) { rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); return; @@ -1519,6 +1509,16 @@ void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { template void TextureCache

::DeleteImage(ImageId image_id) { ImageBase& image = slot_images[image_id]; + if (image.HasScaled()) { + total_used_memory -= GetScaledImageSizeBytes(image); + } + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); const GPUVAddr gpu_addr = image.gpu_addr; const auto alloc_it = image_allocs_table.find(gpu_addr); if (alloc_it == image_allocs_table.end()) { -- cgit v1.2.3 From f3ff8bdc0e8c6c25c1725b82d6862b93a8df3c84 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 19 Oct 2021 17:46:01 +0200 Subject: TextureCache: Fix blitting filter in Vulkan and correct viewport/scissor calculation when downscaling. --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 24 ++++++++++--- .../renderer_vulkan/vk_texture_cache.cpp | 40 ++++++++++++++-------- 2 files changed, 44 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1ceffa718..a9334e101 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -60,10 +60,19 @@ struct DrawParams { VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) { const auto& src = regs.viewport_transform[index]; - const float x = (src.translate_x - src.scale_x) * scale; - const float width = src.scale_x * 2.0f * scale; - float y = (src.translate_y - src.scale_y) * scale; - float height = src.scale_y * 2.0f * scale; + const auto conv = [scale](float value) { + float new_value = value * scale; + if (scale < 1.0f) { + bool sign = std::signbit(new_value); + new_value = std::round(std::abs(new_value)); 
+ new_value = sign ? -new_value : new_value; + } + return new_value; + }; + const float x = conv(src.translate_x - src.scale_x); + const float width = conv(src.scale_x * 2.0f); + float y = conv(src.translate_y - src.scale_y); + float height = conv(src.scale_y * 2.0f); if (regs.screen_y_control.y_negate) { y += height; height = -height; @@ -91,8 +100,13 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3 if (value == 0) { return 0U; } + const u32 upset = value * up_scale; + u32 acumm = 0; + if ((up_scale >> down_shift) == 0) { + acumm = upset & 0x1; + } const u32 converted_value = (value * up_scale) >> down_shift; - return std::max(converted_value, 1U); + return std::max(converted_value + acumm, 1U); }; if (src.enable) { scissor.offset.x = static_cast(scale_up(src.min_x)); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 02aac3b98..84ec803ba 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -593,7 +593,7 @@ struct RangedBarrierRange { void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info, VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution, - bool up_scaling = true) { + bool is_bilinear, bool up_scaling = true) { const bool is_2d = info.type == ImageType::e2D; const auto resources = info.resources; const VkExtent2D extent{ @@ -602,7 +602,7 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con }; const bool is_zeta = (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0; const bool is_int_format = IsPixelFormatInteger(info.format); - const VkFilter vk_filter = (is_zeta || is_int_format) ? VK_FILTER_NEAREST : VK_FILTER_LINEAR; + const VkFilter vk_filter = is_bilinear ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST; scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dst_image, src_image, extent, resources, aspect_mask, resolution, is_2d, @@ -1160,7 +1160,7 @@ bool Image::ScaleUp(bool ignore) { } current_image = *scaled_image; if (ignore) { - return true; + return true; } if (aspect_mask == 0) { @@ -1170,11 +1170,18 @@ bool Image::ScaleUp(bool ignore) { const PixelFormat format = StorageFormat(info.format); const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT}; + const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)}; if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { - BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution); + BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution, + device.IsFormatSupported(vk_format, + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT, + OPTIMAL_FORMAT)); } else { using namespace VideoCommon; static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; + const auto operation = is_bilinear ? 
Tegra::Engines::Fermi2D::Filter::Bilinear + : Tegra::Engines::Fermi2D::Filter::Point; if (!scale_view) { const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); @@ -1201,9 +1208,8 @@ bool Image::ScaleUp(bool ignore) { } const auto color_view = scale_view->Handle(Shader::TextureType::Color2D); - runtime->blit_image_helper.BlitColor( - scale_framebuffer.get(), color_view, dst_region, src_region, - Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION); + runtime->blit_image_helper.BlitColor(scale_framebuffer.get(), color_view, dst_region, + src_region, operation, BLIT_OPERATION); } else if (!runtime->device.IsBlitDepthStencilSupported() && aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { if (!scale_framebuffer) { @@ -1212,7 +1218,7 @@ bool Image::ScaleUp(bool ignore) { } runtime->blit_image_helper.BlitDepthStencil( scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(), - dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION); + dst_region, src_region, operation, BLIT_OPERATION); } else { // TODO: Use helper blits where applicable flags &= ~ImageFlagBits::Rescaled; @@ -1233,8 +1239,8 @@ bool Image::ScaleDown(bool ignore) { return false; } if (ignore) { - current_image = *original_image; - return true; + current_image = *original_image; + return true; } const auto& device = runtime->device; const bool is_2d = info.type == ImageType::e2D; @@ -1247,11 +1253,16 @@ bool Image::ScaleDown(bool ignore) { const PixelFormat format = StorageFormat(info.format); const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT}; + const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)}; if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { - BlitScale(*scheduler, *scaled_image, 
*original_image, info, aspect_mask, resolution, false); + BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, + is_bilinear, false); } else { using namespace VideoCommon; static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; + const auto operation = is_bilinear ? Tegra::Engines::Fermi2D::Filter::Bilinear + : Tegra::Engines::Fermi2D::Filter::Point; if (!normal_view) { const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); @@ -1278,9 +1289,8 @@ bool Image::ScaleDown(bool ignore) { } const auto color_view = normal_view->Handle(Shader::TextureType::Color2D); - runtime->blit_image_helper.BlitColor( - normal_framebuffer.get(), color_view, dst_region, src_region, - Tegra::Engines::Fermi2D::Filter::Bilinear, BLIT_OPERATION); + runtime->blit_image_helper.BlitColor(normal_framebuffer.get(), color_view, dst_region, + src_region, operation, BLIT_OPERATION); } else if (!runtime->device.IsBlitDepthStencilSupported() && aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { if (!normal_framebuffer) { @@ -1289,7 +1299,7 @@ bool Image::ScaleDown(bool ignore) { } runtime->blit_image_helper.BlitDepthStencil( normal_framebuffer.get(), normal_view->DepthView(), normal_view->StencilView(), - dst_region, src_region, Tegra::Engines::Fermi2D::Filter::Point, BLIT_OPERATION); + dst_region, src_region, operation, BLIT_OPERATION); } else { // TODO: Use helper blits where applicable flags &= ~ImageFlagBits::Rescaled; -- cgit v1.2.3 From 150bc45401c7c6e5cbcf936371269f0c1d2a0e83 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 19 Oct 2021 18:32:08 +0200 Subject: Texture cache: fix Intel with rescaler. 
--- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 84ec803ba..7c8732ec1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1214,7 +1214,7 @@ bool Image::ScaleUp(bool ignore) { aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { if (!scale_framebuffer) { scale_framebuffer = - std::make_unique(*runtime, view_ptr, nullptr, extent); + std::make_unique(*runtime, nullptr, view_ptr, extent); } runtime->blit_image_helper.BlitDepthStencil( scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(), @@ -1295,7 +1295,7 @@ bool Image::ScaleDown(bool ignore) { aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { if (!normal_framebuffer) { normal_framebuffer = - std::make_unique(*runtime, view_ptr, nullptr, extent); + std::make_unique(*runtime, nullptr, view_ptr, extent); } runtime->blit_image_helper.BlitDepthStencil( normal_framebuffer.get(), normal_view->DepthView(), normal_view->StencilView(), -- cgit v1.2.3 From 826a350e2b6aadb4f123189c28f065b0e7926264 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 19 Oct 2021 19:41:57 +0200 Subject: Vulkan Rasterizer: Fix clears on integer textures. 
--- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 34 ++++++++++++++++- src/video_core/surface.cpp | 47 ++++++++++++++++++++++++ src/video_core/surface.h | 4 ++ 3 files changed, 84 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a9334e101..ff75d14a1 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -211,6 +211,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { EndTransformFeedback(); } +#pragma optimize("", off) + void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); @@ -260,7 +262,37 @@ void RasterizerVulkan::Clear() { const u32 color_attachment = regs.clear_buffers.RT; if (use_color && framebuffer->HasAspectColorBit(color_attachment)) { VkClearValue clear_value; - std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); + bool is_integer = false; + bool is_signed = false; + size_t int_size = 8; + for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++i) { + const auto& this_rt = regs.rt[i]; + if (this_rt.Address() == 0) { + continue; + } + if (this_rt.format == Tegra::RenderTargetFormat::NONE) { + continue; + } + const auto format = + VideoCore::Surface::PixelFormatFromRenderTargetFormat(this_rt.format); + is_integer = IsPixelFormatInteger(format); + is_signed = IsPixelFormatSignedInteger(format); + int_size = PixelComponentSizeBitsInteger(format); + break; + } + if (!is_integer) { + std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); + } else if (!is_signed) { + for (size_t i = 0; i < 4; i++) { + clear_value.color.uint32[i] = + static_cast(static_cast(int_size << 1U) * regs.clear_color[i]); + } + } else { + for (size_t i = 0; i < 4; i++) { + clear_value.color.int32[i] = static_cast( + (static_cast(int_size - 1) << 1) * (regs.clear_color[i] - 0.5f)); + } 
+ } scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { const VkClearAttachment attachment{ diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 64941a486..58d262446 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -306,6 +306,53 @@ bool IsPixelFormatInteger(PixelFormat format) { } } +bool IsPixelFormatSignedInteger(PixelFormat format) { + switch (format) { + case PixelFormat::A8B8G8R8_SINT: + case PixelFormat::R8_SINT: + case PixelFormat::R16G16B16A16_SINT: + case PixelFormat::R32G32B32A32_SINT: + case PixelFormat::R32G32_SINT: + case PixelFormat::R16_SINT: + case PixelFormat::R16G16_SINT: + case PixelFormat::R8G8_SINT: + case PixelFormat::R32_SINT: + return true; + default: + return false; + } +} + +size_t PixelComponentSizeBitsInteger(PixelFormat format) { + switch (format) { + case PixelFormat::A8B8G8R8_SINT: + case PixelFormat::A8B8G8R8_UINT: + case PixelFormat::R8_SINT: + case PixelFormat::R8_UINT: + case PixelFormat::R8G8_SINT: + case PixelFormat::R8G8_UINT: + return 8; + case PixelFormat::A2B10G10R10_UINT: + return 10; + case PixelFormat::R16G16B16A16_SINT: + case PixelFormat::R16G16B16A16_UINT: + case PixelFormat::R16_UINT: + case PixelFormat::R16_SINT: + case PixelFormat::R16G16_UINT: + case PixelFormat::R16G16_SINT: + return 16; + case PixelFormat::R32G32B32A32_UINT: + case PixelFormat::R32G32B32A32_SINT: + case PixelFormat::R32G32_SINT: + case PixelFormat::R32G32_UINT: + case PixelFormat::R32_UINT: + case PixelFormat::R32_SINT: + return 32; + default: + return 0; + } +} + std::pair GetASTCBlockSize(PixelFormat format) { return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; } diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 3bb24abb7..2ce7c7d33 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -462,6 +462,10 @@ bool IsPixelFormatSRGB(PixelFormat format); bool IsPixelFormatInteger(PixelFormat format); +bool 
IsPixelFormatSignedInteger(PixelFormat format); + +size_t PixelComponentSizeBitsInteger(PixelFormat format); + std::pair GetASTCBlockSize(PixelFormat format); u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format); -- cgit v1.2.3 From 4ad22c7d2b9d8fdfffc380f0b52f4ba943599bef Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 20 Oct 2021 00:33:03 +0200 Subject: Video Core: fix building for GCC. --- src/common/math_util.h | 4 +- src/video_core/renderer_vulkan/pipeline_helper.h | 2 +- src/video_core/renderer_vulkan/vk_fsr.cpp | 47 ++++++++++++++++------ src/video_core/renderer_vulkan/vk_rasterizer.cpp | 11 +++-- .../renderer_vulkan/vk_texture_cache.cpp | 2 - 5 files changed, 42 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/common/math_util.h b/src/common/math_util.h index 4c38d8040..510c4e56d 100644 --- a/src/common/math_util.h +++ b/src/common/math_util.h @@ -48,8 +48,8 @@ struct Rectangle { } [[nodiscard]] Rectangle Scale(const float s) const { - return Rectangle{left, top, static_cast(left + GetWidth() * s), - static_cast(top + GetHeight() * s)}; + return Rectangle{left, top, static_cast(static_cast(left + GetWidth()) * s), + static_cast(static_cast(top + GetHeight()) * s)}; } }; diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index ae5e66ef4..11c160570 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -77,7 +77,7 @@ public: .stageFlags = static_cast( is_compute ? 
VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS), .offset = 0, - .size = sizeof(RescalingLayout) - size_offset, + .size = static_cast(sizeof(RescalingLayout)) - size_offset, }; return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index fd0a4aa42..1f60974be 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/common_types.h" #include "common/div_ceil.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_comp_spv.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_comp_spv.h" @@ -12,10 +13,10 @@ namespace Vulkan { -FSR::FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, - VkExtent2D output_size) - : device{device}, memory_allocator{memory_allocator}, image_count{image_count}, - output_size{output_size} { +FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, + VkExtent2D output_size_) + : device{device_}, memory_allocator{memory_allocator_}, image_count{image_count_}, + output_size{output_size_} { CreateImages(); CreateSampler(); @@ -266,14 +267,17 @@ void FSR::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) c const auto blit_image_view = *image_views[image_count + image_index]; const VkDescriptorImageInfo image_info{ + .sampler = VK_NULL_HANDLE, .imageView = image_view, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }; const VkDescriptorImageInfo fsr_image_info{ + .sampler = VK_NULL_HANDLE, .imageView = fsr_image_view, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }; const VkDescriptorImageInfo blit_image_info{ + .sampler = VK_NULL_HANDLE, .imageView = blit_image_view, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }; @@ -341,35 +345,52 @@ void 
FSR::CreateSampler() { void FSR::CreateShaders() { easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_COMP_SPV); - rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_COMP_SPV); + rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_COMP_SPV); } void FSR::CreatePipeline() { - VkPipelineShaderStageCreateInfo shader_stage{ + VkPipelineShaderStageCreateInfo shader_stage_easu{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *easu_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }; + VkPipelineShaderStageCreateInfo shader_stage_rcas{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *rcas_shader, .pName = "main", .pSpecializationInfo = nullptr, }; - VkComputePipelineCreateInfo pipeline_ci{ + VkComputePipelineCreateInfo pipeline_ci_easu{ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, + .stage = shader_stage_easu, .layout = *pipeline_layout, + .basePipelineHandle = VK_NULL_HANDLE, .basePipelineIndex = 0, }; - shader_stage.module = *easu_shader; - pipeline_ci.stage = shader_stage; - easu_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci); + VkComputePipelineCreateInfo pipeline_ci_rcas{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = shader_stage_rcas, + .layout = *pipeline_layout, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }; - shader_stage.module = *rcas_shader; - pipeline_ci.stage = shader_stage; - rcas_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci); + easu_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_easu); + rcas_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_rcas); } } // namespace Vulkan diff --git 
a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ff75d14a1..5ca67c413 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -211,8 +211,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { EndTransformFeedback(); } -#pragma optimize("", off) - void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); @@ -284,13 +282,14 @@ void RasterizerVulkan::Clear() { std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); } else if (!is_signed) { for (size_t i = 0; i < 4; i++) { - clear_value.color.uint32[i] = - static_cast(static_cast(int_size << 1U) * regs.clear_color[i]); + clear_value.color.uint32[i] = static_cast( + static_cast(static_cast(int_size) << 1U) * regs.clear_color[i]); } } else { for (size_t i = 0; i < 4; i++) { - clear_value.color.int32[i] = static_cast( - (static_cast(int_size - 1) << 1) * (regs.clear_color[i] - 0.5f)); + clear_value.color.int32[i] = + static_cast(static_cast(static_cast(int_size - 1) << 1) * + (regs.clear_color[i] - 0.5f)); } } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 7c8732ec1..413d472cd 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -600,8 +600,6 @@ void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, con .width = info.size.width, .height = info.size.height, }; - const bool is_zeta = (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0; - const bool is_int_format = IsPixelFormatInteger(info.format); const VkFilter vk_filter = is_bilinear ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST; scheduler.RequestOutsideRenderPassOperationContext(); -- cgit v1.2.3 From 7506ac4118c6065b2557fe3cb94bab52c099ca37 Mon Sep 17 00:00:00 2001 From: Marshall Mohror Date: Tue, 19 Oct 2021 21:20:28 -0500 Subject: Presentation: Fix turning FSR on and off in settings --- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 334eeb92e..ccf721008 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -362,6 +362,17 @@ void VKBlitScreen::CreateDynamicResources() { } void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + if (!fsr) { + const auto& layout = render_window.GetFramebufferLayout(); + fsr = std::make_unique( + device, memory_allocator, image_count, + VkExtent2D{.width = layout.screen.GetWidth(), .height = layout.screen.GetHeight()}); + } + } else { + fsr.reset(); + } + if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) { return; } -- cgit v1.2.3 From d37d10e7a7b9037a259b27923716e5ce3084d6c3 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 20 Oct 2021 18:27:25 +0200 Subject: TextureCache: fix rescaling in aliases and overlap joins. 
--- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 19 +++++------ src/video_core/texture_cache/texture_cache.h | 41 +++++++++++++++++------- src/video_core/texture_cache/util.cpp | 8 +++-- src/video_core/texture_cache/util.h | 3 +- 4 files changed, 48 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5ca67c413..fd334a146 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -63,7 +63,7 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in const auto conv = [scale](float value) { float new_value = value * scale; if (scale < 1.0f) { - bool sign = std::signbit(new_value); + const bool sign = std::signbit(value); new_value = std::round(std::abs(new_value)); new_value = sign ? -new_value : new_value; } @@ -96,21 +96,22 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u32 down_shift = 0) { const auto& src = regs.scissor_test[index]; VkRect2D scissor; - const auto scale_up = [&](u32 value) -> u32 { + const auto scale_up = [&](s32 value) -> s32 { if (value == 0) { return 0U; } - const u32 upset = value * up_scale; - u32 acumm = 0; + const s32 upset = value * up_scale; + s32 acumm = 0; if ((up_scale >> down_shift) == 0) { - acumm = upset & 0x1; + acumm = upset % 2; } - const u32 converted_value = (value * up_scale) >> down_shift; - return std::max(converted_value + acumm, 1U); + const s32 converted_value = (value * up_scale) >> down_shift; + return value < 0 ? 
std::min(converted_value - acumm, -1) + : std::max(converted_value + acumm, 1); }; if (src.enable) { - scissor.offset.x = static_cast(scale_up(src.min_x)); - scissor.offset.y = static_cast(scale_up(src.min_y)); + scissor.offset.x = scale_up(static_cast(src.min_x)); + scissor.offset.y = scale_up(static_cast(src.min_y)); scissor.extent.width = scale_up(src.max_x - src.min_x); scissor.extent.height = scale_up(src.max_y - src.min_y); } else { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 13914dc8b..a32c11d04 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1037,8 +1037,11 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA if (overlap.info.num_samples != new_image.info.num_samples) { LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); } else { + const auto& resolution = Settings::values.resolution_info; const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); - auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); + const u32 up_scale = can_rescale ? resolution.up_scale : 1; + const u32 down_shift = can_rescale ? resolution.down_shift : 0; + auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); runtime.CopyImage(new_image, overlap, std::move(copies)); } if (True(overlap.flags & ImageFlagBits::Tracked)) { @@ -1659,19 +1662,35 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { const ImageBase& rhs_image = slot_images[rhs->id]; return lhs_image.modification_tick < rhs_image.modification_tick; }); + const auto& resolution = Settings::values.resolution_info; for (const AliasedImage* const aliased : aliased_images) { - if (any_rescaled) { - Image& aliased_image = slot_images[aliased->id]; - if (can_rescale) { - ScaleUp(aliased_image); - } else { - ScaleDown(aliased_image); - if (any_blacklisted) { - aliased_image.flags |= ImageFlagBits::Blacklisted; - } + if (!resolution.active | !any_rescaled) { + CopyImage(image_id, aliased->id, aliased->copies); + continue; + } + Image& aliased_image = slot_images[aliased->id]; + if (!can_rescale) { + ScaleDown(aliased_image); + if (any_blacklisted) { + aliased_image.flags |= ImageFlagBits::Blacklisted; + } + CopyImage(image_id, aliased->id, aliased->copies); + continue; + } + ScaleUp(aliased_image); + + const bool both_2d{image.info.type == ImageType::e2D && + aliased_image.info.type == ImageType::e2D}; + auto copies = aliased->copies; + for (auto copy : copies) { + copy.extent.width = std::max( + (copy.extent.width * resolution.up_scale) >> resolution.down_shift, 1); + if (both_2d) { + copy.extent.height = std::max( + (copy.extent.height * resolution.up_scale) >> resolution.down_shift, 1); } } - CopyImage(image_id, aliased->id, aliased->copies); + CopyImage(image_id, aliased->id, copies); } } diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 59cf2f561..9922aa0cc 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -723,7 +723,7 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { } std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, - SubresourceBase base) { + SubresourceBase base, u32 up_scale, u32 down_shift) { ASSERT(dst.resources.levels >= src.resources.levels); ASSERT(dst.num_samples == src.num_samples); @@ -732,7 
+732,7 @@ std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn ASSERT(src.type == ImageType::e3D); ASSERT(src.resources.levels == 1); } - + const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; std::vector copies; copies.reserve(src.resources.levels); for (s32 level = 0; level < src.resources.levels; ++level) { @@ -762,6 +762,10 @@ std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn if (is_dst_3d) { copy.extent.depth = src.size.depth; } + copy.extent.width = std::max((copy.extent.width * up_scale) >> down_shift, 1); + if (both_2d) { + copy.extent.height = std::max((copy.extent.height * up_scale) >> down_shift, 1); + } } return copies; } diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 766502908..7af52de2e 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -55,7 +55,8 @@ struct OverlapResult { [[nodiscard]] std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, - SubresourceBase base); + SubresourceBase base, u32 up_scale = 1, + u32 down_shift = 0); [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); -- cgit v1.2.3 From e7fc60406ef5309268d77edb5e5266febe147e53 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 20 Oct 2021 18:56:34 +0200 Subject: VideoCore: Add more rescaling option. 
--- src/common/settings.cpp | 13 +++++++++++++ src/common/settings.h | 11 +++++++---- src/yuzu/configuration/configure_graphics.ui | 21 ++++++++++++++++++--- 3 files changed, 38 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/common/settings.cpp b/src/common/settings.cpp index bc2c8c7d7..84ac937e5 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -116,6 +116,11 @@ void UpdateRescalingInfo() { info.down_shift = 1; info.downscale = true; break; + case ResolutionSetup::Res3_4X: + info.up_scale = 3; + info.down_shift = 2; + info.downscale = true; + break; case ResolutionSetup::Res1X: info.up_scale = 1; info.down_shift = 0; @@ -132,6 +137,14 @@ void UpdateRescalingInfo() { info.up_scale = 4; info.down_shift = 0; break; + case ResolutionSetup::Res5X: + info.up_scale = 5; + info.down_shift = 0; + break; + case ResolutionSetup::Res6X: + info.up_scale = 6; + info.down_shift = 0; + break; default: UNREACHABLE(); info.up_scale = 1; diff --git a/src/common/settings.h b/src/common/settings.h index 84dab5217..f6acf5bdf 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -54,10 +54,13 @@ enum class NvdecEmulation : u32 { enum class ResolutionSetup : u32 { Res1_2X = 0, - Res1X = 1, - Res2X = 2, - Res3X = 3, - Res4X = 4, + Res3_4X = 1, + Res1X = 2, + Res2X = 3, + Res3X = 4, + Res4X = 5, + Res5X = 6, + Res6X = 7, }; enum class ScalingFilter : u32 { diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index fe2f6bb7f..c1d7e8349 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -335,7 +335,12 @@ - 0.5X (360p/540p) + 0.5X (360p/540p)[MAY BREAK] + + + + + 0.75X (540p/810p)[MAY BREAK] @@ -350,12 +355,22 @@ - 3X (2160p[4K]/3240p[6K]) + 3X (2160p[4K]/3240p) + + + + + 4X (2880p/4320p) + + + + + 5X (3600p/5400p) - 4X (2880p/4320p[8K]) + 6X (4320p/6480p) -- cgit v1.2.3 From 916b882ea8870e695d50e8ca8c8e4c35fb1895d5 Mon Sep 17 
00:00:00 2001 From: Marshall Mohror Date: Wed, 20 Oct 2021 13:35:59 -0500 Subject: Update scaleforce to use FP16 --- .../host_shaders/present_scaleforce.frag | 143 ++++++++------------- 1 file changed, 55 insertions(+), 88 deletions(-) (limited to 'src') diff --git a/src/video_core/host_shaders/present_scaleforce.frag b/src/video_core/host_shaders/present_scaleforce.frag index 1829a9be8..ebc0d9b90 100644 --- a/src/video_core/host_shaders/present_scaleforce.frag +++ b/src/video_core/host_shaders/present_scaleforce.frag @@ -24,6 +24,9 @@ #version 460 +#extension GL_AMD_gpu_shader_half_float : enable +#extension GL_NV_gpu_shader5 : enable + #ifdef VULKAN #define BINDING_COLOR_TEXTURE 1 @@ -40,106 +43,70 @@ layout (location = 0) out vec4 frag_color; layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; -vec2 tex_size; -vec2 inv_tex_size; - -vec4 cubic(float v) { - vec3 n = vec3(1.0, 2.0, 3.0) - v; - vec3 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = 6.0 - x - y - z; - return vec4(x, y, z, w) / 6.0; -} - -// Bicubic interpolation -vec4 textureBicubic(vec2 tex_coords) { - tex_coords = tex_coords * tex_size - 0.5; - - vec2 fxy = modf(tex_coords, tex_coords); - - vec4 xcubic = cubic(fxy.x); - vec4 ycubic = cubic(fxy.y); - - vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; +const bool ignore_alpha = true; - vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); - vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; - - offset *= inv_tex_size.xxyy; - - vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); - vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); - vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); - vec4 sample3 = textureLod(input_texture, offset.yw, 0.0); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); -} - -mat4x3 center_matrix; -vec4 center_alpha; - -// Finds 
the distance between four colors and cc in YCbCr space -vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { +float16_t ColorDist1(f16vec4 a, f16vec4 b) { // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion - const vec3 K = vec3(0.2627, 0.6780, 0.0593); - const float LUMINANCE_WEIGHT = .6; - const mat3 YCBCR_MATRIX = - mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, - -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); - - mat4x3 colors = mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; - mat4x3 YCbCr = YCBCR_MATRIX * colors; - vec4 color_dist = vec3(1.0) * YCbCr; - color_dist *= color_dist; - vec4 alpha = vec4(A.a, B.a, C.a, D.a); - - return sqrt((color_dist + abs(center_alpha - alpha)) * alpha * center_alpha); + const f16vec3 K = f16vec3(0.2627, 0.6780, 0.0593); + const float16_t scaleB = float16_t(0.5) / (float16_t(1.0) - K.b); + const float16_t scaleR = float16_t(0.5) / (float16_t(1.0) - K.r); + f16vec4 diff = a - b; + float16_t Y = dot(diff.rgb, K); + float16_t Cb = scaleB * (diff.b - Y); + float16_t Cr = scaleR * (diff.r - Y); + f16vec3 YCbCr = f16vec3(Y, Cb, Cr); + float16_t d = length(YCbCr); + if (ignore_alpha) { + return d; + } + return sqrt(a.a * b.a * d * d + diff.a * diff.a); } -void main() { - vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); - vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); - vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); - vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); - vec4 cc = textureLod(input_texture, tex_coord, 0.0); - vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); - vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); - vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); - vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); - +f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 
D) { + return f16vec4( + ColorDist1(ref, A), + ColorDist1(ref, B), + ColorDist1(ref, C), + ColorDist1(ref, D) + ); +} - tex_size = vec2(textureSize(input_texture, 0)); - inv_tex_size = 1.0 / tex_size; - center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); - center_alpha = cc.aaaa; +vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { + f16vec4 bl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); + f16vec4 bc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, -1))); + f16vec4 br = f16vec4(textureOffset(tex, tex_coord, ivec2(1, -1))); + f16vec4 cl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); + f16vec4 cc = f16vec4(texture(tex, tex_coord)); + f16vec4 cr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 0))); + f16vec4 tl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); + f16vec4 tc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, 1))); + f16vec4 tr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 1))); - vec4 offset_tl = ColorDist(tl, tc, tr, cr); - vec4 offset_br = ColorDist(br, bc, bl, cl); + f16vec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); + f16vec4 offset_br = ColorDist(cc, br, bc, bl, cl); // Calculate how different cc is from the texels around it - float total_dist = dot(offset_tl + offset_br, vec4(1.0)); - - // Add together all the distances with direction taken into account - vec4 tmp = offset_tl - offset_br; - vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); + const float16_t plus_weight = float16_t(1.5); + const float16_t cross_weight = float16_t(1.5); + float16_t total_dist = dot(offset_tl + offset_br, f16vec4(cross_weight, plus_weight, cross_weight, plus_weight)); - if (total_dist == 0.0) { - // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters - // and it doesn't really matter which filter is used when the colors aren't changing. 
- frag_color = vec4(cc.rgb, 1.0f); + if (total_dist == float16_t(0.0)) { + return cc; } else { + // Add together all the distances with direction taken into account + f16vec4 tmp = offset_tl - offset_br; + f16vec2 total_offset = tmp.wy * plus_weight + (tmp.zz + f16vec2(-tmp.x, tmp.x)) * cross_weight; + // When the image has thin points, they tend to split apart. - // This is because the texels all around are different - // and total_offset reaches into clear areas. + // This is because the texels all around are different and total_offset reaches into clear areas. // This works pretty well to keep the offset in bounds for these cases. - float clamp_val = length(total_offset) / total_dist; - vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; + float16_t clamp_val = length(total_offset) / total_dist; + f16vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) / f16vec2(textureSize(tex, 0)); - frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); + return texture(tex, tex_coord - final_offset); } } + +void main() { + frag_color = Scaleforce(input_texture, tex_coord); +} \ No newline at end of file -- cgit v1.2.3 From bb3e95133d49878e75d800e2fc405ed5cee29318 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 20 Oct 2021 23:13:29 +0200 Subject: Vulkan: fix waiting on semaphore. 
--- src/video_core/renderer_vulkan/vk_master_semaphore.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 0886b7da8..9be9c9bed 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -70,7 +70,9 @@ public: return; } // If none of the above is hit, fallback to a regular wait - semaphore.Wait(tick); + while (!semaphore.Wait(tick)) { + } + Refresh(); } private: -- cgit v1.2.3 From bf01b7993dca835a516abdc2142be96bc0f216ec Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 20 Oct 2021 23:21:52 +0200 Subject: TextureCache: Improve Reaper. --- src/video_core/texture_cache/texture_cache.h | 38 +++++++++++++++-------- src/video_core/texture_cache/texture_cache_base.h | 2 +- 2 files changed, 26 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a32c11d04..f1254ef62 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -51,8 +51,8 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& if constexpr (HAS_DEVICE_MEMORY_INFO) { const auto device_memory = runtime.GetDeviceLocalMemory(); - const u64 possible_expected_memory = (device_memory * 3) / 10; - const u64 possible_critical_memory = (device_memory * 6) / 10; + const u64 possible_expected_memory = (device_memory * 4) / 10; + const u64 possible_critical_memory = (device_memory * 7) / 10; expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); minimum_memory = 0; @@ -69,7 +69,7 @@ void TextureCache

::RunGarbageCollector() { const bool high_priority_mode = total_used_memory >= expected_memory; const bool aggressive_mode = total_used_memory >= critical_memory; const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; - size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5); + size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10); const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { if (num_iterations == 0) { return true; @@ -91,7 +91,7 @@ void TextureCache

::RunGarbageCollector() { UntrackImage(image, image_id); } UnregisterImage(image_id); - DeleteImage(image_id); + DeleteImage(image_id, image.scale_tick > frame_tick + 5); return false; }; lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); @@ -287,7 +287,9 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { if (image_id != CORRUPT_ID) { Image& image = slot_images[image_id]; image.scale_rating = scale_rating; - image.scale_tick = frame_tick + 1; + if (image.scale_tick <= frame_tick) { + image.scale_tick = frame_tick + 1; + } } }; for (size_t index = 0; index < NUM_RT; ++index) { @@ -810,6 +812,9 @@ bool TextureCache

::ImageCanRescale(ImageBase& image) { template void TextureCache

::InvalidateScale(Image& image) { + if (image.scale_tick <= frame_tick) { + image.scale_tick = frame_tick + 1; + } const std::span image_view_ids = image.image_view_ids; auto& dirty = maxwell3d.dirty.flags; dirty[Dirty::RenderTargets] = true; @@ -842,12 +847,15 @@ void TextureCache

::InvalidateScale(Image& image) { template u64 TextureCache

::GetScaledImageSizeBytes(ImageBase& image) { - const f32 add_to_size = Settings::values.resolution_info.up_factor; - const bool sign = std::signbit(add_to_size); - const u32 image_size_bytes = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - const u64 tentative_size = image_size_bytes * static_cast(std::abs(add_to_size)); + const u64 scale_up = static_cast(Settings::values.resolution_info.up_scale * + Settings::values.resolution_info.up_scale); + const u64 down_shift = static_cast(Settings::values.resolution_info.down_shift + + Settings::values.resolution_info.down_shift); + const u64 image_size_bytes = + static_cast(std::max(image.guest_size_bytes, image.unswizzled_size_bytes)); + const u64 tentative_size = (image_size_bytes * scale_up) >> down_shift; const u64 fitted_size = Common::AlignUp(tentative_size, 1024); - return sign ? -fitted_size : fitted_size; + return fitted_size; } template @@ -1510,7 +1518,7 @@ void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { } template -void TextureCache

::DeleteImage(ImageId image_id) { +void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { ImageBase& image = slot_images[image_id]; if (image.HasScaled()) { total_used_memory -= GetScaledImageSizeBytes(image); @@ -1576,10 +1584,14 @@ void TextureCache

::DeleteImage(ImageId image_id) { num_removed_overlaps); } for (const ImageViewId image_view_id : image_view_ids) { - sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + if (!immediate_delete) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + } slot_image_views.erase(image_view_id); } - sentenced_images.Push(std::move(slot_images[image_id])); + if (!immediate_delete) { + sentenced_images.Push(std::move(slot_images[image_id])); + } slot_images.erase(image_id); alloc_images.erase(alloc_image_it); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4f876b2f4..eea589269 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -292,7 +292,7 @@ private: void UntrackImage(ImageBase& image, ImageId image_id); /// Delete image from the cache - void DeleteImage(ImageId image); + void DeleteImage(ImageId image, bool immediate_delete = false); /// Remove image views references from the cache void RemoveImageViewReferences(std::span removed_views); -- cgit v1.2.3 From 9e065b9c7d3b25ddfe20afa4a945cca6e9767fa9 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 21 Oct 2021 01:27:54 +0200 Subject: VideoCore: Add gaussian filtering. 
--- src/common/settings.h | 5 +- src/video_core/host_shaders/CMakeLists.txt | 1 + src/video_core/host_shaders/present_gaussian.frag | 74 ++++++++++++++++++++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 5 ++ src/video_core/renderer_opengl/renderer_opengl.h | 1 + src/video_core/renderer_vulkan/vk_blit_screen.cpp | 49 ++++++++++++++ src/video_core/renderer_vulkan/vk_blit_screen.h | 2 + src/yuzu/configuration/configure_graphics.ui | 5 ++ 8 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 src/video_core/host_shaders/present_gaussian.frag (limited to 'src') diff --git a/src/common/settings.h b/src/common/settings.h index f6acf5bdf..830030efd 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -67,8 +67,9 @@ enum class ScalingFilter : u32 { NearestNeighbor = 0, Bilinear = 1, Bicubic = 2, - ScaleForce = 3, - Fsr = 4, + Gaussian = 3, + ScaleForce = 4, + Fsr = 5, }; struct ResolutionScalingInfo { diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 32e2ab500..b0e15773c 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -19,6 +19,7 @@ set(SHADER_FILES pitch_unswizzle.comp present_scaleforce.frag present_bicubic.frag + present_gaussian.frag vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag vulkan_fidelityfx_fsr_easu.comp diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag new file mode 100644 index 000000000..d5e2b1781 --- /dev/null +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -0,0 +1,74 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 460 core + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout (location = 0) in vec2 frag_tex_coord; + +layout (location = 0) out vec4 color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; + +const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308); +const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703); + +vec4 blurVertical(sampler2D textureSampler, vec2 coord, vec2 norm) { + vec4 result = vec4(0.0f); + for (int i=1; i<3; i++) { + result += + texture(textureSampler, vec2(coord) + (vec2(0.0, offset[i]) * norm)) + * weight[i]; + result += + texture(textureSampler, vec2(coord) - (vec2(0.0, offset[i]) * norm)) + * weight[i]; + } + return result; +} + +vec4 blurHorizontal(sampler2D textureSampler, vec2 coord, vec2 norm) { + vec4 result = vec4(0.0f); + for (int i=1; i<3; i++) { + result += + texture(textureSampler, vec2(coord) + (vec2(offset[i], 0.0) * norm)) + * weight[i]; + result += + texture(textureSampler, vec2(coord) - (vec2(offset[i], 0.0) * norm)) + * weight[i]; + } + return result; +} + +vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) { + vec4 result = vec4(0.0f); + for (int i=1; i<3; i++) { + result += + texture(textureSampler, vec2(coord) + (vec2(offset[i], offset[i]) * norm)) + * weight[i]; + result += + texture(textureSampler, vec2(coord) - (vec2(offset[i], offset[i]) * norm)) + * weight[i]; + } + return result; +} + +void main() { + vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0]; + vec2 tex_offset = 1.0f / textureSize(color_texture, 0); + vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb; + vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; + vec3 diagonalA = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; + vec3 diagonalB = blurVertical(color_texture, frag_tex_coord, 
-tex_offset).rgb; + vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f); + color = vec4(combination + base, 1.0f); +} diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 68423601c..6132b3c49 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -256,6 +256,8 @@ void RendererOpenGL::InitOpenGLObjects() { present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); + present_gaussian_fragment = + CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); present_scaleforce_fragment = CreateProgram(HostShaders::PRESENT_SCALEFORCE_FRAG, GL_FRAGMENT_SHADER); @@ -359,6 +361,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { case Settings::ScalingFilter::Bicubic: fragment_handle = present_bicubic_fragment.handle; break; + case Settings::ScalingFilter::Gaussian: + fragment_handle = present_gaussian_fragment.handle; + break; case Settings::ScalingFilter::ScaleForce: fragment_handle = present_scaleforce_fragment.handle; break; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 504ddbe7b..62a746e41 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -114,6 +114,7 @@ private: OGLProgram present_vertex; OGLProgram present_bilinear_fragment; OGLProgram present_bicubic_fragment; + OGLProgram present_gaussian_fragment; OGLProgram present_scaleforce_fragment; OGLFramebuffer screenshot_framebuffer; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp 
index ccf721008..0d6bce214 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -18,6 +18,7 @@ #include "core/memory.h" #include "video_core/gpu.h" #include "video_core/host_shaders/present_bicubic_frag_spv.h" +#include "video_core/host_shaders/present_gaussian_frag_spv.h" #include "video_core/host_shaders/present_scaleforce_frag_spv.h" #include "video_core/host_shaders/vulkan_present_frag_spv.h" #include "video_core/host_shaders/vulkan_present_vert_spv.h" @@ -297,6 +298,9 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, case Settings::ScalingFilter::Bicubic: cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *bicubic_pipeline); break; + case Settings::ScalingFilter::Gaussian: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *gaussian_pipeline); + break; case Settings::ScalingFilter::ScaleForce: cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *scaleforce_pipeline); break; @@ -388,6 +392,7 @@ void VKBlitScreen::CreateShaders() { vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); + gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); scaleforce_fragment_shader = BuildShader(device, PRESENT_SCALEFORCE_FRAG_SPV); } @@ -574,6 +579,27 @@ void VKBlitScreen::CreateGraphicsPipeline() { }, }}; + const std::array gaussian_shader_stages{{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = *vertex_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = *gaussian_fragment_shader, + .pName = "main", + .pSpecializationInfo = nullptr, 
+ }, + }}; + const std::array scaleforce_shader_stages{{ { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -733,6 +759,28 @@ void VKBlitScreen::CreateGraphicsPipeline() { .basePipelineIndex = 0, }; + const VkGraphicsPipelineCreateInfo gaussian_pipeline_ci{ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(gaussian_shader_stages.size()), + .pStages = gaussian_shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = nullptr, + .pViewportState = &viewport_state_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisampling_ci, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = *renderpass, + .subpass = 0, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }; + const VkGraphicsPipelineCreateInfo scaleforce_pipeline_ci{ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, @@ -757,6 +805,7 @@ void VKBlitScreen::CreateGraphicsPipeline() { bilinear_pipeline = device.GetLogical().CreateGraphicsPipeline(bilinear_pipeline_ci); bicubic_pipeline = device.GetLogical().CreateGraphicsPipeline(bicubic_pipeline_ci); + gaussian_pipeline = device.GetLogical().CreateGraphicsPipeline(gaussian_pipeline_ci); scaleforce_pipeline = device.GetLogical().CreateGraphicsPipeline(scaleforce_pipeline_ci); } diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 448a2fbe6..96a5598ad 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -111,6 +111,7 @@ private: vk::ShaderModule vertex_shader; vk::ShaderModule bilinear_fragment_shader; vk::ShaderModule bicubic_fragment_shader; + vk::ShaderModule gaussian_fragment_shader; vk::ShaderModule 
scaleforce_fragment_shader; vk::DescriptorPool descriptor_pool; vk::DescriptorSetLayout descriptor_set_layout; @@ -118,6 +119,7 @@ private: vk::Pipeline nearest_neightbor_pipeline; vk::Pipeline bilinear_pipeline; vk::Pipeline bicubic_pipeline; + vk::Pipeline gaussian_pipeline; vk::Pipeline scaleforce_pipeline; vk::RenderPass renderpass; std::vector framebuffers; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index c1d7e8349..848ee2d08 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -417,6 +417,11 @@ Bicubic + + + Gaussian + + ScaleForce -- cgit v1.2.3 From 2eff80b47f578b346bb80c5afa2271859cd7943b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 21 Oct 2021 02:05:16 +0200 Subject: QtGUI: Add buttton to toggle the filter. --- src/common/settings.h | 1 + src/video_core/renderer_opengl/renderer_opengl.cpp | 1 + src/yuzu/configuration/configure_graphics.ui | 2 +- src/yuzu/main.cpp | 56 ++++++++++++++++++++++ src/yuzu/main.h | 2 + 5 files changed, 61 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/common/settings.h b/src/common/settings.h index 830030efd..e926a3268 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -70,6 +70,7 @@ enum class ScalingFilter : u32 { Gaussian = 3, ScaleForce = 4, Fsr = 5, + LastFilter = Fsr, }; struct ResolutionScalingInfo { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6132b3c49..227697c4f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -24,6 +24,7 @@ #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/host_shaders/present_bicubic_frag.h" +#include "video_core/host_shaders/present_gaussian_frag.h" #include 
"video_core/host_shaders/present_scaleforce_frag.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 848ee2d08..7cfb2860d 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -429,7 +429,7 @@ - FidelityFX Super Resolution [Vulkan Only] + AMD's FidelityFX™️ Super Resolution [Vulkan Only] diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 3cb146982..379bd0b17 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -774,6 +774,34 @@ void GMainWindow::InitializeWidgets() { tas_label->setFocusPolicy(Qt::NoFocus); statusBar()->insertPermanentWidget(0, tas_label); + // Setup Filter button + filter_status_button = new QPushButton(); + filter_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); + filter_status_button->setFocusPolicy(Qt::NoFocus); + connect(filter_status_button, &QPushButton::clicked, [&] { + auto filter = Settings::values.scaling_filter.GetValue(); + if (filter == Settings::ScalingFilter::LastFilter) { + filter = Settings::ScalingFilter::NearestNeighbor; + } else { + filter = static_cast(static_cast(filter) + 1); + } + if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL && + filter == Settings::ScalingFilter::Fsr) { + filter = Settings::ScalingFilter::NearestNeighbor; + } + Settings::values.scaling_filter.SetValue(filter); + filter_status_button->setChecked(true); + UpdateFilterText(); + }); + auto filter = Settings::values.scaling_filter.GetValue(); + if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL && + filter == Settings::ScalingFilter::Fsr) { + Settings::values.scaling_filter.SetValue(Settings::ScalingFilter::NearestNeighbor); + } + UpdateFilterText(); + filter_status_button->setCheckable(true); + 
statusBar()->insertPermanentWidget(0, filter_status_button); + // Setup Dock button dock_status_button = new QPushButton(); dock_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); @@ -3033,11 +3061,39 @@ void GMainWindow::UpdateGPUAccuracyButton() { } } +void GMainWindow::UpdateFilterText() { + const auto filter = Settings::values.scaling_filter.GetValue(); + switch (filter) { + case Settings::ScalingFilter::NearestNeighbor: + filter_status_button->setText(tr("NEAREST")); + break; + case Settings::ScalingFilter::Bilinear: + filter_status_button->setText(tr("BILINEAR")); + break; + case Settings::ScalingFilter::Bicubic: + filter_status_button->setText(tr("BICUBIC")); + break; + case Settings::ScalingFilter::Gaussian: + filter_status_button->setText(tr("GAUSSIAN")); + break; + case Settings::ScalingFilter::ScaleForce: + filter_status_button->setText(tr("SCALEFORCE")); + break; + case Settings::ScalingFilter::Fsr: + filter_status_button->setText(tr("AMD'S FIDELITYFX SR")); + break; + default: + filter_status_button->setText(tr("BILINEAR")); + break; + } +} + void GMainWindow::UpdateStatusButtons() { dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::Vulkan); UpdateGPUAccuracyButton(); + UpdateFilterText(); } void GMainWindow::UpdateUISettings() { diff --git a/src/yuzu/main.h b/src/yuzu/main.h index beb4f2984..d4d2f3d58 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -302,6 +302,7 @@ private: void MigrateConfigFiles(); void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {}, std::string_view gpu_vendor = {}); + void UpdateFilterText(); void UpdateStatusBar(); void UpdateGPUAccuracyButton(); void UpdateStatusButtons(); @@ -336,6 +337,7 @@ private: QPushButton* gpu_accuracy_button = nullptr; QPushButton* renderer_status_button = nullptr; QPushButton* dock_status_button = 
nullptr; + QPushButton* filter_status_button = nullptr; QTimer status_bar_update_timer; std::unique_ptr config; -- cgit v1.2.3 From 510caeefb3e3bf4b365f08a97cdfeffaaf8a80ce Mon Sep 17 00:00:00 2001 From: Marshall Mohror Date: Wed, 20 Oct 2021 18:32:11 -0500 Subject: Settings: Add anti-aliasing method setting --- src/common/settings.cpp | 1 + src/common/settings.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'src') diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 84ac937e5..3bcaa072f 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -49,6 +49,7 @@ void LogSettings() { log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue()); log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue()); log_setting("Renderer_ScalingFilter", values.scaling_filter.GetValue()); + log_setting("Renderer_AntiAliasing", values.anti_aliasing.GetValue()); log_setting("Renderer_UseSpeedLimit", values.use_speed_limit.GetValue()); log_setting("Renderer_SpeedLimit", values.speed_limit.GetValue()); log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue()); diff --git a/src/common/settings.h b/src/common/settings.h index e926a3268..ca1c3c1aa 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -73,6 +73,11 @@ enum class ScalingFilter : u32 { LastFilter = Fsr, }; +enum class AntiAliasing : u32 { + None = 0, + Fxaa = 1, +}; + struct ResolutionScalingInfo { u32 up_scale{1}; u32 down_shift{0}; @@ -498,6 +503,7 @@ struct Values { ResolutionScalingInfo resolution_info{}; Setting resolution_setup{ResolutionSetup::Res1X, "resolution_setup"}; Setting scaling_filter{ScalingFilter::Bilinear, "scaling_filter"}; + Setting anti_aliasing{AntiAliasing::None, "anti_aliasing"}; // *nix platforms may have issues with the borderless windowed fullscreen mode. // Default to exclusive fullscreen on these platforms for now. 
RangedSetting fullscreen_mode{ -- cgit v1.2.3 From 74e39ed6eeb2362316044ad92e3de9d75034ca58 Mon Sep 17 00:00:00 2001 From: Marshall Mohror Date: Wed, 20 Oct 2021 18:33:07 -0500 Subject: Frontend: Add anti-aliasing method setting --- src/yuzu/configuration/config.cpp | 5 ++++ src/yuzu/configuration/config.h | 1 + src/yuzu/configuration/configure_graphics.cpp | 24 +++++++++++++++++ src/yuzu/configuration/configure_graphics.ui | 39 +++++++++++++++++++++++++++ src/yuzu_cmd/config.cpp | 1 + 5 files changed, 70 insertions(+) (limited to 'src') diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 4c296a94d..8227d06bc 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -826,6 +826,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.aspect_ratio); ReadGlobalSetting(Settings::values.resolution_setup); ReadGlobalSetting(Settings::values.scaling_filter); + ReadGlobalSetting(Settings::values.anti_aliasing); ReadGlobalSetting(Settings::values.max_anisotropy); ReadGlobalSetting(Settings::values.use_speed_limit); ReadGlobalSetting(Settings::values.speed_limit); @@ -1374,6 +1375,10 @@ void Config::SaveRendererValues() { static_cast(Settings::values.scaling_filter.GetValue(global)), static_cast(Settings::values.scaling_filter.GetDefault()), Settings::values.scaling_filter.UsingGlobal()); + WriteSetting(QString::fromStdString(Settings::values.anti_aliasing.GetLabel()), + static_cast(Settings::values.anti_aliasing.GetValue(global)), + static_cast(Settings::values.anti_aliasing.GetDefault()), + Settings::values.anti_aliasing.UsingGlobal()); WriteGlobalSetting(Settings::values.max_anisotropy); WriteGlobalSetting(Settings::values.use_speed_limit); WriteGlobalSetting(Settings::values.speed_limit); diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 97dc1bb47..d673c1cdc 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -191,5 
+191,6 @@ Q_DECLARE_METATYPE(Settings::FullscreenMode); Q_DECLARE_METATYPE(Settings::NvdecEmulation); Q_DECLARE_METATYPE(Settings::ResolutionSetup); Q_DECLARE_METATYPE(Settings::ScalingFilter); +Q_DECLARE_METATYPE(Settings::AntiAliasing); Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 02498fad7..59f975a6e 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -107,6 +107,8 @@ void ConfigureGraphics::SetConfiguration() { static_cast(Settings::values.resolution_setup.GetValue())); ui->scaling_filter_combobox->setCurrentIndex( static_cast(Settings::values.scaling_filter.GetValue())); + ui->anti_aliasing_combobox->setCurrentIndex( + static_cast(Settings::values.anti_aliasing.GetValue())); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); ConfigurationShared::SetHighlight(ui->api_widget, @@ -137,6 +139,11 @@ void ConfigureGraphics::SetConfiguration() { ConfigurationShared::SetHighlight(ui->scaling_filter_label, !Settings::values.scaling_filter.UsingGlobal()); + ConfigurationShared::SetPerGameSetting(ui->anti_aliasing_combobox, + &Settings::values.anti_aliasing); + ConfigurationShared::SetHighlight(ui->anti_aliasing_label, + !Settings::values.anti_aliasing.UsingGlobal()); + ui->bg_combobox->setCurrentIndex(Settings::values.bg_red.UsingGlobal() ? 0 : 1); ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); @@ -156,6 +163,10 @@ void ConfigureGraphics::ApplyConfiguration() { ui->scaling_filter_combobox->currentIndex() - ((Settings::IsConfiguringGlobal()) ? 
0 : ConfigurationShared::USE_GLOBAL_OFFSET)); + const auto anti_aliasing = static_cast( + ui->anti_aliasing_combobox->currentIndex() - + ((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET)); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.fullscreen_mode, ui->fullscreen_mode_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.aspect_ratio, @@ -193,6 +204,9 @@ void ConfigureGraphics::ApplyConfiguration() { if (Settings::values.scaling_filter.UsingGlobal()) { Settings::values.scaling_filter.SetValue(scaling_filter); } + if (Settings::values.anti_aliasing.UsingGlobal()) { + Settings::values.anti_aliasing.SetValue(anti_aliasing); + } } else { if (ui->resolution_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.resolution_setup.SetGlobal(true); @@ -206,6 +220,12 @@ void ConfigureGraphics::ApplyConfiguration() { Settings::values.scaling_filter.SetGlobal(false); Settings::values.scaling_filter.SetValue(scaling_filter); } + if (ui->anti_aliasing_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.anti_aliasing.SetGlobal(true); + } else { + Settings::values.anti_aliasing.SetGlobal(false); + Settings::values.anti_aliasing.SetValue(anti_aliasing); + } if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.renderer_backend.SetGlobal(true); Settings::values.shader_backend.SetGlobal(true); @@ -354,6 +374,7 @@ void ConfigureGraphics::SetupPerGameUI() { ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); ui->resolution_combobox->setEnabled(Settings::values.resolution_setup.UsingGlobal()); ui->scaling_filter_combobox->setEnabled(Settings::values.scaling_filter.UsingGlobal()); + ui->anti_aliasing_combobox->setEnabled(Settings::values.anti_aliasing.UsingGlobal()); ui->use_asynchronous_gpu_emulation->setEnabled( Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); 
ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal()); @@ -388,6 +409,9 @@ void ConfigureGraphics::SetupPerGameUI() { ConfigurationShared::SetColoredComboBox( ui->scaling_filter_combobox, ui->scaling_filter_label, static_cast(Settings::values.scaling_filter.GetValue(true))); + ConfigurationShared::SetColoredComboBox( + ui->anti_aliasing_combobox, ui->anti_aliasing_label, + static_cast(Settings::values.anti_aliasing.GetValue(true))); ConfigurationShared::InsertGlobalItem( ui->api, static_cast(Settings::values.renderer_backend.GetValue(true))); ConfigurationShared::InsertGlobalItem( diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 7cfb2860d..0d2987fcf 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -437,6 +437,45 @@ + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Anti-Aliasing Method: + + + + + + + + None + + + + + FXAA + + + + + + + diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 3c888d84e..33241ea98 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -453,6 +453,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.resolution_setup); ReadSetting("Renderer", Settings::values.scaling_filter); + ReadSetting("Renderer", Settings::values.anti_aliasing); ReadSetting("Renderer", Settings::values.fullscreen_mode); ReadSetting("Renderer", Settings::values.aspect_ratio); ReadSetting("Renderer", Settings::values.max_anisotropy); -- cgit v1.2.3 From 48cf3764626e1ed30450d15e00befb75a4eae329 Mon Sep 17 00:00:00 2001 From: Marshall Mohror Date: Wed, 20 Oct 2021 18:36:06 -0500 Subject: OpenGL: Implement FXAA --- src/video_core/host_shaders/CMakeLists.txt | 2 + src/video_core/host_shaders/fxaa.frag | 72 ++++++++++++++ src/video_core/host_shaders/fxaa.vert | 40 ++++++++ .../renderer_opengl/gl_resource_manager.cpp | 2 +- src/video_core/renderer_opengl/renderer_opengl.cpp | 109 
++++++++++++++------- src/video_core/renderer_opengl/renderer_opengl.h | 4 + 6 files changed, 194 insertions(+), 35 deletions(-) create mode 100644 src/video_core/host_shaders/fxaa.frag create mode 100644 src/video_core/host_shaders/fxaa.vert (limited to 'src') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index b0e15773c..6b5ea649a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -13,6 +13,8 @@ set(SHADER_FILES convert_depth_to_float.frag convert_float_to_depth.frag full_screen_triangle.vert + fxaa.frag + fxaa.vert opengl_copy_bc4.comp opengl_present.frag opengl_present.vert diff --git a/src/video_core/host_shaders/fxaa.frag b/src/video_core/host_shaders/fxaa.frag new file mode 100644 index 000000000..23f910d4c --- /dev/null +++ b/src/video_core/host_shaders/fxaa.frag @@ -0,0 +1,72 @@ +// Adapted from +// https://www.geeks3d.com/20110405/fxaa-fast-approximate-anti-aliasing-demo-glsl-opengl-test-radeon-geforce/3/ + +#version 460 + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout (location = 0) in vec4 posPos; + +layout (location = 0) out vec4 frag_color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; + +const float FXAA_SPAN_MAX = 8.0; +const float FXAA_REDUCE_MUL = 1.0 / 8.0; +const float FXAA_REDUCE_MIN = 1.0 / 128.0; + +#define FxaaTexLod0(t, p) textureLod(t, p, 0.0) +#define FxaaTexOff(t, p, o) textureLodOffset(t, p, 0.0, o) + +vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) { + + vec3 rgbNW = FxaaTexLod0(tex, posPos.zw).xyz; + vec3 rgbNE = FxaaTexOff(tex, posPos.zw, ivec2(1,0)).xyz; + vec3 rgbSW = FxaaTexOff(tex, posPos.zw, ivec2(0,1)).xyz; + vec3 rgbSE = FxaaTexOff(tex, posPos.zw, ivec2(1,1)).xyz; + vec3 rgbM = FxaaTexLod0(tex, posPos.xy).xyz; +/*---------------------------------------------------------*/ + vec3 luma 
= vec3(0.299, 0.587, 0.114); + float lumaNW = dot(rgbNW, luma); + float lumaNE = dot(rgbNE, luma); + float lumaSW = dot(rgbSW, luma); + float lumaSE = dot(rgbSE, luma); + float lumaM = dot(rgbM, luma); +/*---------------------------------------------------------*/ + float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE))); + float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE))); +/*---------------------------------------------------------*/ + vec2 dir; + dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE)); + dir.y = ((lumaNW + lumaSW) - (lumaNE + lumaSE)); +/*---------------------------------------------------------*/ + float dirReduce = max( + (lumaNW + lumaNE + lumaSW + lumaSE) * (0.25 * FXAA_REDUCE_MUL), + FXAA_REDUCE_MIN); + float rcpDirMin = 1.0/(min(abs(dir.x), abs(dir.y)) + dirReduce); + dir = min(vec2( FXAA_SPAN_MAX, FXAA_SPAN_MAX), + max(vec2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX), + dir * rcpDirMin)) / textureSize(tex, 0); +/*--------------------------------------------------------*/ + vec3 rgbA = (1.0 / 2.0) * ( + FxaaTexLod0(tex, posPos.xy + dir * (1.0 / 3.0 - 0.5)).xyz + + FxaaTexLod0(tex, posPos.xy + dir * (2.0 / 3.0 - 0.5)).xyz); + vec3 rgbB = rgbA * (1.0 / 2.0) + (1.0 / 4.0) * ( + FxaaTexLod0(tex, posPos.xy + dir * (0.0 / 3.0 - 0.5)).xyz + + FxaaTexLod0(tex, posPos.xy + dir * (3.0 / 3.0 - 0.5)).xyz); + float lumaB = dot(rgbB, luma); + if((lumaB < lumaMin) || (lumaB > lumaMax)) return rgbA; + return rgbB; +} + +void main() { + frag_color = vec4(FxaaPixelShader(posPos, input_texture), 1.0); +} diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert new file mode 100644 index 000000000..715fce462 --- /dev/null +++ b/src/video_core/host_shaders/fxaa.vert @@ -0,0 +1,40 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 460 + +out gl_PerVertex { + vec4 gl_Position; +}; + +const vec2 vertices[4] = + vec2[4](vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0)); + +layout (location = 0) out vec4 posPos; + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; + +const float FXAA_SUBPIX_SHIFT = 0; + +void main() { +#ifdef VULKAN + vec2 vertex = vertices[gl_VertexIndex]; +#else + vec2 vertex = vertices[gl_VertexID]; +#endif + gl_Position = vec4(vertex, 0.0, 1.0); + vec2 vert_tex_coord = (vertex + 1.0) / 2.0; + posPos.xy = vert_tex_coord; + posPos.zw = vert_tex_coord - (0.5 + FXAA_SUBPIX_SHIFT) / textureSize(input_texture, 0); +} diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 8695c29e3..70947838c 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -166,7 +166,7 @@ void OGLFramebuffer::Create() { return; MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenFramebuffers(1, &handle); + glCreateFramebuffers(1, &handle); } void OGLFramebuffer::Release() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 227697c4f..dbe66a1b6 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -21,6 +21,8 @@ #include "core/memory.h" #include "core/perf_stats.h" #include "core/telemetry_session.h" +#include "video_core/host_shaders/fxaa_frag.h" +#include "video_core/host_shaders/fxaa_vert.h" #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/host_shaders/present_bicubic_frag.h" @@ -254,6 +256,8 @@ void 
RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs + fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER); + fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); @@ -287,6 +291,8 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + + fxaa_framebuffer.Create(); } void RendererOpenGL::AddTelemetryFields() { @@ -338,14 +344,83 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, texture.resource.Release(); texture.resource.Create(GL_TEXTURE_2D); glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); + fxaa_texture.Release(); + fxaa_texture.Create(GL_TEXTURE_2D); + glTextureStorage2D(fxaa_texture.handle, 1, GL_RGBA16F, texture.width, texture.height); + glNamedFramebufferTexture(fxaa_framebuffer.handle, GL_COLOR_ATTACHMENT0, fxaa_texture.handle, + 0); } void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { + // TODO: Signal state tracker about these changes + state_tracker.NotifyScreenDrawVertexArray(); + state_tracker.NotifyPolygonModes(); + state_tracker.NotifyViewport0(); + state_tracker.NotifyScissor0(); + state_tracker.NotifyColorMask(0); + state_tracker.NotifyBlend0(); + state_tracker.NotifyFramebuffer(); + state_tracker.NotifyFrontFace(); + state_tracker.NotifyCullTest(); + state_tracker.NotifyDepthTest(); + state_tracker.NotifyStencilTest(); + state_tracker.NotifyPolygonOffset(); + state_tracker.NotifyRasterizeEnable(); + state_tracker.NotifyFramebufferSRGB(); + state_tracker.NotifyLogicOp(); + 
state_tracker.NotifyClipControl(); + state_tracker.NotifyAlphaTest(); + + state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); + // Update background color before drawing glClearColor(Settings::values.bg_red.GetValue() / 255.0f, Settings::values.bg_green.GetValue() / 255.0f, Settings::values.bg_blue.GetValue() / 255.0f, 1.0f); + glEnable(GL_CULL_FACE); + glDisable(GL_COLOR_LOGIC_OP); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_RASTERIZER_DISCARD); + glDisable(GL_ALPHA_TEST); + glDisablei(GL_BLEND, 0); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glCullFace(GL_BACK); + glFrontFace(GL_CW); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + glBindTextureUnit(0, screen_info.display_texture); + + if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa) { + program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle); + + glEnablei(GL_SCISSOR_TEST, 0); + glScissorIndexed(0, 0, 0, + framebuffer_crop_rect.GetWidth() != 0 ? framebuffer_crop_rect.GetWidth() + : screen_info.texture.width, + framebuffer_crop_rect.GetHeight() != 0 ? 
framebuffer_crop_rect.GetHeight() + : screen_info.texture.height); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast(screen_info.texture.width), + static_cast(screen_info.texture.height)); + glDepthRangeIndexed(0, 0.0, 0.0); + + glBindSampler(0, present_sampler.handle); + GLint old_read_fb; + GLint old_draw_fb; + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fxaa_framebuffer.handle); + + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); + + glBindTextureUnit(0, fxaa_texture.handle); + } + // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); @@ -422,47 +497,14 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { }; glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); - // TODO: Signal state tracker about these changes - state_tracker.NotifyScreenDrawVertexArray(); - state_tracker.NotifyPolygonModes(); - state_tracker.NotifyViewport0(); - state_tracker.NotifyScissor0(); - state_tracker.NotifyColorMask(0); - state_tracker.NotifyBlend0(); - state_tracker.NotifyFramebuffer(); - state_tracker.NotifyFrontFace(); - state_tracker.NotifyCullTest(); - state_tracker.NotifyDepthTest(); - state_tracker.NotifyStencilTest(); - state_tracker.NotifyPolygonOffset(); - state_tracker.NotifyRasterizeEnable(); - state_tracker.NotifyFramebufferSRGB(); - state_tracker.NotifyLogicOp(); - state_tracker.NotifyClipControl(); - state_tracker.NotifyAlphaTest(); - - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); - glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { glEnable(GL_FRAMEBUFFER_SRGB); } else { glDisable(GL_FRAMEBUFFER_SRGB); } - glDisable(GL_COLOR_LOGIC_OP); - glDisable(GL_DEPTH_TEST); - glDisable(GL_STENCIL_TEST); - 
glDisable(GL_POLYGON_OFFSET_FILL); - glDisable(GL_RASTERIZER_DISCARD); - glDisable(GL_ALPHA_TEST); - glDisablei(GL_BLEND, 0); glDisablei(GL_SCISSOR_TEST, 0); - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - glCullFace(GL_BACK); - glFrontFace(GL_CW); - glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glViewportIndexedf(0, 0.0f, 0.0f, static_cast(layout.width), static_cast(layout.height)); - glDepthRangeIndexed(0, 0.0, 0.0); glEnableVertexAttribArray(PositionLocation); glEnableVertexAttribArray(TexCoordLocation); @@ -482,7 +524,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); } - glBindTextureUnit(0, screen_info.display_texture); if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { glBindSampler(0, present_sampler.handle); } else { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 62a746e41..f6c66f804 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -111,6 +111,8 @@ private: OGLSampler present_sampler; OGLSampler present_sampler_nn; OGLBuffer vertex_buffer; + OGLProgram fxaa_vertex; + OGLProgram fxaa_fragment; OGLProgram present_vertex; OGLProgram present_bilinear_fragment; OGLProgram present_bicubic_fragment; @@ -123,6 +125,8 @@ private: /// Display information for Switch screen ScreenInfo screen_info; + OGLTexture fxaa_texture; + OGLFramebuffer fxaa_framebuffer; /// OpenGL framebuffer data std::vector gl_framebuffer_data; -- cgit v1.2.3 From 056894f07ae4c1ef295fefb1e8f120964f2e04b4 Mon Sep 17 00:00:00 2001 From: Marshall Mohror Date: Wed, 20 Oct 2021 21:40:02 -0500 Subject: OpenGL: fix FXAA with scaling --- src/video_core/renderer_opengl/renderer_opengl.cpp | 39 +++++++++++++++++----- src/video_core/renderer_opengl/renderer_opengl.h | 1 + 2 files changed, 31 insertions(+), 9 
deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index dbe66a1b6..e63f0bdd8 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -213,7 +213,9 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf framebuffer_crop_rect = framebuffer.crop_rect; const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; - if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { + screen_info.was_accelerated = + rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride); + if (screen_info.was_accelerated) { return; } @@ -346,7 +348,9 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); fxaa_texture.Release(); fxaa_texture.Create(GL_TEXTURE_2D); - glTextureStorage2D(fxaa_texture.handle, 1, GL_RGBA16F, texture.width, texture.height); + glTextureStorage2D(fxaa_texture.handle, 1, GL_RGBA16F, + Settings::values.resolution_info.ScaleUp(screen_info.texture.width), + Settings::values.resolution_info.ScaleUp(screen_info.texture.height)); glNamedFramebufferTexture(fxaa_framebuffer.handle, GL_COLOR_ATTACHMENT0, fxaa_texture.handle, 0); } @@ -397,13 +401,25 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle); glEnablei(GL_SCISSOR_TEST, 0); - glScissorIndexed(0, 0, 0, - framebuffer_crop_rect.GetWidth() != 0 ? framebuffer_crop_rect.GetWidth() - : screen_info.texture.width, - framebuffer_crop_rect.GetHeight() != 0 ? 
framebuffer_crop_rect.GetHeight() - : screen_info.texture.height); - glViewportIndexedf(0, 0.0f, 0.0f, static_cast(screen_info.texture.width), - static_cast(screen_info.texture.height)); + auto viewport_width = screen_info.texture.width; + auto scissor_width = framebuffer_crop_rect.GetWidth(); + if (scissor_width <= 0) { + scissor_width = viewport_width; + } + auto viewport_height = screen_info.texture.height; + auto scissor_height = framebuffer_crop_rect.GetHeight(); + if (scissor_height <= 0) { + scissor_height = viewport_height; + } + if (screen_info.was_accelerated) { + viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width); + scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width); + viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height); + scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height); + } + glScissorIndexed(0, 0, 0, scissor_width, scissor_height); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast(viewport_width), + static_cast(viewport_height)); glDepthRangeIndexed(0, 0.0, 0.0); glBindSampler(0, present_sampler.handle); @@ -487,6 +503,11 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { scale_v = static_cast(framebuffer_crop_rect.GetHeight()) / static_cast(screen_info.texture.height); } + if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa && + !screen_info.was_accelerated) { + scale_u /= Settings::values.resolution_info.up_factor; + scale_v /= Settings::values.resolution_info.up_factor; + } const auto& screen = layout.screen; const std::array vertices = { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index f6c66f804..cda333cad 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -50,6 +50,7 @@ struct TextureInfo { /// Structure used for storing information about the display target for the 
Switch screen struct ScreenInfo { GLuint display_texture{}; + bool was_accelerated = false; bool display_srgb{}; const Common::Rectangle display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; TextureInfo texture; -- cgit v1.2.3 From e6f1ed08fb1f11d86bb4cb7c03d83a9f443d6c12 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 22 Oct 2021 19:22:34 +0200 Subject: Vulkan: Implement FXAA --- src/video_core/host_shaders/fxaa.vert | 2 +- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 390 ++++++++++++++++++++-- src/video_core/renderer_vulkan/vk_blit_screen.h | 17 + 3 files changed, 387 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert index 715fce462..01d5ff4df 100644 --- a/src/video_core/host_shaders/fxaa.vert +++ b/src/video_core/host_shaders/fxaa.vert @@ -15,7 +15,7 @@ layout (location = 0) out vec4 posPos; #ifdef VULKAN -#define BINDING_COLOR_TEXTURE 1 +#define BINDING_COLOR_TEXTURE 0 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 0d6bce214..c0abcc17a 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -17,6 +17,8 @@ #include "core/frontend/emu_window.h" #include "core/memory.h" #include "video_core/gpu.h" +#include "video_core/host_shaders/fxaa_frag_spv.h" +#include "video_core/host_shaders/fxaa_vert_spv.h" #include "video_core/host_shaders/present_bicubic_frag_spv.h" #include "video_core/host_shaders/present_gaussian_frag_spv.h" #include "video_core/host_shaders/present_scaleforce_frag_spv.h" @@ -149,15 +151,9 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); - const VkImageView source_image_view = + VkImageView source_image_view = use_accelerated ? 
screen_info.image_view : *raw_image_views[image_index]; - if (!fsr) { - const bool is_nn = - Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; - UpdateDescriptorSet(image_index, source_image_view, is_nn); - } - BufferData data; SetUniformData(data, layout); SetVertexData(data, framebuffer, layout); @@ -239,6 +235,68 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, }); } + const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); + if (use_accelerated && anti_alias_pass != Settings::AntiAliasing::None) { + UpdateAADescriptorSet(image_index, source_image_view, false); + const u32 up_scale = Settings::values.resolution_info.up_scale; + const u32 down_shift = Settings::values.resolution_info.down_shift; + VkExtent2D size{ + .width = (up_scale * framebuffer.width) >> down_shift, + .height = (up_scale * framebuffer.height) >> down_shift, + }; + source_image_view = *aa_image_view; + scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) { + const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; + const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; + const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; + const VkClearValue clear_color{ + .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, + }; + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = *aa_renderpass, + .framebuffer = *aa_framebuffer, + .renderArea = + { + .offset = {0, 0}, + .extent = size, + }, + .clearValueCount = 1, + .pClearValues = &clear_color, + }; + const VkViewport viewport{ + .x = 0.0f, + .y = 0.0f, + .width = static_cast(size.width), + .height = static_cast(size.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + const VkRect2D scissor{ + .offset = {0, 0}, + .extent = size, + }; + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); + switch 
(anti_alias_pass) { + case Settings::AntiAliasing::Fxaa: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline); + break; + default: + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline); + break; + } + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + + cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0, + aa_descriptor_sets[image_index], {}); + cmdbuf.Draw(4, 1, 0, 0); + cmdbuf.EndRenderPass(); + }); + } + if (fsr) { auto crop_rect = framebuffer.crop_rect; if (crop_rect.GetWidth() == 0) { @@ -251,6 +309,10 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, VkImageView fsr_image_view = fsr->Draw(scheduler, image_index, source_image_view, crop_rect); UpdateDescriptorSet(image_index, fsr_image_view, true); + } else { + const bool is_nn = + Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; + UpdateDescriptorSet(image_index, source_image_view, is_nn); } scheduler.Record( @@ -329,11 +391,16 @@ VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& frameb } vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { + return CreateFramebuffer(image_view, extent, renderpass); +} + +vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent, + vk::RenderPass& rd) { return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .renderPass = *renderpass, + .renderPass = *rd, .attachmentCount = 1, .pAttachments = &image_view, .width = extent.width, @@ -390,6 +457,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) void VKBlitScreen::CreateShaders() { vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); + fxaa_vertex_shader = 
BuildShader(device, FXAA_VERT_SPV); + fxaa_fragment_shader = BuildShader(device, FXAA_FRAG_SPV); bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); @@ -413,6 +482,13 @@ void VKBlitScreen::CreateDescriptorPool() { }, }}; + const std::array pool_sizes_aa{{ + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = static_cast(2 * image_count), + }, + }}; + const VkDescriptorPoolCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, @@ -422,19 +498,33 @@ void VKBlitScreen::CreateDescriptorPool() { .pPoolSizes = pool_sizes.data(), }; descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); + + const VkDescriptorPoolCreateInfo ci_aa{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = static_cast(image_count), + .poolSizeCount = static_cast(pool_sizes_aa.size()), + .pPoolSizes = pool_sizes_aa.data(), + }; + aa_descriptor_pool = device.GetLogical().CreateDescriptorPool(ci_aa); } void VKBlitScreen::CreateRenderPass() { + renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat()); +} + +vk::RenderPass VKBlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) { const VkAttachmentDescription color_attachment{ .flags = 0, - .format = swapchain.GetImageViewFormat(), + .format = format, .samples = VK_SAMPLE_COUNT_1_BIT, .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + .finalLayout = is_present ? 
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL, }; const VkAttachmentReference color_attachment_ref{ @@ -477,7 +567,7 @@ void VKBlitScreen::CreateRenderPass() { .pDependencies = &dependency, }; - renderpass = device.GetLogical().CreateRenderPass(renderpass_ci); + return device.GetLogical().CreateRenderPass(renderpass_ci); } void VKBlitScreen::CreateDescriptorSetLayout() { @@ -498,6 +588,23 @@ void VKBlitScreen::CreateDescriptorSetLayout() { }, }}; + const std::array layout_bindings_aa{{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = nullptr, + }, + }}; + const VkDescriptorSetLayoutCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -506,11 +613,21 @@ void VKBlitScreen::CreateDescriptorSetLayout() { .pBindings = layout_bindings.data(), }; + const VkDescriptorSetLayoutCreateInfo ci_aa{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(layout_bindings_aa.size()), + .pBindings = layout_bindings_aa.data(), + }; + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); + aa_descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci_aa); } void VKBlitScreen::CreateDescriptorSets() { const std::vector layouts(image_count, *descriptor_set_layout); + const std::vector layouts_aa(image_count, *aa_descriptor_set_layout); const VkDescriptorSetAllocateInfo ai{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, @@ -520,7 +637,16 @@ void VKBlitScreen::CreateDescriptorSets() { .pSetLayouts = layouts.data(), }; + const VkDescriptorSetAllocateInfo ai_aa{ + .sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = *aa_descriptor_pool, + .descriptorSetCount = static_cast(image_count), + .pSetLayouts = layouts_aa.data(), + }; + descriptor_sets = descriptor_pool.Allocate(ai); + aa_descriptor_sets = aa_descriptor_pool.Allocate(ai_aa); } void VKBlitScreen::CreatePipelineLayout() { @@ -533,7 +659,17 @@ void VKBlitScreen::CreatePipelineLayout() { .pushConstantRangeCount = 0, .pPushConstantRanges = nullptr, }; + const VkPipelineLayoutCreateInfo ci_aa{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = aa_descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); + aa_pipeline_layout = device.GetLogical().CreatePipelineLayout(ci_aa); } void VKBlitScreen::CreateGraphicsPipeline() { @@ -862,7 +998,7 @@ void VKBlitScreen::CreateFramebuffers() { for (std::size_t i = 0; i < image_count; ++i) { const VkImageView image_view{swapchain.GetImageViewIndex(i)}; - framebuffers[i] = CreateFramebuffer(image_view, size); + framebuffers[i] = CreateFramebuffer(image_view, size, renderpass); } } @@ -872,6 +1008,11 @@ void VKBlitScreen::ReleaseRawImages() { } raw_images.clear(); raw_buffer_commits.clear(); + + aa_image_view.reset(); + aa_image.reset(); + aa_commit = MemoryCommit{}; + buffer.reset(); buffer_commit = MemoryCommit{}; } @@ -898,8 +1039,11 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) raw_image_views.resize(image_count); raw_buffer_commits.resize(image_count); - for (size_t i = 0; i < image_count; ++i) { - raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ + const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1, + u32 down_shift = 0) { + u32 extra_usages = used_on_framebuffer ? 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT + : VK_IMAGE_USAGE_TRANSFER_DST_BIT; + return device.GetLogical().CreateImage(VkImageCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -907,26 +1051,30 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) .format = GetFormat(framebuffer), .extent = { - .width = framebuffer.width, - .height = framebuffer.height, + .width = (up_scale * framebuffer.width) >> down_shift, + .height = (up_scale * framebuffer.height) >> down_shift, .depth = 1, }, .mipLevels = 1, .arrayLayers = 1, .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .tiling = used_on_framebuffer ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | extra_usages, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }); - raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal); - raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ + }; + const auto create_commit = [&](vk::Image& image) { + return memory_allocator.Commit(image, MemoryUsage::DeviceLocal); + }; + const auto create_image_view = [&](vk::Image& image) { + return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, .flags = 0, - .image = *raw_images[i], + .image = *image, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = GetFormat(framebuffer), .components = @@ -945,7 +1093,207 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) .layerCount = 1, }, }); + }; + + for (size_t i = 0; i < image_count; ++i) { + raw_images[i] = create_image(); + raw_buffer_commits[i] = create_commit(raw_images[i]); + raw_image_views[i] = 
create_image_view(raw_images[i]); } + + // AA Resources + const u32 up_scale = Settings::values.resolution_info.up_scale; + const u32 down_shift = Settings::values.resolution_info.down_shift; + aa_image = create_image(true, up_scale, down_shift); + aa_commit = create_commit(aa_image); + aa_image_view = create_image_view(aa_image); + VkExtent2D size{ + .width = (up_scale * framebuffer.width) >> down_shift, + .height = (up_scale * framebuffer.height) >> down_shift, + }; + if (aa_renderpass) { + aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); + return; + } + aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false); + aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); + + const std::array fxaa_shader_stages{{ + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = *fxaa_vertex_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = *fxaa_fragment_shader, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + }}; + + const auto vertex_binding_description = ScreenRectVertex::GetDescription(); + const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); + + const VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &vertex_binding_description, + .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}, + .pVertexAttributeDescriptions = vertex_attrs_description.data(), + }; + + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + 
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = VK_FALSE, + }; + + const VkPipelineViewportStateCreateInfo viewport_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = 1, + .pViewports = nullptr, + .scissorCount = 1, + .pScissors = nullptr, + }; + + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CLOCKWISE, + .depthBiasEnable = VK_FALSE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + + const VkPipelineMultisampleStateCreateInfo multisampling_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + + const VkPipelineColorBlendAttachmentState color_blend_attachment{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = 1, + .pAttachments = 
&color_blend_attachment, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, + }; + + static constexpr std::array dynamic_states{ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }; + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + + const VkGraphicsPipelineCreateInfo fxaa_pipeline_ci{ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(fxaa_shader_stages.size()), + .pStages = fxaa_shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = nullptr, + .pViewportState = &viewport_state_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisampling_ci, + .pDepthStencilState = nullptr, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *aa_pipeline_layout, + .renderPass = *aa_renderpass, + .subpass = 0, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }; + + // AA + aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci); +} + +void VKBlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, + bool nn) const { + const VkDescriptorImageInfo image_info{ + .sampler = nn ? 
*nn_sampler : *sampler, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + + const VkWriteDescriptorSet sampler_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = aa_descriptor_sets[image_index], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + + const VkWriteDescriptorSet sampler_write_2{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = aa_descriptor_sets[image_index], + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + + device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {}); } void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 96a5598ad..e8737537e 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -68,6 +68,9 @@ public: [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent); + [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, + VkExtent2D extent, vk::RenderPass& rd); + private: struct BufferData; @@ -76,6 +79,7 @@ private: void CreateSemaphores(); void CreateDescriptorPool(); void CreateRenderPass(); + vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true); void CreateDescriptorSetLayout(); void CreateDescriptorSets(); void CreatePipelineLayout(); @@ -91,6 +95,7 @@ private: void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); void UpdateDescriptorSet(std::size_t image_index, 
VkImageView image_view, bool nn) const; + void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, const Layout::FramebufferLayout layout) const; @@ -109,6 +114,8 @@ private: const VKScreenInfo& screen_info; vk::ShaderModule vertex_shader; + vk::ShaderModule fxaa_vertex_shader; + vk::ShaderModule fxaa_fragment_shader; vk::ShaderModule bilinear_fragment_shader; vk::ShaderModule bicubic_fragment_shader; vk::ShaderModule gaussian_fragment_shader; @@ -116,6 +123,7 @@ private: vk::DescriptorPool descriptor_pool; vk::DescriptorSetLayout descriptor_set_layout; vk::PipelineLayout pipeline_layout; + vk::Pipeline aa_pipeline; vk::Pipeline nearest_neightbor_pipeline; vk::Pipeline bilinear_pipeline; vk::Pipeline bicubic_pipeline; @@ -136,6 +144,15 @@ private: std::vector raw_images; std::vector raw_image_views; std::vector raw_buffer_commits; + vk::Image aa_image; + vk::ImageView aa_image_view; + MemoryCommit aa_commit; + vk::Framebuffer aa_framebuffer; + vk::RenderPass aa_renderpass; + vk::DescriptorSets aa_descriptor_sets; + vk::DescriptorPool aa_descriptor_pool; + vk::DescriptorSetLayout aa_descriptor_set_layout; + vk::PipelineLayout aa_pipeline_layout; u32 raw_width = 0; u32 raw_height = 0; -- cgit v1.2.3 From 6cdfaee7b4f3f68310e9cca755482e3e6993da10 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 21 Oct 2021 21:31:33 +0200 Subject: Texture Cache: Fix blitting. 
--- src/video_core/texture_cache/util.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 9922aa0cc..ddc9fb13a 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1157,10 +1157,10 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { dst_info.format = dst->info.format; } - if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { dst_info.format = src->info.format; } - if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { src_info.format = dst->info.format; } } -- cgit v1.2.3 From 21a8ba0437989e4255b347c2b2cabbbc2a332fd3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 22 Oct 2021 19:44:10 +0200 Subject: Vulkan: Fix FXAA in AMD. 
--- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 42 +++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index c0abcc17a..2bed4f3c5 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -244,8 +244,35 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, .width = (up_scale * framebuffer.width) >> down_shift, .height = (up_scale * framebuffer.height) >> down_shift, }; - source_image_view = *aa_image_view; scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) { + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = {}, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + + { + VkImageMemoryBarrier fsr_write_barrier = base_barrier; + fsr_write_barrier.image = *aa_image; + fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, fsr_write_barrier); + } + const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; @@ -294,7 +321,18 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, aa_descriptor_sets[image_index], {}); cmdbuf.Draw(4, 1, 0, 0); cmdbuf.EndRenderPass(); + + { + VkImageMemoryBarrier blit_read_barrier = 
base_barrier; + blit_read_barrier.image = *aa_image; + blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT , + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier); + } }); + source_image_view = *aa_image_view; } if (fsr) { @@ -485,7 +523,7 @@ void VKBlitScreen::CreateDescriptorPool() { const std::array pool_sizes_aa{{ { .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = static_cast(2 * image_count), + .descriptorCount = static_cast(image_count * 2), }, }}; -- cgit v1.2.3 From a96c9c803be9aca0b9775c37c1e77e13cca56c80 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 22 Oct 2021 22:56:08 +0200 Subject: Yuzu UI: Add button for Anti Alias --- src/common/settings.h | 1 + src/yuzu/main.cpp | 42 ++++++++++++++++++++++++++++++++++++++++++ src/yuzu/main.h | 2 ++ 3 files changed, 45 insertions(+) (limited to 'src') diff --git a/src/common/settings.h b/src/common/settings.h index ca1c3c1aa..c7610ef1c 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -76,6 +76,7 @@ enum class ScalingFilter : u32 { enum class AntiAliasing : u32 { None = 0, Fxaa = 1, + LastAA = Fxaa, }; struct ResolutionScalingInfo { diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 379bd0b17..d057dc889 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -774,6 +774,26 @@ void GMainWindow::InitializeWidgets() { tas_label->setFocusPolicy(Qt::NoFocus); statusBar()->insertPermanentWidget(0, tas_label); + // setup AA button + aa_status_button = new QPushButton(); + aa_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); + aa_status_button->setFocusPolicy(Qt::NoFocus); + connect(aa_status_button, &QPushButton::clicked, [&] { + auto aa_mode = Settings::values.anti_aliasing.GetValue(); + if (aa_mode == Settings::AntiAliasing::LastAA) { + aa_mode = Settings::AntiAliasing::None; + 
} else { + aa_mode = static_cast(static_cast(aa_mode) + 1); + } + Settings::values.anti_aliasing.SetValue(aa_mode); + aa_status_button->setChecked(true); + UpdateAAText(); + }); + UpdateAAText(); + aa_status_button->setCheckable(true); + aa_status_button->setChecked(true); + statusBar()->insertPermanentWidget(0, aa_status_button); + // Setup Filter button filter_status_button = new QPushButton(); filter_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); @@ -800,6 +820,7 @@ void GMainWindow::InitializeWidgets() { } UpdateFilterText(); filter_status_button->setCheckable(true); + filter_status_button->setChecked(true); statusBar()->insertPermanentWidget(0, filter_status_button); // Setup Dock button @@ -872,6 +893,11 @@ void GMainWindow::InitializeWidgets() { Settings::values.renderer_backend.SetValue(Settings::RendererBackend::Vulkan); } else { Settings::values.renderer_backend.SetValue(Settings::RendererBackend::OpenGL); + const auto filter = Settings::values.scaling_filter.GetValue(); + if (filter == Settings::ScalingFilter::Fsr) { + Settings::values.scaling_filter.SetValue(Settings::ScalingFilter::NearestNeighbor); + UpdateFilterText(); + } } system->ApplySettings(); @@ -3088,12 +3114,28 @@ void GMainWindow::UpdateFilterText() { } } +void GMainWindow::UpdateAAText() { + const auto aa_mode = Settings::values.anti_aliasing.GetValue(); + switch (aa_mode) { + case Settings::AntiAliasing::Fxaa: + aa_status_button->setText(tr("FXAA")); + break; + case Settings::AntiAliasing::None: + aa_status_button->setText(tr("NO AA")); + break; + default: + aa_status_button->setText(tr("FXAA")); + break; + } +} + void GMainWindow::UpdateStatusButtons() { dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::Vulkan); UpdateGPUAccuracyButton(); UpdateFilterText(); + UpdateAAText(); } void GMainWindow::UpdateUISettings() { diff --git 
a/src/yuzu/main.h b/src/yuzu/main.h index d4d2f3d58..24633ff2d 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -303,6 +303,7 @@ private: void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {}, std::string_view gpu_vendor = {}); void UpdateFilterText(); + void UpdateAAText(); void UpdateStatusBar(); void UpdateGPUAccuracyButton(); void UpdateStatusButtons(); @@ -338,6 +339,7 @@ private: QPushButton* renderer_status_button = nullptr; QPushButton* dock_status_button = nullptr; QPushButton* filter_status_button = nullptr; + QPushButton* aa_status_button = nullptr; QTimer status_bar_update_timer; std::unique_ptr config; -- cgit v1.2.3 From 99547d2656ee8e84b684794fa8e013b146f15284 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 23 Oct 2021 00:23:50 +0200 Subject: HostShader: Fix gaussian and add attribution. --- src/video_core/host_shaders/present_gaussian.frag | 42 ++++++++++------------- 1 file changed, 19 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag index d5e2b1781..a9558548f 100644 --- a/src/video_core/host_shaders/present_gaussian.frag +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -2,6 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+// Code obtained from this 2 sources: +// - https://learnopengl.com/Advanced-Lighting/Bloom +// - https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/ + #version 460 core #ifdef VULKAN @@ -14,50 +18,40 @@ #endif -layout (location = 0) in vec2 frag_tex_coord; +layout(location = 0) in vec2 frag_tex_coord; -layout (location = 0) out vec4 color; +layout(location = 0) out vec4 color; -layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; +layout(binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308); const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703); vec4 blurVertical(sampler2D textureSampler, vec2 coord, vec2 norm) { vec4 result = vec4(0.0f); - for (int i=1; i<3; i++) { - result += - texture(textureSampler, vec2(coord) + (vec2(0.0, offset[i]) * norm)) - * weight[i]; - result += - texture(textureSampler, vec2(coord) - (vec2(0.0, offset[i]) * norm)) - * weight[i]; + for (int i = 1; i < 3; i++) { + result += texture(textureSampler, vec2(coord) + (vec2(0.0, offset[i]) * norm)) * weight[i]; + result += texture(textureSampler, vec2(coord) - (vec2(0.0, offset[i]) * norm)) * weight[i]; } return result; } vec4 blurHorizontal(sampler2D textureSampler, vec2 coord, vec2 norm) { vec4 result = vec4(0.0f); - for (int i=1; i<3; i++) { - result += - texture(textureSampler, vec2(coord) + (vec2(offset[i], 0.0) * norm)) - * weight[i]; - result += - texture(textureSampler, vec2(coord) - (vec2(offset[i], 0.0) * norm)) - * weight[i]; + for (int i = 1; i < 3; i++) { + result += texture(textureSampler, vec2(coord) + (vec2(offset[i], 0.0) * norm)) * weight[i]; + result += texture(textureSampler, vec2(coord) - (vec2(offset[i], 0.0) * norm)) * weight[i]; } return result; } vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) { vec4 result = vec4(0.0f); - for (int i=1; i<3; i++) { + for (int i = 1; i < 3; i++) { result += - 
texture(textureSampler, vec2(coord) + (vec2(offset[i], offset[i]) * norm)) - * weight[i]; + texture(textureSampler, vec2(coord) + (vec2(offset[i], offset[i]) * norm)) * weight[i]; result += - texture(textureSampler, vec2(coord) - (vec2(offset[i], offset[i]) * norm)) - * weight[i]; + texture(textureSampler, vec2(coord) - (vec2(offset[i], offset[i]) * norm)) * weight[i]; } return result; } @@ -65,10 +59,12 @@ vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) { void main() { vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0]; vec2 tex_offset = 1.0f / textureSize(color_texture, 0); + + // TODO(Blinkhawk): This code can be optimized through shader group instructions. vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb; vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; vec3 diagonalA = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; - vec3 diagonalB = blurVertical(color_texture, frag_tex_coord, -tex_offset).rgb; + vec3 diagonalB = blurVertical(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb; vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f); color = vec4(combination + base, 1.0f); } -- cgit v1.2.3 From c5dbd93adb0566f0b2b09657b4340cc3da59d703 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 23 Oct 2021 00:25:19 +0200 Subject: VulkanBufferCache: Avoid adding barriers between multiple copies. 
--- src/video_core/buffer_cache/buffer_cache.h | 4 ++- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 38 ++++++++++++++++++++-- src/video_core/renderer_vulkan/vk_buffer_cache.h | 6 +++- 3 files changed, 43 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d350c9b36..43bed63ac 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -853,12 +853,14 @@ void BufferCache<P>
::CommitAsyncFlushesHigh() { } if constexpr (USE_MEMORY_MAPS) { auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); + runtime.PreCopyBarrier(); for (auto& [copy, buffer_id] : downloads) { // Have in mind the staging buffer offset for the copy copy.dst_offset += download_staging.offset; const std::array copies{copy}; - runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); + runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); } + runtime.PostCopyBarrier(); runtime.Finish(); for (const auto& [copy, buffer_id] : downloads) { const Buffer& buffer = slot_buffers[buffer_id]; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 8ac58bc2f..5ffd93499 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -146,7 +146,7 @@ void BufferCacheRuntime::Finish() { } void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, - std::span copies) { + std::span copies, bool barrier) { static constexpr VkMemoryBarrier READ_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .pNext = nullptr, @@ -163,10 +163,42 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, boost::container::small_vector vk_copies(copies.size()); std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { + scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { + if (barrier) { + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); + } + cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); + if (barrier) { + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); + } + 
}); +} + +void BufferCacheRuntime::PreCopyBarrier() { + static constexpr VkMemoryBarrier READ_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + }; + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); - cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); + }); +} + +void BufferCacheRuntime::PostCopyBarrier() { + static constexpr VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); }); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index c27402ff0..1ee0d8420 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -69,8 +69,12 @@ public: [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); + void PreCopyBarrier(); + void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, - std::span copies); + std::span copies, bool barrier = true); + + void PostCopyBarrier(); void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value); -- cgit v1.2.3 From 5c6fa8893589fd70bc743c0d0b77c0c375b24bd3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 23 Oct 2021 01:52:34 +0200 Subject: OpenGlTextureCache: Fix state invalidation on rescaling. 
--- src/video_core/renderer_opengl/gl_texture_cache.cpp | 11 +++++++++++ src/video_core/renderer_opengl/gl_texture_cache.h | 4 ++++ src/video_core/texture_cache/texture_cache.h | 4 ++-- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a6e9eb60b..6e7f66ef0 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -942,10 +942,21 @@ bool Image::Scale(bool up_scale) { dst_info.size.height = scaled_height; upscaled_backup = MakeImage(dst_info, gl_internal_format); } + auto& state_tracker = runtime->GetStateTracker(); + state_tracker.NotifyViewport0(); + state_tracker.NotifyScissor0(); // TODO (ameerj): Investigate other GL states that affect blitting. GLboolean scissor_test; glGetBooleani_v(GL_SCISSOR_TEST, 0, &scissor_test); glDisablei(GL_SCISSOR_TEST, 0); + if (up_scale) { + glViewportIndexedf(0, 0.0f, 0.0f, static_cast(scaled_width), + static_cast(scaled_height)); + } else { + glViewportIndexedf(0, 0.0f, 0.0f, static_cast(original_width), + static_cast(original_height)); + } + const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle; const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index eeb5133d5..8161e6b72 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -116,6 +116,10 @@ public: void TickFrame() {} + StateTracker& GetStateTracker() { + return state_tracker; + } + private: struct StagingBuffers { explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f1254ef62..dd9553806 100644 --- 
a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1810,8 +1810,8 @@ void TextureCache<P>
::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) if (*old_id == new_id) { return; } - if (*old_id) { - const ImageViewBase& old_view = slot_image_views[*old_id]; + if (new_id) { + const ImageViewBase& old_view = slot_image_views[new_id]; if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { uncommitted_downloads.push_back(old_view.image_id); } -- cgit v1.2.3 From 172d4f1e3b08901548ae1d2e7e476f97e8032585 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 22 Oct 2021 22:11:23 -0400 Subject: gl_texture_cache: Simplify scaling procedures --- .../renderer_opengl/gl_texture_cache.cpp | 83 +++++++--------------- src/video_core/renderer_opengl/gl_texture_cache.h | 2 +- 2 files changed, 28 insertions(+), 57 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 6e7f66ef0..6841b5450 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -883,7 +883,7 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } } -bool Image::Scale(bool up_scale) { +void Image::Scale(bool up_scale) { const auto format_type = GetFormatType(info.format); const GLenum attachment = [format_type] { switch (format_type) { @@ -942,77 +942,54 @@ bool Image::Scale(bool up_scale) { dst_info.size.height = scaled_height; upscaled_backup = MakeImage(dst_info, gl_internal_format); } - auto& state_tracker = runtime->GetStateTracker(); - state_tracker.NotifyViewport0(); - state_tracker.NotifyScissor0(); + const u32 src_width = up_scale ? original_width : scaled_width; + const u32 src_height = up_scale ? original_height : scaled_height; + const u32 dst_width = up_scale ? scaled_width : original_width; + const u32 dst_height = up_scale ? scaled_height : original_height; + const auto src_handle = up_scale ? 
texture.handle : upscaled_backup.handle; + const auto dst_handle = up_scale ? upscaled_backup.handle : texture.handle; + // TODO (ameerj): Investigate other GL states that affect blitting. - GLboolean scissor_test; - glGetBooleani_v(GL_SCISSOR_TEST, 0, &scissor_test); glDisablei(GL_SCISSOR_TEST, 0); - if (up_scale) { - glViewportIndexedf(0, 0.0f, 0.0f, static_cast(scaled_width), - static_cast(scaled_height)); - } else { - glViewportIndexedf(0, 0.0f, 0.0f, static_cast(original_width), - static_cast(original_height)); - } - + glViewportIndexedf(0, 0.0f, 0.0f, static_cast(dst_width), + static_cast(dst_height)); const GLuint read_fbo = runtime->rescale_read_fbos[fbo_index].handle; const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle; for (s32 layer = 0; layer < info.resources.layers; ++layer) { for (s32 level = 0; level < info.resources.levels; ++level) { - const u32 src_level_width = - std::max(1u, (up_scale ? original_width : scaled_width) >> level); - const u32 src_level_height = - std::max(1u, (up_scale ? original_height : scaled_height) >> level); - const u32 dst_level_width = - std::max(1u, (up_scale ? scaled_width : original_width) >> level); - const u32 dst_level_height = - std::max(1u, (up_scale ? 
scaled_height : original_height) >> level); - - if (up_scale) { - glNamedFramebufferTextureLayer(read_fbo, attachment, texture.handle, level, layer); - glNamedFramebufferTextureLayer(draw_fbo, attachment, upscaled_backup.handle, level, - layer); - } else { - glNamedFramebufferTextureLayer(read_fbo, attachment, upscaled_backup.handle, level, - layer); - glNamedFramebufferTextureLayer(draw_fbo, attachment, texture.handle, level, layer); - } + const u32 src_level_width = std::max(1u, src_width >> level); + const u32 src_level_height = std::max(1u, src_height >> level); + const u32 dst_level_width = std::max(1u, dst_width >> level); + const u32 dst_level_height = std::max(1u, dst_height >> level); + + glNamedFramebufferTextureLayer(read_fbo, attachment, src_handle, level, layer); + glNamedFramebufferTextureLayer(draw_fbo, attachment, dst_handle, level, layer); glBlitNamedFramebuffer(read_fbo, draw_fbo, 0, 0, src_level_width, src_level_height, 0, 0, dst_level_width, dst_level_height, mask, filter); } } - if (scissor_test != GL_FALSE) { - glEnablei(GL_SCISSOR_TEST, 0); - } - if (up_scale) { - current_texture = upscaled_backup.handle; - } else { - current_texture = texture.handle; - } - - return true; + current_texture = dst_handle; + auto& state_tracker = runtime->GetStateTracker(); + state_tracker.NotifyViewport0(); + state_tracker.NotifyScissor0(); } bool Image::ScaleUp(bool ignore) { if (True(flags & ImageFlagBits::Rescaled)) { return false; } - flags |= ImageFlagBits::Rescaled; - if (!runtime->resolution.active) { - return false; - } if (gl_format == 0 && gl_type == 0) { // compressed textures - flags &= ~ImageFlagBits::Rescaled; return false; } if (info.type == ImageType::Linear) { UNREACHABLE(); - flags &= ~ImageFlagBits::Rescaled; + return false; + } + flags |= ImageFlagBits::Rescaled; + if (!runtime->resolution.active) { return false; } has_scaled = true; @@ -1020,10 +997,7 @@ bool Image::ScaleUp(bool ignore) { current_texture = upscaled_backup.handle; return 
true; } - if (!Scale()) { - flags &= ~ImageFlagBits::Rescaled; - return false; - } + Scale(true); return true; } @@ -1039,10 +1013,7 @@ bool Image::ScaleDown(bool ignore) { current_texture = texture.handle; return true; } - if (!Scale(false)) { - flags &= ~ImageFlagBits::Rescaled; - return false; - } + Scale(false); return true; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 8161e6b72..c51a7428d 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -209,7 +209,7 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); - bool Scale(bool up_scale = true); + void Scale(bool up_scale); OGLTexture texture; OGLTexture upscaled_backup; -- cgit v1.2.3 From 93c9eb196f4444495f50220c3e6b89d2f0b582db Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 22 Oct 2021 22:59:30 -0400 Subject: gl_rasterizer: Fix ScissorTest and Clear when scaling --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index bb24a0656..4df8a684a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -184,6 +184,9 @@ void RasterizerOpenGL::Clear() { SyncRasterizeEnable(); SyncStencilTestState(); + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.UpdateRenderTargets(true); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); if (regs.clear_flags.scissor) { SyncScissorTest(); } else { @@ -192,10 +195,6 @@ void RasterizerOpenGL::Clear() { } UNIMPLEMENTED_IF(regs.clear_flags.viewport); - std::scoped_lock lock{texture_cache.mutex}; - texture_cache.UpdateRenderTargets(true); - 
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - if (use_color) { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); } @@ -925,12 +924,9 @@ void RasterizerOpenGL::SyncScissorTest() { const auto& regs = maxwell3d.regs; const auto& resolution = Settings::values.resolution_info; - const auto scale_up = [resolution](u32 value) -> u32 { - if (value == 0) { - return 0U; - } - const u32 converted_value = (value * resolution.up_scale) >> resolution.down_shift; - return std::max(converted_value, 1U); + const bool is_rescaling{texture_cache.IsRescaling()}; + const auto scale_up = [resolution, is_rescaling](u32 value) { + return is_rescaling ? resolution.ScaleUp(value) : value; }; for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { if (!force && !flags[Dirty::Scissor0 + index]) { -- cgit v1.2.3 From 282a4501d9b8c3e68e1c4545778097888caa7a88 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 22 Oct 2021 23:46:21 -0400 Subject: vk_texture_cache: Refactor 3D scaling helpers --- .../renderer_vulkan/vk_texture_cache.cpp | 185 ++++++++------------- src/video_core/renderer_vulkan/vk_texture_cache.h | 2 + 2 files changed, 74 insertions(+), 113 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 413d472cd..85a1d520b 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -593,13 +593,16 @@ struct RangedBarrierRange { void BlitScale(VKScheduler& scheduler, VkImage src_image, VkImage dst_image, const ImageInfo& info, VkImageAspectFlags aspect_mask, const Settings::ResolutionScalingInfo& resolution, - bool is_bilinear, bool up_scaling = true) { + bool up_scaling = true) { const bool is_2d = info.type == ImageType::e2D; const auto resources = info.resources; const VkExtent2D extent{ .width = info.size.width, .height = 
info.size.height, }; + // Depth and integer formats must use NEAREST filter for blits. + const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT}; + const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)}; const VkFilter vk_filter = is_bilinear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; scheduler.RequestOutsideRenderPassOperationContext(); @@ -1144,10 +1147,10 @@ bool Image::ScaleUp(bool ignore) { } has_scaled = true; const auto& device = runtime->device; - const bool is_2d = info.type == ImageType::e2D; - const u32 scaled_width = resolution.ScaleUp(info.size.width); - const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; if (!scaled_image) { + const bool is_2d = info.type == ImageType::e2D; + const u32 scaled_width = resolution.ScaleUp(info.size.width); + const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; auto scaled_info = info; scaled_info.size.width = scaled_width; scaled_info.size.height = scaled_height; @@ -1168,61 +1171,10 @@ bool Image::ScaleUp(bool ignore) { const PixelFormat format = StorageFormat(info.format); const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT}; - const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)}; if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { - BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution, - device.IsFormatSupported(vk_format, - VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT, - OPTIMAL_FORMAT)); + BlitScale(*scheduler, *original_image, *scaled_image, info, aspect_mask, resolution); } else { - using namespace VideoCommon; - static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; - const auto operation = is_bilinear ? 
Tegra::Engines::Fermi2D::Filter::Bilinear - : Tegra::Engines::Fermi2D::Filter::Point; - - if (!scale_view) { - const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); - scale_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); - } - auto* view_ptr = scale_view.get(); - - const Region2D src_region{ - .start = {0, 0}, - .end = {static_cast(info.size.width), static_cast(info.size.height)}, - }; - const Region2D dst_region{ - .start = {0, 0}, - .end = {static_cast(scaled_width), static_cast(scaled_height)}, - }; - const VkExtent2D extent{ - .width = scaled_width, - .height = scaled_height, - }; - if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { - if (!scale_framebuffer) { - scale_framebuffer = - std::make_unique(*runtime, view_ptr, nullptr, extent); - } - const auto color_view = scale_view->Handle(Shader::TextureType::Color2D); - - runtime->blit_image_helper.BlitColor(scale_framebuffer.get(), color_view, dst_region, - src_region, operation, BLIT_OPERATION); - } else if (!runtime->device.IsBlitDepthStencilSupported() && - aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - if (!scale_framebuffer) { - scale_framebuffer = - std::make_unique(*runtime, nullptr, view_ptr, extent); - } - runtime->blit_image_helper.BlitDepthStencil( - scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(), - dst_region, src_region, operation, BLIT_OPERATION); - } else { - // TODO: Use helper blits where applicable - flags &= ~ImageFlagBits::Rescaled; - LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", format); - return false; - } + return BlitScaleHelper(true); } return true; } @@ -1231,82 +1183,89 @@ bool Image::ScaleDown(bool ignore) { if (False(flags & ImageFlagBits::Rescaled)) { return false; } + ASSERT(info.type != ImageType::Linear); flags &= ~ImageFlagBits::Rescaled; const auto& resolution = runtime->resolution; if (!resolution.active) { return false; } + current_image = *original_image; if 
(ignore) { - current_image = *original_image; return true; } - const auto& device = runtime->device; - const bool is_2d = info.type == ImageType::e2D; - const u32 scaled_width = resolution.ScaleUp(info.size.width); - const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; if (aspect_mask == 0) { aspect_mask = ImageAspectMask(info.format); } static constexpr auto OPTIMAL_FORMAT = FormatType::Optimal; const PixelFormat format = StorageFormat(info.format); + const auto& device = runtime->device; const auto vk_format = MaxwellToVK::SurfaceFormat(device, OPTIMAL_FORMAT, false, format).format; const auto blit_usage = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT}; - const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)}; if (device.IsFormatSupported(vk_format, blit_usage, OPTIMAL_FORMAT)) { - BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, - is_bilinear, false); + BlitScale(*scheduler, *scaled_image, *original_image, info, aspect_mask, resolution, false); } else { - using namespace VideoCommon; - static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; - const auto operation = is_bilinear ? 
Tegra::Engines::Fermi2D::Filter::Bilinear - : Tegra::Engines::Fermi2D::Filter::Point; - - if (!normal_view) { - const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); - normal_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); - } - auto* view_ptr = normal_view.get(); + return BlitScaleHelper(false); + } + return true; +} - const Region2D src_region{ - .start = {0, 0}, - .end = {static_cast(scaled_width), static_cast(scaled_height)}, - }; - const Region2D dst_region{ - .start = {0, 0}, - .end = {static_cast(info.size.width), static_cast(info.size.height)}, - }; - const VkExtent2D extent{ - .width = scaled_width, - .height = scaled_height, - }; - if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { - if (!normal_framebuffer) { - normal_framebuffer = - std::make_unique(*runtime, view_ptr, nullptr, extent); - } - const auto color_view = normal_view->Handle(Shader::TextureType::Color2D); - - runtime->blit_image_helper.BlitColor(normal_framebuffer.get(), color_view, dst_region, - src_region, operation, BLIT_OPERATION); - } else if (!runtime->device.IsBlitDepthStencilSupported() && - aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - if (!normal_framebuffer) { - normal_framebuffer = - std::make_unique(*runtime, nullptr, view_ptr, extent); - } - runtime->blit_image_helper.BlitDepthStencil( - normal_framebuffer.get(), normal_view->DepthView(), normal_view->StencilView(), - dst_region, src_region, operation, BLIT_OPERATION); - } else { - // TODO: Use helper blits where applicable - flags &= ~ImageFlagBits::Rescaled; - LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", format); - return false; +bool Image::BlitScaleHelper(bool scale_up) { + using namespace VideoCommon; + static constexpr auto BLIT_OPERATION = Tegra::Engines::Fermi2D::Operation::SrcCopy; + const bool is_color{aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT}; + const bool is_bilinear{is_color && !IsPixelFormatInteger(info.format)}; + const auto 
operation = is_bilinear ? Tegra::Engines::Fermi2D::Filter::Bilinear + : Tegra::Engines::Fermi2D::Filter::Point; + + const bool is_2d = info.type == ImageType::e2D; + const auto& resolution = runtime->resolution; + const u32 scaled_width = resolution.ScaleUp(info.size.width); + const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; + if (!scale_view) { + const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); + scale_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); + } + + const u32 src_width = scale_up ? info.size.width : scaled_width; + const u32 src_height = scale_up ? info.size.height : scaled_height; + const u32 dst_width = scale_up ? scaled_width : info.size.width; + const u32 dst_height = scale_up ? scaled_height : info.size.height; + const Region2D src_region{ + .start = {0, 0}, + .end = {static_cast(src_width), static_cast(src_height)}, + }; + const Region2D dst_region{ + .start = {0, 0}, + .end = {static_cast(dst_width), static_cast(dst_height)}, + }; + const VkExtent2D extent{ + .width = scaled_width, + .height = scaled_height, + }; + + auto* view_ptr = scale_view.get(); + if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { + if (!scale_framebuffer) { + scale_framebuffer = std::make_unique(*runtime, view_ptr, nullptr, extent); + } + const auto color_view = scale_view->Handle(Shader::TextureType::Color2D); + + runtime->blit_image_helper.BlitColor(scale_framebuffer.get(), color_view, dst_region, + src_region, operation, BLIT_OPERATION); + } else if (!runtime->device.IsBlitDepthStencilSupported() && + aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + if (!scale_framebuffer) { + scale_framebuffer = std::make_unique(*runtime, nullptr, view_ptr, extent); } + runtime->blit_image_helper.BlitDepthStencil( + scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(), dst_region, + src_region, operation, BLIT_OPERATION); + } else { + // TODO: Use helper 
blits where applicable + flags &= ~ImageFlagBits::Rescaled; + LOG_ERROR(Render_Vulkan, "Device does not support scaling format {}", info.format); + return false; } - ASSERT(info.type != ImageType::Linear); - current_image = *original_image; return true; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 8dbddfaf7..9d149d306 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -134,6 +134,8 @@ public: bool ScaleDown(bool ignore = false); private: + bool BlitScaleHelper(bool scale_up); + VKScheduler* scheduler{}; TextureCacheRuntime* runtime{}; -- cgit v1.2.3 From a39e867c73140955120700b170dbdb4773d32745 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 23 Oct 2021 00:02:19 -0400 Subject: renderer_vulkan/blit_image: Use generic color state on Depth to Color blits Fixes Bayonetta 2 on AMD --- src/video_core/renderer_vulkan/blit_image.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 239698423..b3884a4f5 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -569,7 +569,7 @@ VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePip .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, .layout = *two_textures_pipeline_layout, .renderPass = key.renderpass, -- cgit v1.2.3 From dcc5b4f6b005a2c89bb4e77bca4cfe8705734021 Mon Sep 17 00:00:00 2001 From: 
Marshall Mohror Date: Fri, 22 Oct 2021 23:09:29 -0500 Subject: Presentation: Only use FP16 in scaling shaders on supported devices in Vulkan --- src/video_core/CMakeLists.txt | 1 + src/video_core/host_shaders/CMakeLists.txt | 10 +- src/video_core/host_shaders/fidelityfx_fsr.comp | 106 ++++++++--------- .../host_shaders/opengl_present_scaleforce.frag | 130 +++++++++++++++++++++ .../host_shaders/present_scaleforce.frag | 112 ------------------ .../host_shaders/vulkan_fidelityfx_fsr_easu.comp | 13 --- .../vulkan_fidelityfx_fsr_easu_fp16.comp | 11 ++ .../vulkan_fidelityfx_fsr_easu_fp32.comp | 10 ++ .../host_shaders/vulkan_fidelityfx_fsr_rcas.comp | 13 --- .../vulkan_fidelityfx_fsr_rcas_fp16.comp | 11 ++ .../vulkan_fidelityfx_fsr_rcas_fp32.comp | 10 ++ .../vulkan_present_scaleforce_fp16.frag | 7 ++ .../vulkan_present_scaleforce_fp32.frag | 5 + src/video_core/renderer_opengl/renderer_opengl.cpp | 5 +- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 17 ++- src/video_core/renderer_vulkan/vk_fsr.cpp | 46 +++++--- src/video_core/renderer_vulkan/vk_fsr.h | 2 +- 17 files changed, 295 insertions(+), 214 deletions(-) create mode 100644 src/video_core/host_shaders/opengl_present_scaleforce.frag delete mode 100644 src/video_core/host_shaders/present_scaleforce.frag delete mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp delete mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp create mode 100644 src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag create mode 100644 src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag (limited to 'src') diff --git a/src/video_core/CMakeLists.txt 
b/src/video_core/CMakeLists.txt index 91a30fef7..07b94dcc8 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -237,6 +237,7 @@ target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR}) target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) +target_link_libraries(video_core PRIVATE ffx-fsr) add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 6b5ea649a..d779a967a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -18,16 +18,20 @@ set(SHADER_FILES opengl_copy_bc4.comp opengl_present.frag opengl_present.vert + opengl_present_scaleforce.frag pitch_unswizzle.comp - present_scaleforce.frag present_bicubic.frag present_gaussian.frag vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag - vulkan_fidelityfx_fsr_easu.comp - vulkan_fidelityfx_fsr_rcas.comp + vulkan_fidelityfx_fsr_easu_fp16.comp + vulkan_fidelityfx_fsr_easu_fp32.comp + vulkan_fidelityfx_fsr_rcas_fp16.comp + vulkan_fidelityfx_fsr_rcas_fp32.comp vulkan_present.frag vulkan_present.vert + vulkan_present_scaleforce_fp16.frag + vulkan_present_scaleforce_fp32.frag vulkan_quad_indexed.comp vulkan_uint8.comp ) diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp index cbb601580..6b97f789d 100644 --- a/src/video_core/host_shaders/fidelityfx_fsr.comp +++ b/src/video_core/host_shaders/fidelityfx_fsr.comp @@ -28,80 +28,82 @@ // THE SOFTWARE. 
layout( push_constant ) uniform constants { - u32vec2 input_size; + uvec4 Const0; + uvec4 Const1; + uvec4 Const2; + uvec4 Const3; }; -uvec4 Const0; -uvec4 Const1; -uvec4 Const2; -uvec4 Const3; +layout(set=0,binding=0) uniform sampler2D InputTexture; +layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; #define A_GPU 1 #define A_GLSL 1 -#define A_HALF -#include "ffx_a.h" +#ifndef YUZU_USE_FP16 + #include "ffx_a.h" -f16vec4 LinearToSRGB(f16vec4 linear) { - bvec4 selector = greaterThan(linear, f16vec4(0.00313066844250063)); - f16vec4 low = linear * float16_t(12.92); - f16vec4 high = float16_t(1.055) * pow(linear, f16vec4(1 / 2.4)) - float16_t(0.055); - return mix(low, high, selector); -} - -f16vec4 SRGBToLinear(f16vec4 srgb) { - bvec4 selector = greaterThan(srgb, f16vec4(0.0404482362771082)); - f16vec4 low = srgb * float16_t(1.0 / 12.92); - f16vec4 high = pow((srgb + float16_t(0.055)) * float16_t(1.0 / 1.055), f16vec4(2.4)); - return mix(low, high, selector); -} + #if USE_EASU + #define FSR_EASU_F 1 + AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; } + AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; } + AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_F 1 + AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); } + void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} + #endif +#else + #define A_HALF + #include "ffx_a.h" -#if USE_EASU - #define FSR_EASU_H 1 - f16vec4 FsrEasuRH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 0)); return res; } - f16vec4 FsrEasuGH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 1)); return res; } - f16vec4 FsrEasuBH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 2)); return res; } -#endif -#if USE_RCAS - #define FSR_RCAS_H 1 - f16vec4 FsrRcasLoadH(ASW2 p) { return f16vec4(texelFetch(InputTexture, ASU2(p), 0)); } - void 
FsrRcasInputH(inout float16_t r, inout float16_t g, inout float16_t b) {} + #if USE_EASU + #define FSR_EASU_H 1 + AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; } + AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; } + AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_H 1 + AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); } + void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){} + #endif #endif #include "ffx_fsr1.h" -void CurrFilter(u32vec2 pos) { - // For debugging +void CurrFilter(AU2 pos) { #if USE_BILINEAR - vec2 pp = (vec2(pos) * vec2_AU2(Const0.xy) + vec2_AU2(Const0.zw)) * vec2_AU2(Const1.xy) + vec2(0.5, -0.5) * vec2_AU2(Const1.zw); - imageStore(OutputTexture, ivec2(pos), textureLod(InputTexture, pp, 0.0)); + AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw); + imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0)); #endif #if USE_EASU - f16vec3 c; - FsrEasuH(c, pos, Const0, Const1, Const2, Const3); - imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); + #ifndef YUZU_USE_FP16 + AF3 c; + FsrEasuF(c, pos, Const0, Const1, Const2, Const3); + imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + #else + AH3 c; + FsrEasuH(c, pos, Const0, Const1, Const2, Const3); + imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + #endif #endif #if USE_RCAS - f16vec3 c; - FsrRcasH(c.r, c.g, c.b, pos, Const0); - imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); + #ifndef YUZU_USE_FP16 + AF3 c; + FsrRcasF(c.r, c.g, c.b, pos, Const0); + imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + #else + AH3 c; + FsrRcasH(c.r, c.g, c.b, pos, Const0); + imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + #endif #endif - } layout(local_size_x=64) in; void main() { - -#if USE_EASU || USE_BILINEAR - vec2 ires = vec2(input_size); 
- vec2 tres = textureSize(InputTexture, 0); - vec2 ores = imageSize(OutputTexture); - FsrEasuCon(Const0, Const1, Const2, Const3, ires.x, ires.y, tres.x, tres.y, ores.x, ores.y); -#endif -#if USE_RCAS - FsrRcasCon(Const0, 0.25f); -#endif - // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); CurrFilter(gxy); diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag new file mode 100644 index 000000000..71ff9e1e3 --- /dev/null +++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag @@ -0,0 +1,130 @@ +// MIT License +// +// Copyright (c) 2020 BreadFish64 +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce + +//! 
#version 460 + +#extension GL_ARB_separate_shader_objects : enable + +#ifdef YUZU_USE_FP16 + +#extension GL_AMD_gpu_shader_half_float : enable +#extension GL_NV_gpu_shader5 : enable + +#define lfloat float16_t +#define lvec2 f16vec2 +#define lvec3 f16vec3 +#define lvec4 f16vec4 + +#else + +#define lfloat float +#define lvec2 vec2 +#define lvec3 vec3 +#define lvec4 vec4 + +#endif + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout (location = 0) in vec2 tex_coord; + +layout (location = 0) out vec4 frag_color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; + +const bool ignore_alpha = true; + +lfloat ColorDist1(lvec4 a, lvec4 b) { + // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion + const lvec3 K = lvec3(0.2627, 0.6780, 0.0593); + const lfloat scaleB = lfloat(0.5) / (lfloat(1.0) - K.b); + const lfloat scaleR = lfloat(0.5) / (lfloat(1.0) - K.r); + lvec4 diff = a - b; + lfloat Y = dot(diff.rgb, K); + lfloat Cb = scaleB * (diff.b - Y); + lfloat Cr = scaleR * (diff.r - Y); + lvec3 YCbCr = lvec3(Y, Cb, Cr); + lfloat d = length(YCbCr); + if (ignore_alpha) { + return d; + } + return sqrt(a.a * b.a * d * d + diff.a * diff.a); +} + +lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) { + return lvec4( + ColorDist1(ref, A), + ColorDist1(ref, B), + ColorDist1(ref, C), + ColorDist1(ref, D) + ); +} + +vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { + lvec4 bl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); + lvec4 bc = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1))); + lvec4 br = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1))); + lvec4 cl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); + lvec4 cc = lvec4(texture(tex, tex_coord)); + lvec4 cr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0))); + lvec4 tl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); + lvec4 tc = lvec4(textureOffset(tex, tex_coord, 
ivec2(0, 1))); + lvec4 tr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1))); + + lvec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); + lvec4 offset_br = ColorDist(cc, br, bc, bl, cl); + + // Calculate how different cc is from the texels around it + const lfloat plus_weight = lfloat(1.5); + const lfloat cross_weight = lfloat(1.5); + lfloat total_dist = dot(offset_tl + offset_br, lvec4(cross_weight, plus_weight, cross_weight, plus_weight)); + + if (total_dist == lfloat(0.0)) { + return cc; + } else { + // Add together all the distances with direction taken into account + lvec4 tmp = offset_tl - offset_br; + lvec2 total_offset = tmp.wy * plus_weight + (tmp.zz + lvec2(-tmp.x, tmp.x)) * cross_weight; + + // When the image has thin points, they tend to split apart. + // This is because the texels all around are different and total_offset reaches into clear areas. + // This works pretty well to keep the offset in bounds for these cases. + lfloat clamp_val = length(total_offset) / total_dist; + vec2 final_offset = vec2(clamp(total_offset, -clamp_val, clamp_val)) / textureSize(tex, 0); + + return texture(tex, tex_coord - final_offset); + } +} + +void main() { + frag_color = Scaleforce(input_texture, tex_coord); +} diff --git a/src/video_core/host_shaders/present_scaleforce.frag b/src/video_core/host_shaders/present_scaleforce.frag deleted file mode 100644 index ebc0d9b90..000000000 --- a/src/video_core/host_shaders/present_scaleforce.frag +++ /dev/null @@ -1,112 +0,0 @@ -// MIT License -// -// Copyright (c) 2020 BreadFish64 -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following 
conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce - -#version 460 - -#extension GL_AMD_gpu_shader_half_float : enable -#extension GL_NV_gpu_shader5 : enable - -#ifdef VULKAN - -#define BINDING_COLOR_TEXTURE 1 - -#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv - -#define BINDING_COLOR_TEXTURE 0 - -#endif - -layout (location = 0) in vec2 tex_coord; - -layout (location = 0) out vec4 frag_color; - -layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; - -const bool ignore_alpha = true; - -float16_t ColorDist1(f16vec4 a, f16vec4 b) { - // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion - const f16vec3 K = f16vec3(0.2627, 0.6780, 0.0593); - const float16_t scaleB = float16_t(0.5) / (float16_t(1.0) - K.b); - const float16_t scaleR = float16_t(0.5) / (float16_t(1.0) - K.r); - f16vec4 diff = a - b; - float16_t Y = dot(diff.rgb, K); - float16_t Cb = scaleB * (diff.b - Y); - float16_t Cr = scaleR * (diff.r - Y); - f16vec3 YCbCr = f16vec3(Y, Cb, Cr); - float16_t d = length(YCbCr); - if (ignore_alpha) { - return d; - } - return sqrt(a.a * b.a * d * d + diff.a * diff.a); -} - -f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) { - return f16vec4( - ColorDist1(ref, A), - ColorDist1(ref, B), - ColorDist1(ref, C), - ColorDist1(ref, D) - ); -} - -vec4 
Scaleforce(sampler2D tex, vec2 tex_coord) { - f16vec4 bl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); - f16vec4 bc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, -1))); - f16vec4 br = f16vec4(textureOffset(tex, tex_coord, ivec2(1, -1))); - f16vec4 cl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); - f16vec4 cc = f16vec4(texture(tex, tex_coord)); - f16vec4 cr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 0))); - f16vec4 tl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); - f16vec4 tc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, 1))); - f16vec4 tr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 1))); - - f16vec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); - f16vec4 offset_br = ColorDist(cc, br, bc, bl, cl); - - // Calculate how different cc is from the texels around it - const float16_t plus_weight = float16_t(1.5); - const float16_t cross_weight = float16_t(1.5); - float16_t total_dist = dot(offset_tl + offset_br, f16vec4(cross_weight, plus_weight, cross_weight, plus_weight)); - - if (total_dist == float16_t(0.0)) { - return cc; - } else { - // Add together all the distances with direction taken into account - f16vec4 tmp = offset_tl - offset_br; - f16vec2 total_offset = tmp.wy * plus_weight + (tmp.zz + f16vec2(-tmp.x, tmp.x)) * cross_weight; - - // When the image has thin points, they tend to split apart. - // This is because the texels all around are different and total_offset reaches into clear areas. - // This works pretty well to keep the offset in bounds for these cases. 
- float16_t clamp_val = length(total_offset) / total_dist; - f16vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) / f16vec2(textureSize(tex, 0)); - - return texture(tex, tex_coord - final_offset); - } -} - -void main() { - frag_color = Scaleforce(input_texture, tex_coord); -} \ No newline at end of file diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp deleted file mode 100644 index 6525eeeb5..000000000 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#version 460 core -#extension GL_GOOGLE_include_directive : enable - -layout(set=0,binding=0) uniform sampler2D InputTexture; -layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; - -#define USE_EASU 1 - -#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp new file mode 100644 index 000000000..1c96a7905 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp @@ -0,0 +1,11 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define YUZU_USE_FP16 +#define USE_EASU 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp new file mode 100644 index 000000000..f4daff739 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_EASU 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp deleted file mode 100644 index 9463ed842..000000000 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#version 460 core -#extension GL_GOOGLE_include_directive : enable - -layout(set=0,binding=0) uniform sampler2D InputTexture; -layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; - -#define USE_RCAS 1 - -#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp new file mode 100644 index 000000000..6b6796dd1 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp @@ -0,0 +1,11 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define YUZU_USE_FP16 +#define USE_RCAS 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp new file mode 100644 index 000000000..f785eebf3 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_RCAS 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag new file mode 100644 index 000000000..924c03060 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag @@ -0,0 +1,7 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : enable + +#define YUZU_USE_FP16 + +#include "opengl_present_scaleforce.frag" diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag new file mode 100644 index 000000000..a594b83ca --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag @@ -0,0 +1,5 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : enable + +#include "opengl_present_scaleforce.frag" diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index e63f0bdd8..28daacd82 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -24,10 +24,10 @@ #include "video_core/host_shaders/fxaa_frag.h" #include "video_core/host_shaders/fxaa_vert.h" #include "video_core/host_shaders/opengl_present_frag.h" +#include "video_core/host_shaders/opengl_present_scaleforce_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/host_shaders/present_bicubic_frag.h" #include "video_core/host_shaders/present_gaussian_frag.h" -#include "video_core/host_shaders/present_scaleforce_frag.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -266,7 +266,8 @@ void RendererOpenGL::InitOpenGLObjects() { present_gaussian_fragment = 
CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); present_scaleforce_fragment = - CreateProgram(HostShaders::PRESENT_SCALEFORCE_FRAG, GL_FRAGMENT_SHADER); + CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), + GL_FRAGMENT_SHADER); // Generate presentation sampler present_sampler.Create(); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 2bed4f3c5..9dfc508bc 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -21,8 +21,9 @@ #include "video_core/host_shaders/fxaa_vert_spv.h" #include "video_core/host_shaders/present_bicubic_frag_spv.h" #include "video_core/host_shaders/present_gaussian_frag_spv.h" -#include "video_core/host_shaders/present_scaleforce_frag_spv.h" #include "video_core/host_shaders/vulkan_present_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h" #include "video_core/host_shaders/vulkan_present_vert_spv.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" @@ -328,7 +329,7 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT , + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier); } }); @@ -344,8 +345,12 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, crop_rect.bottom = framebuffer.height; } crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); + VkExtent2D fsr_input_size{ + .width = 
Settings::values.resolution_info.ScaleUp(framebuffer.width), + .height = Settings::values.resolution_info.ScaleUp(framebuffer.height), + }; VkImageView fsr_image_view = - fsr->Draw(scheduler, image_index, source_image_view, crop_rect); + fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); UpdateDescriptorSet(image_index, fsr_image_view, true); } else { const bool is_nn = @@ -500,7 +505,11 @@ void VKBlitScreen::CreateShaders() { bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); - scaleforce_fragment_shader = BuildShader(device, PRESENT_SCALEFORCE_FRAG_SPV); + if (device.IsFloat16Supported()) { + scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV); + } else { + scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV); + } } void VKBlitScreen::CreateSemaphores() { diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 1f60974be..9288aa7c2 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -4,13 +4,19 @@ #include "common/common_types.h" #include "common/div_ceil.h" -#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_comp_spv.h" -#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h" #include "video_core/renderer_vulkan/vk_fsr.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include 
"video_core/vulkan_common/vulkan_device.h" +#define A_CPU +#include +#include + namespace Vulkan { FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, @@ -29,11 +35,11 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image } VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, - const Common::Rectangle& crop_rect) { + VkExtent2D input_image_extent, const Common::Rectangle& crop_rect) { UpdateDescriptorSet(image_index, image_view); - scheduler.Record([this, image_index, crop_rect](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) { const VkImageMemoryBarrier base_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, @@ -54,13 +60,18 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im }, }; - // TODO: Support clear color cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, - VkExtent2D{ - .width = static_cast(crop_rect.GetWidth()), - .height = static_cast(crop_rect.GetHeight()), - }); + + std::array push_constants; + FsrEasuConOffset( + push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8, + push_constants.data() + 12, + + static_cast(crop_rect.GetWidth()), static_cast(crop_rect.GetHeight()), + static_cast(input_image_extent.width), static_cast(input_image_extent.height), + static_cast(output_size.width), static_cast(output_size.height), + static_cast(crop_rect.left), static_cast(crop_rect.top)); + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); { VkImageMemoryBarrier fsr_write_barrier = base_barrier; @@ -77,7 +88,9 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im Common::DivCeil(output_size.height, 16u), 1); 
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline); - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, output_size); + + FsrRcasCon(push_constants.data(), 0.25f); + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); { std::array barriers; @@ -247,7 +260,7 @@ void FSR::CreatePipelineLayout() { VkPushConstantRange push_const{ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .offset = 0, - .size = sizeof(std::array), + .size = sizeof(std::array), }; VkPipelineLayoutCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, @@ -344,8 +357,13 @@ void FSR::CreateSampler() { } void FSR::CreateShaders() { - easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_COMP_SPV); - rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_COMP_SPV); + if (device.IsFloat16Supported()) { + easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV); + rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV); + } else { + easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV); + rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV); + } } void FSR::CreatePipeline() { diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h index 8391e2e58..6bbec3d36 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.h +++ b/src/video_core/renderer_vulkan/vk_fsr.h @@ -18,7 +18,7 @@ public: explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, VkExtent2D output_size); VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, - const Common::Rectangle& crop_rect); + VkExtent2D input_image_extent, const Common::Rectangle& crop_rect); private: void CreateDescriptorPool(); -- cgit v1.2.3 From 47369faaabee9fa47208890ed2945e54f3251f8a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 23 Oct 2021 00:15:19 
-0400 Subject: vk_blit_screen: Fix AA destruction order --- src/video_core/renderer_vulkan/vk_blit_screen.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index e8737537e..ad0cd8ee1 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -34,12 +34,11 @@ namespace Vulkan { struct ScreenInfo; class Device; +class FSR; class RasterizerVulkan; class VKScheduler; class VKSwapchain; -class FSR; - struct VKScreenInfo { VkImageView image_view{}; u32 width{}; @@ -123,7 +122,6 @@ private: vk::DescriptorPool descriptor_pool; vk::DescriptorSetLayout descriptor_set_layout; vk::PipelineLayout pipeline_layout; - vk::Pipeline aa_pipeline; vk::Pipeline nearest_neightbor_pipeline; vk::Pipeline bilinear_pipeline; vk::Pipeline bicubic_pipeline; @@ -144,15 +142,18 @@ private: std::vector raw_images; std::vector raw_image_views; std::vector raw_buffer_commits; - vk::Image aa_image; - vk::ImageView aa_image_view; - MemoryCommit aa_commit; - vk::Framebuffer aa_framebuffer; - vk::RenderPass aa_renderpass; - vk::DescriptorSets aa_descriptor_sets; + vk::DescriptorPool aa_descriptor_pool; vk::DescriptorSetLayout aa_descriptor_set_layout; vk::PipelineLayout aa_pipeline_layout; + vk::Pipeline aa_pipeline; + vk::RenderPass aa_renderpass; + vk::Framebuffer aa_framebuffer; + vk::DescriptorSets aa_descriptor_sets; + vk::Image aa_image; + vk::ImageView aa_image_view; + MemoryCommit aa_commit; + u32 raw_width = 0; u32 raw_height = 0; -- cgit v1.2.3 From bb0367548591a66cd963b14373ed5b9c61adabb1 Mon Sep 17 00:00:00 2001 From: Marshall Mohror Date: Sat, 23 Oct 2021 00:11:12 -0500 Subject: Vulkan: Reimplement FSR constant generation functions to avoid GCC warnings --- src/video_core/CMakeLists.txt | 1 - src/video_core/renderer_vulkan/vk_fsr.cpp | 153 ++++++++++++++++++++++++++++-- 2 files 
changed, 145 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 07b94dcc8..91a30fef7 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -237,7 +237,6 @@ target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR}) target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) -target_link_libraries(video_core PRIVATE ffx-fsr) add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 9288aa7c2..2feaa9d37 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -13,9 +13,146 @@ #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/vulkan_common/vulkan_device.h" -#define A_CPU -#include -#include +// Reimplementations of the constant generating functions in ffx_fsr1.h +// GCC generated a lot of warnings when using the official header. 
+static u32 AU1_AH1_AF1(f32 f) { + static constexpr u32 base[512]{ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, + 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, + 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, + 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, + 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, + 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, + 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, + 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 
0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + }; + static constexpr s8 shift[512]{ + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, + 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, + 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, + }; + auto u = std::bit_cast(f); + u32 i = u >> 23; + return base[i] + ((u & 0x7fffff) >> shift[i]); +} + +static u32 AU1_AH2_AF2(f32 a[2]) { + return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16); +} + +static void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], + f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, + f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, + f32 outputSizeInPixelsY) { + con0[0] = std::bit_cast(inputViewportInPixelsX / outputSizeInPixelsX); + con0[1] = 
std::bit_cast(inputViewportInPixelsY / outputSizeInPixelsY); + con0[2] = std::bit_cast(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); + con0[3] = std::bit_cast(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); + con1[0] = std::bit_cast(1.0f / inputSizeInPixelsX); + con1[1] = std::bit_cast(1.0f / inputSizeInPixelsY); + con1[2] = std::bit_cast(1.0f / inputSizeInPixelsX); + con1[3] = std::bit_cast(-1.0f / inputSizeInPixelsY); + con2[0] = std::bit_cast(-1.0f / inputSizeInPixelsX); + con2[1] = std::bit_cast(2.0f / inputSizeInPixelsY); + con2[2] = std::bit_cast(1.0f / inputSizeInPixelsX); + con2[3] = std::bit_cast(2.0f / inputSizeInPixelsY); + con3[0] = std::bit_cast(0.0f / inputSizeInPixelsX); + con3[1] = std::bit_cast(4.0f / inputSizeInPixelsY); + con3[2] = con3[3] = 0; +} + +static void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], + f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, + f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, + f32 outputSizeInPixelsX, f32 outputSizeInPixelsY, + f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { + FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, + inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); + con0[2] = std::bit_cast(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + + inputOffsetInPixelsX); + con0[3] = std::bit_cast(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + + inputOffsetInPixelsY); +} + +static void FsrRcasCon(u32* con, f32 sharpness) { + sharpness = std::exp2f(-sharpness); + f32 hSharp[2]{sharpness, sharpness}; + con[0] = std::bit_cast(sharpness); + con[1] = AU1_AH2_AF2(hSharp); + con[2] = 0; + con[3] = 0; +} namespace Vulkan { @@ -62,15 +199,15 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); - std::array push_constants; + std::array push_constants; FsrEasuConOffset( 
push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8, push_constants.data() + 12, - static_cast(crop_rect.GetWidth()), static_cast(crop_rect.GetHeight()), - static_cast(input_image_extent.width), static_cast(input_image_extent.height), - static_cast(output_size.width), static_cast(output_size.height), - static_cast(crop_rect.left), static_cast(crop_rect.top)); + static_cast(crop_rect.GetWidth()), static_cast(crop_rect.GetHeight()), + static_cast(input_image_extent.width), static_cast(input_image_extent.height), + static_cast(output_size.width), static_cast(output_size.height), + static_cast(crop_rect.left), static_cast(crop_rect.top)); cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); { -- cgit v1.2.3 From 99124b72618285114ac3ff820732a510bbf7aae4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 23 Oct 2021 01:29:44 -0400 Subject: FSR: Fix GCC build errors --- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 19 +++--- src/video_core/renderer_vulkan/vk_blit_screen.h | 2 + src/video_core/renderer_vulkan/vk_fsr.cpp | 72 ++++++++++++----------- 3 files changed, 50 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 9dfc508bc..1e447e621 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -468,20 +468,14 @@ void VKBlitScreen::CreateDynamicResources() { CreateGraphicsPipeline(); fsr.reset(); if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { - const auto& layout = render_window.GetFramebufferLayout(); - fsr = std::make_unique( - device, memory_allocator, image_count, - VkExtent2D{.width = layout.screen.GetWidth(), .height = layout.screen.GetHeight()}); + CreateFSR(); } } void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) 
{ if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { if (!fsr) { - const auto& layout = render_window.GetFramebufferLayout(); - fsr = std::make_unique( - device, memory_allocator, image_count, - VkExtent2D{.width = layout.screen.GetWidth(), .height = layout.screen.GetHeight()}); + CreateFSR(); } } else { fsr.reset(); @@ -1443,6 +1437,15 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v); } +void VKBlitScreen::CreateFSR() { + const auto& layout = render_window.GetFramebufferLayout(); + const VkExtent2D fsr_size{ + .width = layout.screen.GetWidth(), + .height = layout.screen.GetHeight(), + }; + fsr = std::make_unique(device, memory_allocator, image_count, fsr_size); +} + u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const { return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; } diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index ad0cd8ee1..bbca71af3 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -99,6 +99,8 @@ private: void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, const Layout::FramebufferLayout layout) const; + void CreateFSR(); + u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, std::size_t image_index) const; diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 2feaa9d37..73629d229 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -2,8 +2,11 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include +#include "common/bit_cast.h" #include "common/common_types.h" #include "common/div_ceil.h" + #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" @@ -13,9 +16,11 @@ #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/vulkan_common/vulkan_device.h" +namespace Vulkan { +namespace { // Reimplementations of the constant generating functions in ffx_fsr1.h // GCC generated a lot of warnings when using the official header. -static u32 AU1_AH1_AF1(f32 f) { +u32 AU1_AH1_AF1(f32 f) { static constexpr u32 base[512]{ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, @@ -102,59 +107,56 @@ static u32 AU1_AH1_AF1(f32 f) { 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, }; - auto u = std::bit_cast(f); - u32 i = u >> 23; + const u32 u = Common::BitCast(f); + const u32 i = u >> 23; return base[i] + ((u & 0x7fffff) >> shift[i]); } -static u32 AU1_AH2_AF2(f32 a[2]) { +u32 AU1_AH2_AF2(f32 a[2]) { return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16); } -static void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], - f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, - f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, - f32 outputSizeInPixelsY) { - con0[0] = std::bit_cast(inputViewportInPixelsX / outputSizeInPixelsX); - con0[1] = std::bit_cast(inputViewportInPixelsY / outputSizeInPixelsY); - con0[2] = std::bit_cast(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); - con0[3] = std::bit_cast(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); - con1[0] = std::bit_cast(1.0f / inputSizeInPixelsX); - con1[1] = std::bit_cast(1.0f / 
inputSizeInPixelsY); - con1[2] = std::bit_cast(1.0f / inputSizeInPixelsX); - con1[3] = std::bit_cast(-1.0f / inputSizeInPixelsY); - con2[0] = std::bit_cast(-1.0f / inputSizeInPixelsX); - con2[1] = std::bit_cast(2.0f / inputSizeInPixelsY); - con2[2] = std::bit_cast(1.0f / inputSizeInPixelsX); - con2[3] = std::bit_cast(2.0f / inputSizeInPixelsY); - con3[0] = std::bit_cast(0.0f / inputSizeInPixelsX); - con3[1] = std::bit_cast(4.0f / inputSizeInPixelsY); +void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX, + f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, + f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) { + con0[0] = Common::BitCast(inputViewportInPixelsX / outputSizeInPixelsX); + con0[1] = Common::BitCast(inputViewportInPixelsY / outputSizeInPixelsY); + con0[2] = Common::BitCast(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); + con0[3] = Common::BitCast(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); + con1[0] = Common::BitCast(1.0f / inputSizeInPixelsX); + con1[1] = Common::BitCast(1.0f / inputSizeInPixelsY); + con1[2] = Common::BitCast(1.0f / inputSizeInPixelsX); + con1[3] = Common::BitCast(-1.0f / inputSizeInPixelsY); + con2[0] = Common::BitCast(-1.0f / inputSizeInPixelsX); + con2[1] = Common::BitCast(2.0f / inputSizeInPixelsY); + con2[2] = Common::BitCast(1.0f / inputSizeInPixelsX); + con2[3] = Common::BitCast(2.0f / inputSizeInPixelsY); + con3[0] = Common::BitCast(0.0f / inputSizeInPixelsX); + con3[1] = Common::BitCast(4.0f / inputSizeInPixelsY); con3[2] = con3[3] = 0; } -static void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], - f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, - f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, - f32 outputSizeInPixelsX, f32 outputSizeInPixelsY, - f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { +void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], + f32 
inputViewportInPixelsX, f32 inputViewportInPixelsY, + f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, + f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); - con0[2] = std::bit_cast(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + - inputOffsetInPixelsX); - con0[3] = std::bit_cast(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + - inputOffsetInPixelsY); + con0[2] = Common::BitCast(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + + inputOffsetInPixelsX); + con0[3] = Common::BitCast(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + + inputOffsetInPixelsY); } -static void FsrRcasCon(u32* con, f32 sharpness) { +void FsrRcasCon(u32* con, f32 sharpness) { sharpness = std::exp2f(-sharpness); f32 hSharp[2]{sharpness, sharpness}; - con[0] = std::bit_cast(sharpness); + con[0] = Common::BitCast(sharpness); con[1] = AU1_AH2_AF2(hSharp); con[2] = 0; con[3] = 0; } - -namespace Vulkan { +} // Anonymous namespace FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, VkExtent2D output_size_) -- cgit v1.2.3 From 864f2e0b8144a0814f2555d06fffe1e7439854fe Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 23 Oct 2021 01:49:41 -0400 Subject: configure_graphics.ui: Cleanup scaling options and fix duplicate name warning --- src/yuzu/configuration/configure_graphics.ui | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 0d2987fcf..660b68c1c 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -335,12 +335,12 @@ - 0.5X (360p/540p)[MAY BREAK] + 0.5X 
(360p/540p) [EXPERIMENTAL] - 0.75X (540p/810p)[MAY BREAK] + 0.75X (540p/810p) [EXPERIMENTAL] @@ -350,12 +350,12 @@ - 2X (1440p/2160[4K]p) + 2X (1440p/2160p) - 3X (2160p[4K]/3240p) + 3X (2160p/3240p) @@ -380,7 +380,7 @@ - + 0 -- cgit v1.2.3 From 87abab71fff2189c549ae247eb6c281c8f618acd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 23 Oct 2021 02:40:02 -0400 Subject: host_shaders: Misc copyright/style changes --- src/video_core/host_shaders/fxaa.frag | 6 +++++- src/video_core/host_shaders/fxaa.vert | 10 ++++------ src/video_core/host_shaders/present_bicubic.frag | 2 +- src/video_core/host_shaders/present_gaussian.frag | 4 ++-- 4 files changed, 12 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/video_core/host_shaders/fxaa.frag b/src/video_core/host_shaders/fxaa.frag index 23f910d4c..02f4068d1 100644 --- a/src/video_core/host_shaders/fxaa.frag +++ b/src/video_core/host_shaders/fxaa.frag @@ -1,4 +1,8 @@ -// Adapted from +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// Source code is adapted from // https://www.geeks3d.com/20110405/fxaa-fast-approximate-anti-aliasing-demo-glsl-opengl-test-radeon-geforce/3/ #version 460 diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert index 01d5ff4df..ac20c04e9 100644 --- a/src/video_core/host_shaders/fxaa.vert +++ b/src/video_core/host_shaders/fxaa.vert @@ -1,4 +1,4 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
@@ -16,10 +16,12 @@ layout (location = 0) out vec4 posPos; #ifdef VULKAN #define BINDING_COLOR_TEXTURE 0 +#define VERTEX_ID gl_VertexIndex #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv #define BINDING_COLOR_TEXTURE 0 +#define VERTEX_ID gl_VertexID #endif @@ -28,11 +30,7 @@ layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; const float FXAA_SUBPIX_SHIFT = 0; void main() { -#ifdef VULKAN - vec2 vertex = vertices[gl_VertexIndex]; -#else - vec2 vertex = vertices[gl_VertexID]; -#endif + vec2 vertex = vertices[VERTEX_ID]; gl_Position = vec4(vertex, 0.0, 1.0); vec2 vert_tex_coord = (vertex + 1.0) / 2.0; posPos.xy = vert_tex_coord; diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag index f3e5410e7..902b70c2b 100644 --- a/src/video_core/host_shaders/present_bicubic.frag +++ b/src/video_core/host_shaders/present_bicubic.frag @@ -1,4 +1,4 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag index a9558548f..72a300dac 100644 --- a/src/video_core/host_shaders/present_gaussian.frag +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -1,8 +1,8 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -// Code obtained from this 2 sources: +// Code adapted from the following sources: // - https://learnopengl.com/Advanced-Lighting/Bloom // - https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/ -- cgit v1.2.3 From c97c46747d07756075ce6a498a51126189d2be0d Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 23 Oct 2021 15:56:44 +0200 Subject: Vulkan: fix regression. 
--- .../renderer_vulkan/vk_texture_cache.cpp | 31 ++++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 85a1d520b..1c0741250 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1221,9 +1221,12 @@ bool Image::BlitScaleHelper(bool scale_up) { const auto& resolution = runtime->resolution; const u32 scaled_width = resolution.ScaleUp(info.size.width); const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; - if (!scale_view) { + std::unique_ptr& blit_view = scale_up ? scale_view : normal_view; + std::unique_ptr& blit_framebuffer = + scale_up ? scale_framebuffer : normal_framebuffer; + if (!blit_view) { const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); - scale_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); + blit_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); } const u32 src_width = scale_up ? 
info.size.width : scaled_width; @@ -1239,27 +1242,27 @@ bool Image::BlitScaleHelper(bool scale_up) { .end = {static_cast(dst_width), static_cast(dst_height)}, }; const VkExtent2D extent{ - .width = scaled_width, - .height = scaled_height, + .width = std::max(scaled_width, info.size.width), + .height = std::max(scaled_height, info.size.width), }; - auto* view_ptr = scale_view.get(); + auto* view_ptr = blit_view.get(); if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { - if (!scale_framebuffer) { - scale_framebuffer = std::make_unique(*runtime, view_ptr, nullptr, extent); + if (!blit_framebuffer) { + blit_framebuffer = std::make_unique(*runtime, view_ptr, nullptr, extent); } - const auto color_view = scale_view->Handle(Shader::TextureType::Color2D); + const auto color_view = blit_view->Handle(Shader::TextureType::Color2D); - runtime->blit_image_helper.BlitColor(scale_framebuffer.get(), color_view, dst_region, + runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region, src_region, operation, BLIT_OPERATION); } else if (!runtime->device.IsBlitDepthStencilSupported() && aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - if (!scale_framebuffer) { - scale_framebuffer = std::make_unique(*runtime, nullptr, view_ptr, extent); + if (!blit_framebuffer) { + blit_framebuffer = std::make_unique(*runtime, nullptr, view_ptr, extent); } - runtime->blit_image_helper.BlitDepthStencil( - scale_framebuffer.get(), scale_view->DepthView(), scale_view->StencilView(), dst_region, - src_region, operation, BLIT_OPERATION); + runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(), + blit_view->StencilView(), dst_region, + src_region, operation, BLIT_OPERATION); } else { // TODO: Use helper blits where applicable flags &= ~ImageFlagBits::Rescaled; -- cgit v1.2.3 From 9189aacfe28d0114fbfff0e3f89e8912f5377454 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 23 Oct 2021 15:57:05 +0200 Subject: OpenGL: Fix 
viewport/Scissor scaling on downscaling. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 34 +++++++++++++++++++----- 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4df8a684a..d8ac46d2a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -187,6 +187,7 @@ void RasterizerOpenGL::Clear() { std::scoped_lock lock{texture_cache.mutex}; texture_cache.UpdateRenderTargets(true); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + SyncViewport(); if (regs.clear_flags.scissor) { SyncScissorTest(); } else { @@ -571,6 +572,15 @@ void RasterizerOpenGL::SyncViewport() { } const bool is_rescaling{texture_cache.IsRescaling()}; const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; + const auto conv = [scale](float value) -> GLfloat { + float new_value = value * scale; + if (scale < 1.0f) { + const bool sign = std::signbit(value); + new_value = std::round(std::abs(new_value)); + new_value = sign ? 
-new_value : new_value; + } + return static_cast(new_value); + }; if (dirty_viewport) { flags[Dirty::Viewports] = false; @@ -586,10 +596,11 @@ void RasterizerOpenGL::SyncViewport() { flags[Dirty::Viewport0 + index] = false; const auto& src = regs.viewport_transform[index]; - GLfloat x = (src.translate_x - src.scale_x) * scale; - GLfloat y = (src.translate_y - src.scale_y) * scale; - GLfloat width = src.scale_x * 2.0f * scale; - GLfloat height = src.scale_y * 2.0f * scale; + GLfloat x = conv(src.translate_x - src.scale_x); + GLfloat y = conv(src.translate_y - src.scale_y); + GLfloat width = conv(src.scale_x * 2.0f); + GLfloat height = conv(src.scale_y * 2.0f); + if (height < 0) { y += height; height = -height; @@ -925,8 +936,19 @@ void RasterizerOpenGL::SyncScissorTest() { const auto& resolution = Settings::values.resolution_info; const bool is_rescaling{texture_cache.IsRescaling()}; - const auto scale_up = [resolution, is_rescaling](u32 value) { - return is_rescaling ? resolution.ScaleUp(value) : value; + const u32 up_scale = is_rescaling ? resolution.up_scale : 1U; + const u32 down_shift = is_rescaling ? resolution.down_shift : 0U; + const auto scale_up = [up_scale, down_shift](u32 value) -> u32 { + if (value == 0) { + return 0U; + } + const u32 upset = value * up_scale; + u32 acumm{}; + if ((up_scale >> down_shift) == 0) { + acumm = upset % 2; + } + const u32 converted_value = upset >> down_shift; + return std::max(converted_value + acumm, 1U); }; for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { if (!force && !flags[Dirty::Scissor0 + index]) { -- cgit v1.2.3 From 099b0b3167d6dc47b764331f40aa935e8a9ef86a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 23 Oct 2021 17:17:02 +0200 Subject: Texture Cache: Fix memory usage on ScaleDown. 
--- src/video_core/texture_cache/texture_cache.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index dd9553806..26ab857c9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -878,10 +878,6 @@ bool TextureCache

::ScaleDown(Image& image) { if (!rescaled) { return false; } - const bool has_copy = image.HasScaled(); - if (!has_copy) { - total_used_memory -= GetScaledImageSizeBytes(image); - } InvalidateScale(image); return true; } -- cgit v1.2.3 From 9fc1fa1b0dd015db15c6eaafe68206943bf4cbc1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 24 Oct 2021 22:52:43 -0400 Subject: gl_resource_manager: Ensure non EXT_framebuffer objects are created --- src/video_core/renderer_opengl/gl_resource_manager.cpp | 7 ++++++- src/video_core/renderer_opengl/gl_texture_cache.cpp | 14 ++------------ 2 files changed, 8 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 70947838c..5e7101d28 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -166,7 +166,12 @@ void OGLFramebuffer::Create() { return; MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glCreateFramebuffers(1, &handle); + // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of + // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared + // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with + // mismatching size, this is why core framebuffers are preferred. 
+ glGenFramebuffers(1, &handle); + glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); } void OGLFramebuffer::Release() { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 6841b5450..00610ea2c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -478,10 +478,6 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& for (size_t i = 0; i < rescale_draw_fbos.size(); ++i) { rescale_draw_fbos[i].Create(); rescale_read_fbos[i].Create(); - - // Make sure the framebuffer is created without DSA - glBindFramebuffer(GL_READ_FRAMEBUFFER, rescale_draw_fbos[i].handle); - glBindFramebuffer(GL_READ_FRAMEBUFFER, rescale_read_fbos[i].handle); } } } @@ -1224,13 +1220,8 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { - // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of - // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared - // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with - // mismatching size, this is why core framebuffers are preferred. 
- GLuint handle; - glGenFramebuffers(1, &handle); - glBindFramebuffer(GL_READ_FRAMEBUFFER, handle); + framebuffer.Create(); + GLuint handle = framebuffer.handle; GLsizei num_buffers = 0; std::array gl_draw_buffers; @@ -1278,7 +1269,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span(name.size()), name.data()); } - framebuffer.handle = handle; } void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image, -- cgit v1.2.3 From 917b2466ad996cae75d9a0ca31226597b256acf9 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 24 Oct 2021 23:07:15 -0400 Subject: texture_cache: Refactor Render Target scaling function --- src/video_core/texture_cache/texture_cache.h | 33 +++++++++++++---------- src/video_core/texture_cache/texture_cache_base.h | 5 ++++ 2 files changed, 24 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 26ab857c9..c8031b695 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -192,19 +192,8 @@ void TextureCache

::SynchronizeComputeDescriptors() { } template -void TextureCache

::UpdateRenderTargets(bool is_clear) { - using namespace VideoCommon::Dirty; +bool TextureCache

::RescaleRenderTargets(bool is_clear) { auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::RenderTargets]) { - for (size_t index = 0; index < NUM_RT; ++index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); - } - const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; - PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - return; - } - u32 scale_rating = 0; bool rescaled = false; std::array tmp_color_images{}; @@ -281,8 +270,6 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { scale_rating = 1; } } while (has_deleted_images); - // Rescale End - const auto set_rating = [this, scale_rating](ImageId image_id) { if (image_id != CORRUPT_ID) { Image& image = slot_images[image_id]; @@ -297,6 +284,24 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { } set_rating(tmp_depth_image); + return rescaled; +} + +template +void TextureCache

::UpdateRenderTargets(bool is_clear) { + using namespace VideoCommon::Dirty; + auto& flags = maxwell3d.dirty.flags; + if (!flags[Dirty::RenderTargets]) { + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); + return; + } + + const bool rescaled = RescaleRenderTargets(is_clear); if (is_rescaling != rescaled) { flags[Dirty::RescaleViewports] = true; flags[Dirty::RescaleScissors] = true; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index eea589269..643ad811c 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -119,6 +119,11 @@ public: /// Refresh the state for compute image view and sampler descriptors void SynchronizeComputeDescriptors(); + /// Updates the Render Targets if they can be rescaled + /// @param is_clear True when the render targets are being used for clears + /// @retval True if the Render Targets have been rescaled. + bool RescaleRenderTargets(bool is_clear); + /// Update bound render targets and upload memory if necessary /// @param is_clear True when the render targets are being used for clears void UpdateRenderTargets(bool is_clear); -- cgit v1.2.3 From de1c8c5c2c3131bb122351e676014cdc7c442e78 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 29 Oct 2021 17:02:57 +0200 Subject: Texture Cahe/Shader decompiler: Resize PointSize on rescaling, refactor and make reaper more agressive on 4Gb GPUs. 
--- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 21 +++++++++++++++++++++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 +++-- src/video_core/texture_cache/image_base.h | 1 - src/video_core/texture_cache/image_info.cpp | 6 +++--- src/video_core/texture_cache/texture_cache.h | 18 ++---------------- 5 files changed, 29 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index a5fa4ee83..81098c038 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -75,6 +75,14 @@ void PatchFragCoord(IR::Block& block, IR::Inst& inst) { inst.ReplaceUsesWith(downscaled_frag_coord); } +void PatchPointSize(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::F32 point_value{inst.Arg(1)}; + const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())}; + const IR::F32 upscaled_point_value{ir.FPMul(point_value, up_factor)}; + inst.SetArg(1, upscaled_point_value); +} + [[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) { IR::U32 scaled_value{value}; if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { @@ -253,6 +261,19 @@ void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { } break; } + case IR::Opcode::SetAttribute: { + const IR::Attribute attr{inst.Arg(0).Attribute()}; + switch (attr) { + case IR::Attribute::PointSize: + if (inst.Flags() != 0xDEADBEEF) { + PatchPointSize(block, inst); + } + break; + default: + break; + } + break; + } case IR::Opcode::ImageQueryDimensions: PatchImageQueryDimensions(block, inst); break; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d8ac46d2a..9b516c64f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ 
b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -976,8 +976,9 @@ void RasterizerOpenGL::SyncPointState() { oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); - - glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); + const bool is_rescaling{texture_cache.IsRescaling()}; + const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; + glPointSize(std::max(1.0f, maxwell3d.regs.point_size * scale)); } void RasterizerOpenGL::SyncLineState() { diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 02c669766..89c111c00 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -38,7 +38,6 @@ enum class ImageFlagBits : u32 { Rescaled = 1 << 12, CheckingRescalable = 1 << 13, IsRescalable = 1 << 14, - Blacklisted = 1 << 15, }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index d8e414247..015a2d33d 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -135,7 +135,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) type = ImageType::e3D; size.depth = rt.depth; } else { - rescaleable = block.depth == 0 && size.height > 256; + rescaleable = block.depth == 0; downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = rt.depth; @@ -165,7 +165,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { type = ImageType::e3D; size.depth = regs.zeta_depth; } else { - rescaleable = block.depth == 0 && size.height > 256; + rescaleable = block.depth == 0; downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = regs.zeta_depth; @@ -199,7 +199,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { 
.height = config.height, .depth = 1, }; - rescaleable = block.depth == 0 && size.height > 256; + rescaleable = block.depth == 0; downscaleable = size.height > 512; } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c8031b695..aec130a32 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -53,8 +53,8 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& const auto device_memory = runtime.GetDeviceLocalMemory(); const u64 possible_expected_memory = (device_memory * 4) / 10; const u64 possible_critical_memory = (device_memory * 7) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); minimum_memory = 0; } else { // On OpenGL we can be more conservatives as the driver takes care. @@ -355,7 +355,6 @@ void TextureCache

::FillImageViews(DescriptorTable& table, if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { const ImageViewBase& image_view{slot_image_views[view.id]}; auto& image = slot_images[image_view.image_id]; - image.flags |= ImageFlagBits::Blacklisted; has_blacklisted |= ScaleDown(image); image.scale_rating = 0; } @@ -985,7 +984,6 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA bool can_rescale = info.rescaleable; bool any_rescaled = false; - bool any_blacklisted = false; for (const ImageId sibling_id : all_siblings) { if (!can_rescale) { break; @@ -993,7 +991,6 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA Image& sibling = slot_images[sibling_id]; can_rescale &= ImageCanRescale(sibling); any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); - any_blacklisted |= True(sibling.flags & ImageFlagBits::Blacklisted); } can_rescale &= any_rescaled; @@ -1007,9 +1004,6 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA for (const ImageId sibling_id : all_siblings) { Image& sibling = slot_images[sibling_id]; ScaleDown(sibling); - if (any_blacklisted) { - sibling.flags |= ImageFlagBits::Blacklisted; - } } } @@ -1644,7 +1638,6 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { boost::container::small_vector aliased_images; Image& image = slot_images[image_id]; bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); - bool any_blacklisted = True(image.flags & ImageFlagBits::Blacklisted); u64 most_recent_tick = image.modification_tick; for (const AliasedImage& aliased : image.aliased_images) { ImageBase& aliased_image = slot_images[aliased.id]; @@ -1652,7 +1645,6 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); - any_blacklisted |= True(aliased_image.flags & ImageFlagBits::Blacklisted); } } if (aliased_images.empty()) { @@ -1664,9 +1656,6 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { ScaleUp(image); } else { ScaleDown(image); - if (any_blacklisted) { - image.flags |= ImageFlagBits::Blacklisted; - } } } image.modification_tick = most_recent_tick; @@ -1684,9 +1673,6 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { Image& aliased_image = slot_images[aliased->id]; if (!can_rescale) { ScaleDown(aliased_image); - if (any_blacklisted) { - aliased_image.flags |= ImageFlagBits::Blacklisted; - } CopyImage(image_id, aliased->id, aliased->copies); continue; } -- cgit v1.2.3 From d46a71e786b42ecfe702ae00e01e6dcdf9121875 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 29 Oct 2021 17:45:02 +0200 Subject: HostShader: fix Gaussian filter. --- src/video_core/host_shaders/present_gaussian.frag | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag index 72a300dac..66fed3238 100644 --- a/src/video_core/host_shaders/present_gaussian.frag +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -63,8 +63,8 @@ void main() { // TODO(Blinkhawk): This code can be optimized through shader group instructions. vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb; vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; - vec3 diagonalA = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; - vec3 diagonalB = blurVertical(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb; + vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb; + vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb; vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f); color = vec4(combination + base, 1.0f); } -- cgit v1.2.3 From 6f9869096378a5100620b3f2b685d61bf8ccae16 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 30 Oct 2021 01:47:18 +0200 Subject: ShaderCache: Better fix for Shuffling gl_FragCoord --- src/shader_recompiler/ir_opt/rescaling_pass.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'src') diff --git 
a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index 81098c038..c28500dd1 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -30,7 +30,7 @@ namespace { return false; } -void VisitMark(const IR::Inst& inst) { +void VisitMark(IR::Block& block, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::ShuffleIndex: case IR::Opcode::ShuffleUp: @@ -49,19 +49,30 @@ void VisitMark(const IR::Inst& inst) { break; } IR::Inst* const bitcast_inst{bitcast_arg.InstRecursive()}; + bool must_patch_outside = false; if (bitcast_inst->GetOpcode() == IR::Opcode::GetAttribute) { const IR::Attribute attr{bitcast_inst->Arg(0).Attribute()}; switch (attr) { case IR::Attribute::PositionX: case IR::Attribute::PositionY: bitcast_inst->SetFlags(0xDEADBEEF); + must_patch_outside = true; break; default: break; } } + if (must_patch_outside) { + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::F32 new_inst{&*block.PrependNewInst(it, inst)}; + const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())}; + const IR::Value converted{ir.FPMul(new_inst, up_factor)}; + inst.ReplaceUsesWith(converted); + } break; } + default: break; } @@ -302,7 +313,7 @@ void RescalingPass(IR::Program& program) { if (is_fragment_shader) { for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { - VisitMark(inst); + VisitMark(*block, inst); } } } -- cgit v1.2.3 From 6c97ab571a3d169d1d2a5472040d4373ea61184d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 30 Oct 2021 01:50:32 +0200 Subject: Texture Cache: revert Image changes. 
--- src/video_core/texture_cache/image_info.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 015a2d33d..afb94082b 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -16,6 +16,7 @@ namespace VideoCommon { using Tegra::Texture::TextureType; using Tegra::Texture::TICEntry; using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::SurfaceType; ImageInfo::ImageInfo(const TICEntry& config) noexcept { format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, @@ -102,6 +103,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { layer_stride = CalculateLayerStride(*this); maybe_unaligned_layer_stride = CalculateLayerSize(*this); rescaleable &= (block.depth == 0) && resources.levels == 1; + rescaleable &= size.height > 256 || GetFormatType(format) != SurfaceType::ColorTexture; downscaleable = size.height > 512; } } @@ -136,6 +138,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) size.depth = rt.depth; } else { rescaleable = block.depth == 0; + rescaleable &= size.height > 256; downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = rt.depth; @@ -200,6 +203,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { .depth = 1, }; rescaleable = block.depth == 0; + rescaleable &= size.height > 256; downscaleable = size.height > 512; } } -- cgit v1.2.3 From 5230378709470da56927e85c50d0524f9ce3f81b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 30 Oct 2021 01:52:11 +0200 Subject: TextureCache: Make a better Anisotropic setter. 
--- src/video_core/renderer_opengl/gl_texture_cache.cpp | 9 ++++++++- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 9 ++++++++- src/video_core/textures/texture.cpp | 19 +------------------ src/yuzu/configuration/configure_graphics_advanced.ui | 8 ++++---- 4 files changed, 21 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 00610ea2c..c2668fee6 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1201,7 +1201,14 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy()); + const f32 setting_anisotropic = + static_cast(1U << Settings::values.max_anisotropy.GetValue()); + const f32 game_anisotropic = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); + const bool aument_anisotropic = + game_anisotropic > 1.0f || config.mipmap_filter == TextureMipmapFilter::Linear; + const f32 max_anisotropy = + aument_anisotropic ? 
std::max(game_anisotropic, setting_anisotropic) : game_anisotropic; + glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy); } else { LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1c0741250..7db561ca0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1448,7 +1448,14 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required"); } // Some games have samplers with garbage. Sanitize them here. - const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); + const f32 setting_anisotropic = + static_cast(1U << Settings::values.max_anisotropy.GetValue()); + const f32 game_anisotropic = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); + const bool aument_anisotropic = + game_anisotropic > 1.0f || tsc.mipmap_filter == TextureMipmapFilter::Linear; + const f32 max_anisotropy = + aument_anisotropic ? 
std::max(game_anisotropic, setting_anisotropic) : game_anisotropic; + sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .pNext = pnext, diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index a552543ed..b2d5bb03e 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -6,7 +6,6 @@ #include #include "common/cityhash.h" -#include "common/settings.h" #include "video_core/textures/texture.h" using Tegra::Texture::TICEntry; @@ -51,22 +50,6 @@ constexpr std::array SRGB_CONVERSION_LUT = { 0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f, }; -unsigned SettingsMinimumAnisotropy() noexcept { - switch (static_cast(Settings::values.max_anisotropy.GetValue())) { - default: - case Anisotropy::Default: - return 1U; - case Anisotropy::Filter2x: - return 2U; - case Anisotropy::Filter4x: - return 4U; - case Anisotropy::Filter8x: - return 8U; - case Anisotropy::Filter16x: - return 16U; - } -} - } // Anonymous namespace std::array TSCEntry::BorderColor() const noexcept { @@ -78,7 +61,7 @@ std::array TSCEntry::BorderColor() const noexcept { } float TSCEntry::MaxAnisotropy() const noexcept { - return static_cast(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); + return static_cast(1U << max_anisotropy); } } // namespace Tegra::Texture diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index d06b45f17..cbbcd45a0 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -130,22 +130,22 @@ - 2x (WILL BREAK THINGS) + 2x - 4x (WILL BREAK THINGS) + 4x - 8x (WILL BREAK THINGS) + 8x - 16x (WILL BREAK THINGS) + 16x -- cgit v1.2.3 From 282e04bffb4962dcc1d8aee2cb0fd2a1a45c86e6 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 16 Nov 2021 23:07:17 +0100 Subject: 
TextureCache: Add automatic anisotropic filtering and refactor code. --- src/common/settings.h | 2 +- src/video_core/renderer_opengl/gl_texture_cache.cpp | 8 +------- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 8 +------- src/video_core/textures/texture.cpp | 15 ++++++++++++++- src/yuzu/configuration/configure_graphics_advanced.ui | 5 +++++ 5 files changed, 22 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/common/settings.h b/src/common/settings.h index c7610ef1c..42f8b4a7d 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -515,7 +515,7 @@ struct Values { #endif FullscreenMode::Borderless, FullscreenMode::Exclusive, "fullscreen_mode"}; RangedSetting aspect_ratio{0, 0, 3, "aspect_ratio"}; - RangedSetting max_anisotropy{0, 0, 4, "max_anisotropy"}; + RangedSetting max_anisotropy{0, 0, 5, "max_anisotropy"}; Setting use_speed_limit{true, "use_speed_limit"}; RangedSetting speed_limit{100, 0, 9999, "speed_limit"}; Setting use_disk_shader_cache{true, "use_disk_shader_cache"}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index c2668fee6..1d3f193af 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1201,13 +1201,7 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { - const f32 setting_anisotropic = - static_cast(1U << Settings::values.max_anisotropy.GetValue()); - const f32 game_anisotropic = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); - const bool aument_anisotropic = - game_anisotropic > 1.0f || config.mipmap_filter == TextureMipmapFilter::Linear; - const f32 max_anisotropy = - aument_anisotropic ? 
std::max(game_anisotropic, setting_anisotropic) : game_anisotropic; + const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy); } else { LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 7db561ca0..daf26f380 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1448,13 +1448,7 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required"); } // Some games have samplers with garbage. Sanitize them here. - const f32 setting_anisotropic = - static_cast(1U << Settings::values.max_anisotropy.GetValue()); - const f32 game_anisotropic = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); - const bool aument_anisotropic = - game_anisotropic > 1.0f || tsc.mipmap_filter == TextureMipmapFilter::Linear; - const f32 max_anisotropy = - aument_anisotropic ? 
std::max(game_anisotropic, setting_anisotropic) : game_anisotropic; + const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index b2d5bb03e..ba066f98f 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -6,6 +6,7 @@ #include #include "common/cityhash.h" +#include "common/settings.h" #include "video_core/textures/texture.h" using Tegra::Texture::TICEntry; @@ -61,7 +62,19 @@ std::array TSCEntry::BorderColor() const noexcept { } float TSCEntry::MaxAnisotropy() const noexcept { - return static_cast(1U << max_anisotropy); + if (max_anisotropy == 0 && mipmap_filter != TextureMipmapFilter::Linear) { + return 1.0f; + } + const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue(); + u32 new_max_anisotropic{}; + if (anisotropic_settings == 0) { + const auto anisotropic_based_onscale = Settings::values.resolution_info.up_scale >> + Settings::values.resolution_info.down_shift; + new_max_anisotropic = std::max(anisotropic_based_onscale + 1U, 1U); + } else { + new_max_anisotropic = Settings::values.max_anisotropy.GetValue(); + } + return static_cast(1U << std::min(max_anisotropy + anisotropic_settings - 1, 31U)); } } // namespace Tegra::Texture diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index cbbcd45a0..96de0b3d1 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -123,6 +123,11 @@ + + + Automatic + + Default -- cgit v1.2.3 From 978f598ff64d3bd0299d06c47e6cbd63a496122c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 17 Nov 2021 00:59:46 +0100 Subject: TextureCache: Fix OGL cleaning --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 
4 ++++ src/video_core/renderer_opengl/gl_texture_cache.h | 16 ++++++++++++++++ src/video_core/renderer_vulkan/vk_texture_cache.cpp | 4 ++++ src/video_core/renderer_vulkan/vk_texture_cache.h | 16 ++++++++++++++++ src/video_core/texture_cache/texture_cache.h | 3 +++ 5 files changed, 43 insertions(+) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 1d3f193af..30dfcfa6a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1117,6 +1117,8 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} +ImageView::~ImageView() = default; + GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { if (image_format == Shader::ImageFormat::Typeless) { return Handle(texture_type); @@ -1272,6 +1274,8 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span copies) { static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index c51a7428d..a717cf8c8 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -232,6 +232,14 @@ public: const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&); + ~ImageView(); + + ImageView(const ImageView&) = delete; + ImageView& operator=(const ImageView&) = delete; + + ImageView(ImageView&&) = default; + ImageView& operator=(ImageView&&) = default; + [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format); @@ -300,6 +308,14 @@ public: explicit 
Framebuffer(TextureCacheRuntime&, std::span color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key); + ~Framebuffer(); + + Framebuffer(const Framebuffer&) = delete; + Framebuffer& operator=(const Framebuffer&) = delete; + + Framebuffer(Framebuffer&&) = default; + Framebuffer& operator=(Framebuffer&&) = default; + [[nodiscard]] GLuint Handle() const noexcept { return framebuffer.handle; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index daf26f380..407fd2a15 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1366,6 +1366,8 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params) : VideoCommon::ImageViewBase{params} {} +ImageView::~ImageView() = default; + VkImageView ImageView::DepthView() { if (depth_view) { return *depth_view; @@ -1492,6 +1494,8 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, CreateFramebuffer(runtime, color_buffers, depth_buffer); } +Framebuffer::~Framebuffer() = default; + void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, std::span color_buffers, ImageView* depth_buffer) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 9d149d306..ff28b4e96 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -162,6 +162,14 @@ public: const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&); + ~ImageView(); + + ImageView(const ImageView&) = delete; + ImageView& operator=(const ImageView&) = delete; + + ImageView(ImageView&&) = default; + ImageView& operator=(ImageView&&) = default; + [[nodiscard]] VkImageView 
DepthView(); [[nodiscard]] VkImageView StencilView(); @@ -235,6 +243,14 @@ public: explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, ImageView* depth_buffer, VkExtent2D extent); + ~Framebuffer(); + + Framebuffer(const Framebuffer&) = delete; + Framebuffer& operator=(const Framebuffer&) = delete; + + Framebuffer(Framebuffer&&) = default; + Framebuffer& operator=(Framebuffer&&) = default; + void CreateFramebuffer(TextureCacheRuntime& runtime, std::span color_buffers, ImageView* depth_buffer); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index aec130a32..4d2874bf2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1620,6 +1620,9 @@ void TextureCache

::RemoveFramebuffers(std::span removed_vi auto it = framebuffers.begin(); while (it != framebuffers.end()) { if (it->first.Contains(removed_views)) { + auto framebuffer_id = it->second; + ASSERT(framebuffer_id); + sentenced_framebuffers.Push(std::move(slot_framebuffers[framebuffer_id])); it = framebuffers.erase(it); } else { ++it; -- cgit v1.2.3 From 1128cc35b97b70bd597e353a11ca3ec22784a58a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 17 Nov 2021 01:40:49 +0100 Subject: TextureCache: OGL query device memory if possible. --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 12 +++++++++++- src/video_core/renderer_opengl/gl_texture_cache.h | 4 +++- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 30dfcfa6a..2f7d98d8b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,8 +9,8 @@ #include +#include "common/literals.h" #include "common/settings.h" - #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" @@ -42,6 +42,7 @@ using VideoCore::Surface::IsPixelFormatSRGB; using VideoCore::Surface::MaxPixelFormat; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceType; +using namespace Common::Literals; struct CopyOrigin { GLint level; @@ -496,6 +497,15 @@ ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { return download_buffers.RequestMap(size, false); } +u64 TextureCacheRuntime::GetDeviceLocalMemory() const { + if (GLAD_GL_NVX_gpu_memory_info) { + GLint cur_avail_mem_kb = 0; + glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb); + return static_cast(cur_avail_mem_kb) * 1_KiB; + } + return 2_GiB; // Return minimum requirements +} + void 
TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, std::span copies) { const GLuint dst_name = dst_image.Handle(); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index a717cf8c8..1bb762568 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -82,6 +82,8 @@ public: ImageBufferMap DownloadStagingBuffer(size_t size); + u64 GetDeviceLocalMemory() const; + void CopyImage(Image& dst, Image& src, std::span copies); void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { @@ -333,7 +335,7 @@ struct TextureCacheParams { static constexpr bool ENABLE_VALIDATION = true; static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool HAS_EMULATED_COPIES = true; - static constexpr bool HAS_DEVICE_MEMORY_INFO = false; + static constexpr bool HAS_DEVICE_MEMORY_INFO = true; using Runtime = OpenGL::TextureCacheRuntime; using Image = OpenGL::Image; -- cgit v1.2.3 From 1c8a3d8d2916e8d43808c9b4e75c756f162890e8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 17 Nov 2021 02:22:21 +0100 Subject: TextureCache: Fix Automatic Anisotropic. 
--- src/video_core/textures/texture.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index ba066f98f..06954963d 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -66,15 +66,14 @@ float TSCEntry::MaxAnisotropy() const noexcept { return 1.0f; } const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue(); - u32 new_max_anisotropic{}; + u32 added_anisotropic{}; if (anisotropic_settings == 0) { - const auto anisotropic_based_onscale = Settings::values.resolution_info.up_scale >> - Settings::values.resolution_info.down_shift; - new_max_anisotropic = std::max(anisotropic_based_onscale + 1U, 1U); + added_anisotropic = Settings::values.resolution_info.up_scale >> + Settings::values.resolution_info.down_shift; } else { - new_max_anisotropic = Settings::values.max_anisotropy.GetValue(); + added_anisotropic = Settings::values.max_anisotropy.GetValue() - 1U; } - return static_cast(1U << std::min(max_anisotropy + anisotropic_settings - 1, 31U)); + return static_cast(1U << (max_anisotropy + added_anisotropic)); } } // namespace Tegra::Texture -- cgit v1.2.3