diff options
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- | src/video_core/texture_cache/formatter.h | 2 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_base.cpp | 10 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_base.h | 15 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_info.cpp | 22 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_info.h | 4 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_view_base.cpp | 13 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_view_base.h | 4 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 498 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 45 | ||||
-rw-r--r-- | src/video_core/texture_cache/types.h | 7 | ||||
-rw-r--r-- | src/video_core/texture_cache/util.cpp | 12 | ||||
-rw-r--r-- | src/video_core/texture_cache/util.h | 3 |
12 files changed, 546 insertions, 89 deletions
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index c6cf0583f..b2c81057b 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -194,6 +194,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str return "D32_FLOAT"; case PixelFormat::D16_UNORM: return "D16_UNORM"; + case PixelFormat::S8_UINT: + return "S8_UINT"; case PixelFormat::D24_UNORM_S8_UINT: return "D24_UNORM_S8_UINT"; case PixelFormat::S8_UINT_D24_UNORM: diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 6052d148a..3db2fdf34 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -60,15 +60,17 @@ namespace { ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, - converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_}, - cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes}, - mip_level_offsets{CalculateMipLevelOffsets(info)} { + converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{}, + has_scaled{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, + cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} { if (info.type == ImageType::e3D) { slice_offsets = CalculateSliceOffsets(info); slice_subresources = CalculateSliceSubresources(info); } } +ImageBase::ImageBase(const NullImageParams&) {} + ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_) : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {} @@ -254,6 +256,8 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i } lhs.aliased_images.push_back(std::move(lhs_alias)); rhs.aliased_images.push_back(std::move(rhs_alias)); + lhs.flags &= ~ImageFlagBits::IsRescalable; + rhs.flags &= ~ImageFlagBits::IsRescalable; } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 0c17a791b..89c111c00 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -33,6 +33,11 @@ enum class ImageFlagBits : u32 { ///< garbage collection priority Alias = 1 << 11, ///< This image has aliases and has priority on garbage ///< collection + + // Rescaler + Rescaled = 1 << 12, + CheckingRescalable = 1 << 13, + IsRescalable = 1 << 14, }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -43,8 +48,11 @@ struct AliasedImage { ImageId id; }; +struct NullImageParams {}; + struct ImageBase { explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + explicit ImageBase(const NullImageParams&); [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept; @@ -68,11 +76,18 @@ struct ImageBase { void CheckBadOverlapState(); void CheckAliasState(); + bool HasScaled() const { + return has_scaled; + } + ImageInfo info; u32 guest_size_bytes = 0; u32 unswizzled_size_bytes = 0; u32 converted_size_bytes = 0; + u32 scale_rating = 0; + u64 scale_tick = 0; + bool has_scaled = false; ImageFlagBits flags = ImageFlagBits::CpuModified; GPUVAddr gpu_addr = 0; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 64fd7010a..afb94082b 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -16,6 +16,7 @@ namespace VideoCommon { using Tegra::Texture::TextureType; using Tegra::Texture::TICEntry; using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::SurfaceType; ImageInfo::ImageInfo(const TICEntry& config) noexcept { format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, @@ -31,6 +32,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { .depth = config.block_depth, }; } + rescaleable = false; tile_width_spacing = config.tile_width_spacing; if (config.texture_type != TextureType::Texture2D && config.texture_type != TextureType::Texture2DNoMipmap) { @@ -41,6 +43,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { ASSERT(config.BaseLayer() == 0); type = ImageType::e1D; size.width = config.Width(); + resources.layers = 1; break; case TextureType::Texture1DArray: UNIMPLEMENTED_IF(config.BaseLayer() != 0); @@ -52,12 +55,14 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { case TextureType::Texture2DNoMipmap: ASSERT(config.Depth() == 1); type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D; + rescaleable = !config.IsPitchLinear(); size.width = config.Width(); size.height = config.Height(); resources.layers = config.BaseLayer() + 1; break; case TextureType::Texture2DArray: type = ImageType::e2D; + rescaleable = true; size.width = config.Width(); size.height = config.Height(); resources.layers = config.BaseLayer() + config.Depth(); @@ -82,10 +87,12 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { size.width = config.Width(); size.height = config.Height(); size.depth = config.Depth(); + resources.layers = 1; break; case TextureType::Texture1DBuffer: type = ImageType::Buffer; size.width = config.Width(); + resources.layers = 1; break; default: UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); @@ -95,12 +102,16 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { // FIXME: Call this without passing *this layer_stride = CalculateLayerStride(*this); maybe_unaligned_layer_stride = CalculateLayerSize(*this); + rescaleable &= (block.depth == 0) && resources.levels == 1; + rescaleable &= size.height > 256 || GetFormatType(format) != SurfaceType::ColorTexture; + downscaleable = size.height > 512; } } ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept { const auto& rt = regs.rt[index]; format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format); + rescaleable = false; if (rt.tile_mode.is_pitch_linear) { ASSERT(rt.tile_mode.is_3d == 0); type = ImageType::Linear; @@ -126,6 +137,9 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) type = ImageType::e3D; size.depth = rt.depth; } else { + rescaleable = block.depth == 0; + rescaleable &= size.height > 256; + downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = rt.depth; } @@ -135,6 +149,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format); size.width = regs.zeta_width; size.height = regs.zeta_height; + rescaleable = false; resources.levels = 1; layer_stride = regs.zeta.layer_stride * 4; maybe_unaligned_layer_stride = layer_stride; @@ -153,6 +168,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { type = ImageType::e3D; size.depth = regs.zeta_depth; } else { + rescaleable = block.depth == 0; + downscaleable = size.height > 512; type = ImageType::e2D; resources.layers = regs.zeta_depth; } @@ -161,6 +178,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero"); format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format); + rescaleable = false; if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) { type = ImageType::Linear; size = Extent3D{ @@ -171,6 +189,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { pitch = config.pitch; } else { type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D; + block = Extent3D{ .width = config.block_width, .height = config.block_height, @@ -183,6 +202,9 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { .height = config.height, .depth = 1, }; + rescaleable = block.depth == 0; + rescaleable &= size.height > 256; + downscaleable = size.height > 512; } } diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 5049fc36e..5932dcaba 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -15,7 +15,7 @@ using Tegra::Texture::TICEntry; using VideoCore::Surface::PixelFormat; struct ImageInfo { - explicit ImageInfo() = default; + ImageInfo() = default; explicit ImageInfo(const TICEntry& config) noexcept; explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; @@ -33,6 +33,8 @@ struct ImageInfo { u32 maybe_unaligned_layer_stride = 0; u32 num_samples = 1; u32 tile_width_spacing = 0; + bool rescaleable = false; + bool downscaleable = false; }; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index 450becbeb..c7b4fc231 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -37,14 +37,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i } ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) - : format{info.format}, type{ImageViewType::Buffer}, size{ - .width = info.size.width, - .height = 1, - .depth = 1, - } { + : image_id{NULL_IMAGE_ID}, format{info.format}, type{ImageViewType::Buffer}, + size{ + .width = info.size.width, + .height = 1, + .depth = 1, + } { ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); } -ImageViewBase::ImageViewBase(const NullImageParams&) {} +ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 903f715c5..9c24c5359 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h @@ -15,7 +15,7 @@ using VideoCore::Surface::PixelFormat; struct ImageViewInfo; struct ImageInfo; -struct NullImageParams {}; +struct NullImageViewParams {}; enum class ImageViewFlagBits : u16 { PreemtiveDownload = 1 << 0, @@ -28,7 +28,7 @@ struct ImageViewBase { explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id); explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); - explicit ImageViewBase(const NullImageParams&); + explicit ImageViewBase(const NullImageViewParams&); [[nodiscard]] bool IsBuffer() const noexcept { return type == ImageViewType::Buffer; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f70c1f764..241f71a91 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -7,6 +7,7 @@ #include <unordered_set> #include "common/alignment.h" +#include "common/settings.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/texture_cache/image_view_base.h" @@ -44,21 +45,22 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& // Make sure the first index is reserved for the null resources // This way the null resource becomes a compile time constant - void(slot_image_views.insert(runtime, NullImageParams{})); + void(slot_images.insert(NullImageParams{})); + void(slot_image_views.insert(runtime, NullImageViewParams{})); void(slot_samplers.insert(runtime, sampler_descriptor)); if constexpr (HAS_DEVICE_MEMORY_INFO) { const auto device_memory = runtime.GetDeviceLocalMemory(); - const u64 possible_expected_memory = (device_memory * 3) / 10; - const u64 possible_critical_memory = (device_memory * 6) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + const u64 possible_expected_memory = (device_memory * 4) / 10; + const u64 possible_critical_memory = (device_memory * 7) / 10; + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); minimum_memory = 0; } else { - // on OGL we can be more conservatives as the driver takes care. + // On OpenGL we can be more conservatives as the driver takes care. expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; - minimum_memory = expected_memory; + minimum_memory = 0; } } @@ -67,7 +69,7 @@ void TextureCache<P>::RunGarbageCollector() { const bool high_priority_mode = total_used_memory >= expected_memory; const bool aggressive_mode = total_used_memory >= critical_memory; const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; - size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5); + size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10); const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { if (num_iterations == 0) { return true; @@ -89,7 +91,7 @@ void TextureCache<P>::RunGarbageCollector() { UntrackImage(image, image_id); } UnregisterImage(image_id); - DeleteImage(image_id); + DeleteImage(image_id, image.scale_tick > frame_tick + 5); return false; }; lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); @@ -103,6 +105,7 @@ void TextureCache<P>::TickFrame() { sentenced_images.Tick(); sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); + runtime.TickFrame(); ++frame_tick; } @@ -122,15 +125,14 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept { } template <class P> -void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, - std::span<ImageViewId> image_view_ids) { - FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); +template <bool has_blacklists> +void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) { + FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views); } template <class P> -void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, - std::span<ImageViewId> image_view_ids) { - FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); +void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { + FillImageViews<true>(compute_image_table, compute_image_view_ids, views); } template <class P> @@ -190,6 +192,102 @@ void TextureCache<P>::SynchronizeComputeDescriptors() { } template <class P> +bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { + auto& flags = maxwell3d.dirty.flags; + u32 scale_rating = 0; + bool rescaled = false; + std::array<ImageId, NUM_RT> tmp_color_images{}; + ImageId tmp_depth_image{}; + do { + flags[Dirty::RenderTargets] = false; + + has_deleted_images = false; + // Render target control is used on all render targets, so force look ups when this one is + // up + const bool force = flags[Dirty::RenderTargetControl]; + flags[Dirty::RenderTargetControl] = false; + + scale_rating = 0; + bool any_rescaled = false; + bool can_rescale = true; + const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) { + if (view_id != NULL_IMAGE_VIEW_ID && view_id != ImageViewId{}) { + const auto& view = slot_image_views[view_id]; + const auto image_id = view.image_id; + id_save = image_id; + auto& image = slot_images[image_id]; + can_rescale &= ImageCanRescale(image); + any_rescaled |= True(image.flags & ImageFlagBits::Rescaled) || + GetFormatType(image.info.format) != SurfaceType::ColorTexture; + scale_rating = std::max<u32>(scale_rating, image.scale_tick <= frame_tick + ? image.scale_rating + 1U + : image.scale_rating); + } else { + id_save = CORRUPT_ID; + } + }; + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + if (flags[Dirty::ColorBuffer0 + index] || force) { + flags[Dirty::ColorBuffer0 + index] = false; + BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + } + check_rescale(color_buffer_id, tmp_color_images[index]); + } + if (flags[Dirty::ZetaBuffer] || force) { + flags[Dirty::ZetaBuffer] = false; + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + } + check_rescale(render_targets.depth_buffer_id, tmp_depth_image); + + if (can_rescale) { + rescaled = any_rescaled || scale_rating >= 2; + const auto scale_up = [this](ImageId image_id) { + if (image_id != CORRUPT_ID) { + Image& image = slot_images[image_id]; + ScaleUp(image); + } + }; + if (rescaled) { + for (size_t index = 0; index < NUM_RT; ++index) { + scale_up(tmp_color_images[index]); + } + scale_up(tmp_depth_image); + scale_rating = 2; + } + } else { + rescaled = false; + const auto scale_down = [this](ImageId image_id) { + if (image_id != CORRUPT_ID) { + Image& image = slot_images[image_id]; + ScaleDown(image); + } + }; + for (size_t index = 0; index < NUM_RT; ++index) { + scale_down(tmp_color_images[index]); + } + scale_down(tmp_depth_image); + scale_rating = 1; + } + } while (has_deleted_images); + const auto set_rating = [this, scale_rating](ImageId image_id) { + if (image_id != CORRUPT_ID) { + Image& image = slot_images[image_id]; + image.scale_rating = scale_rating; + if (image.scale_tick <= frame_tick) { + image.scale_tick = frame_tick + 1; + } + } + }; + for (size_t index = 0; index < NUM_RT; ++index) { + set_rating(tmp_color_images[index]); + } + set_rating(tmp_depth_image); + + return rescaled; +} + +template <class P> void TextureCache<P>::UpdateRenderTargets(bool is_clear) { using namespace VideoCommon::Dirty; auto& flags = maxwell3d.dirty.flags; @@ -202,24 +300,18 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) { PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); return; } - flags[Dirty::RenderTargets] = false; - // Render target control is used on all render targets, so force look ups when this one is up - const bool force = flags[Dirty::RenderTargetControl]; - flags[Dirty::RenderTargetControl] = false; + const bool rescaled = RescaleRenderTargets(is_clear); + if (is_rescaling != rescaled) { + flags[Dirty::RescaleViewports] = true; + flags[Dirty::RescaleScissors] = true; + is_rescaling = rescaled; + } for (size_t index = 0; index < NUM_RT; ++index) { ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - if (flags[Dirty::ColorBuffer0 + index] || force) { - flags[Dirty::ColorBuffer0 + index] = false; - BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); - } PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); } - if (flags[Dirty::ZetaBuffer] || force) { - flags[Dirty::ZetaBuffer] = false; - BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); - } const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); @@ -227,9 +319,15 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) { for (size_t index = 0; index < NUM_RT; ++index) { render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); } + u32 up_scale = 1; + u32 down_shift = 0; + if (is_rescaling) { + up_scale = Settings::values.resolution_info.up_scale; + down_shift = Settings::values.resolution_info.down_shift; + } render_targets.size = Extent2D{ - maxwell3d.regs.render_area.width, - maxwell3d.regs.render_area.height, + (maxwell3d.regs.render_area.width * up_scale) >> down_shift, + (maxwell3d.regs.render_area.height * up_scale) >> down_shift, }; flags[Dirty::DepthBiasGlobal] = true; @@ -241,17 +339,28 @@ typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { } template <class P> +template <bool has_blacklists> void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, - std::span<const u32> indices, - std::span<ImageViewId> image_view_ids) { - ASSERT(indices.size() <= image_view_ids.size()); + std::span<ImageViewInOut> views) { + bool has_blacklisted; do { has_deleted_images = false; - std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { - return VisitImageView(table, cached_image_view_ids, index); - }); - } while (has_deleted_images); + if constexpr (has_blacklists) { + has_blacklisted = false; + } + for (ImageViewInOut& view : views) { + view.id = VisitImageView(table, cached_image_view_ids, view.index); + if constexpr (has_blacklists) { + if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { + const ImageViewBase& image_view{slot_image_views[view.id]}; + auto& image = slot_images[image_view.image_id]; + has_blacklisted |= ScaleDown(image); + image.scale_rating = 0; + } + } + } + } while (has_deleted_images || (has_blacklists && has_blacklisted)); } template <class P> @@ -369,8 +478,43 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, PrepareImage(src_id, false, false); PrepareImage(dst_id, true, false); - ImageBase& dst_image = slot_images[dst_id]; - const ImageBase& src_image = slot_images[src_id]; + Image& dst_image = slot_images[dst_id]; + Image& src_image = slot_images[src_id]; + bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); + bool is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); + + const bool is_resolve = src_image.info.num_samples != 1 && dst_image.info.num_samples == 1; + if (is_src_rescaled != is_dst_rescaled) { + if (ImageCanRescale(src_image)) { + ScaleUp(src_image); + is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); + if (is_resolve) { + dst_image.info.rescaleable = true; + for (const auto& alias : dst_image.aliased_images) { + Image& other_image = slot_images[alias.id]; + other_image.info.rescaleable = true; + } + } + } + if (ImageCanRescale(dst_image)) { + ScaleUp(dst_image); + is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); + } + } + if (is_resolve && (is_src_rescaled != is_dst_rescaled)) { + // A resolve requires both images to be the same dimensions. Resize down if needed. + ScaleDown(src_image); + ScaleDown(dst_image); + is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled); + is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled); + } + const auto& resolution = Settings::values.resolution_info; + const auto scale_region = [&](Region2D& region) { + region.start.x = resolution.ScaleUp(region.start.x); + region.start.y = resolution.ScaleUp(region.start.y); + region.end.x = resolution.ScaleUp(region.end.x); + region.end.y = resolution.ScaleUp(region.end.y); + }; // TODO: Deduplicate const std::optional src_base = src_image.TryFindBase(src.Address()); @@ -378,20 +522,26 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); - const Region2D src_region{ + Region2D src_region{ Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, }; + if (is_src_rescaled) { + scale_region(src_region); + } const std::optional dst_base = dst_image.TryFindBase(dst.Address()); const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); - const Region2D dst_region{ + Region2D dst_region{ Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, }; + if (is_dst_rescaled) { + scale_region(dst_region); + } // Always call this after src_framebuffer_id was queried, as the address might be invalidated. Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; @@ -487,6 +637,20 @@ void TextureCache<P>::PopAsyncFlushes() { } template <class P> +bool TextureCache<P>::IsRescaling() const noexcept { + return is_rescaling; +} + +template <class P> +bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcept { + if (image_view.type == ImageViewType::Buffer) { + return false; + } + const ImageBase& image = slot_images[image_view.image_id]; + return True(image.flags & ImageFlagBits::Rescaled); +} + +template <class P> bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { bool is_modified = false; ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { @@ -624,6 +788,105 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, } template <class P> +bool TextureCache<P>::ImageCanRescale(ImageBase& image) { + if (!image.info.rescaleable) { + return false; + } + if (Settings::values.resolution_info.downscale && !image.info.downscaleable) { + return false; + } + if (True(image.flags & (ImageFlagBits::Rescaled | ImageFlagBits::CheckingRescalable))) { + return true; + } + if (True(image.flags & ImageFlagBits::IsRescalable)) { + return true; + } + image.flags |= ImageFlagBits::CheckingRescalable; + for (const auto& alias : image.aliased_images) { + Image& other_image = slot_images[alias.id]; + if (!ImageCanRescale(other_image)) { + image.flags &= ~ImageFlagBits::CheckingRescalable; + return false; + } + } + image.flags &= ~ImageFlagBits::CheckingRescalable; + image.flags |= ImageFlagBits::IsRescalable; + return true; +} + +template <class P> +void TextureCache<P>::InvalidateScale(Image& image) { + if (image.scale_tick <= frame_tick) { + image.scale_tick = frame_tick + 1; + } + const std::span<const ImageViewId> image_view_ids = image.image_view_ids; + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; + } + } + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); + for (const ImageViewId image_view_id : image_view_ids) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + slot_image_views.erase(image_view_id); + } + image.image_view_ids.clear(); + image.image_view_infos.clear(); + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + } + graphics_image_table.Invalidate(); + compute_image_table.Invalidate(); + has_deleted_images = true; +} + +template <class P> +u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) { + const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale * + Settings::values.resolution_info.up_scale); + const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift + + Settings::values.resolution_info.down_shift); + const u64 image_size_bytes = + static_cast<u64>(std::max(image.guest_size_bytes, image.unswizzled_size_bytes)); + const u64 tentative_size = (image_size_bytes * scale_up) >> down_shift; + const u64 fitted_size = Common::AlignUp(tentative_size, 1024); + return fitted_size; +} + +template <class P> +bool TextureCache<P>::ScaleUp(Image& image) { + const bool has_copy = image.HasScaled(); + const bool rescaled = image.ScaleUp(); + if (!rescaled) { + return false; + } + if (!has_copy) { + total_used_memory += GetScaledImageSizeBytes(image); + } + InvalidateScale(image); + return true; +} + +template <class P> +bool TextureCache<P>::ScaleDown(Image& image) { + const bool rescaled = image.ScaleDown(); + if (!rescaled) { + return false; + } + InvalidateScale(image); + return true; +} + +template <class P> ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); @@ -660,12 +923,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA std::vector<ImageId> right_aliased_ids; std::unordered_set<ImageId> ignore_textures; std::vector<ImageId> bad_overlap_ids; + std::vector<ImageId> all_siblings; + const bool this_is_linear = info.type == ImageType::Linear; const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { if (True(overlap.flags & ImageFlagBits::Remapped)) { ignore_textures.insert(overlap_id); return; } - if (info.type == ImageType::Linear) { + const bool overlap_is_linear = overlap.info.type == ImageType::Linear; + if (this_is_linear != overlap_is_linear) { + return; + } + if (this_is_linear && overlap_is_linear) { if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { // Alias linear images with the same pitch left_aliased_ids.push_back(overlap_id); @@ -681,6 +950,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA cpu_addr = solution->cpu_addr; new_info.resources = solution->resources; overlap_ids.push_back(overlap_id); + all_siblings.push_back(overlap_id); return; } static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; @@ -688,10 +958,12 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { left_aliased_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::Alias; + all_siblings.push_back(overlap_id); } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, broken_views, native_bgr)) { right_aliased_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::Alias; + all_siblings.push_back(overlap_id); } else { bad_overlap_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::BadOverlap; @@ -709,6 +981,32 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } }; ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); + + bool can_rescale = info.rescaleable; + bool any_rescaled = false; + for (const ImageId sibling_id : all_siblings) { + if (!can_rescale) { + break; + } + Image& sibling = slot_images[sibling_id]; + can_rescale &= ImageCanRescale(sibling); + any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); + } + + can_rescale &= any_rescaled; + + if (can_rescale) { + for (const ImageId sibling_id : all_siblings) { + Image& sibling = slot_images[sibling_id]; + ScaleUp(sibling); + } + } else { + for (const ImageId sibling_id : all_siblings) { + Image& sibling = slot_images[sibling_id]; + ScaleDown(sibling); + } + } + const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; @@ -731,14 +1029,23 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA // TODO: Only upload what we need RefreshContents(new_image, new_image_id); + if (can_rescale) { + ScaleUp(new_image); + } else { + ScaleDown(new_image); + } + for (const ImageId overlap_id : overlap_ids) { Image& overlap = slot_images[overlap_id]; if (overlap.info.num_samples != new_image.info.num_samples) { LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); } else { + const auto& resolution = Settings::values.resolution_info; const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); - const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); - runtime.CopyImage(new_image, overlap, copies); + const u32 up_scale = can_rescale ? resolution.up_scale : 1; + const u32 down_shift = can_rescale ? resolution.down_shift : 0; + auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); + runtime.CopyImage(new_image, overlap, std::move(copies)); } if (True(overlap.flags & ImageFlagBits::Tracked)) { UntrackImage(overlap, overlap_id); @@ -1083,13 +1390,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { "Trying to unregister an already registered image"); image.flags &= ~ImageFlagBits::Registered; image.flags &= ~ImageFlagBits::BadOverlap; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory -= Common::AlignUp(tentative_size, 1024); lru_cache.Free(image.lru_index); const auto& clear_page_table = [this, image_id]( @@ -1213,8 +1513,18 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { } template <class P> -void TextureCache<P>::DeleteImage(ImageId image_id) { +void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) { ImageBase& image = slot_images[image_id]; + if (image.HasScaled()) { + total_used_memory -= GetScaledImageSizeBytes(image); + } + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); const GPUVAddr gpu_addr = image.gpu_addr; const auto alloc_it = image_allocs_table.find(gpu_addr); if (alloc_it == image_allocs_table.end()) { @@ -1269,10 +1579,14 @@ void TextureCache<P>::DeleteImage(ImageId image_id) { num_removed_overlaps); } for (const ImageViewId image_view_id : image_view_ids) { - sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + if (!immediate_delete) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + } slot_image_views.erase(image_view_id); } - sentenced_images.Push(std::move(slot_images[image_id])); + if (!immediate_delete) { + sentenced_images.Push(std::move(slot_images[image_id])); + } slot_images.erase(image_id); alloc_images.erase(alloc_image_it); @@ -1306,6 +1620,9 @@ void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_vi auto it = framebuffers.begin(); while (it != framebuffers.end()) { if (it->first.Contains(removed_views)) { + auto framebuffer_id = it->second; + ASSERT(framebuffer_id); + sentenced_framebuffers.Push(std::move(slot_framebuffers[framebuffer_id])); it = framebuffers.erase(it); } else { ++it; @@ -1322,26 +1639,60 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept { template <class P> void TextureCache<P>::SynchronizeAliases(ImageId image_id) { boost::container::small_vector<const AliasedImage*, 1> aliased_images; - ImageBase& image = slot_images[image_id]; + Image& image = slot_images[image_id]; + bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); u64 most_recent_tick = image.modification_tick; for (const AliasedImage& aliased : image.aliased_images) { ImageBase& aliased_image = slot_images[aliased.id]; if (image.modification_tick < aliased_image.modification_tick) { most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); + any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); } } if (aliased_images.empty()) { return; } + const bool can_rescale = ImageCanRescale(image); + if (any_rescaled) { + if (can_rescale) { + ScaleUp(image); + } else { + ScaleDown(image); + } + } image.modification_tick = most_recent_tick; std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { const ImageBase& lhs_image = slot_images[lhs->id]; const ImageBase& rhs_image = slot_images[rhs->id]; return lhs_image.modification_tick < rhs_image.modification_tick; }); + const auto& resolution = Settings::values.resolution_info; for (const AliasedImage* const aliased : aliased_images) { - CopyImage(image_id, aliased->id, aliased->copies); + if (!resolution.active | !any_rescaled) { + CopyImage(image_id, aliased->id, aliased->copies); + continue; + } + Image& aliased_image = slot_images[aliased->id]; + if (!can_rescale) { + ScaleDown(aliased_image); + CopyImage(image_id, aliased->id, aliased->copies); + continue; + } + ScaleUp(aliased_image); + + const bool both_2d{image.info.type == ImageType::e2D && + aliased_image.info.type == ImageType::e2D}; + auto copies = aliased->copies; + for (auto copy : copies) { + copy.extent.width = std::max<u32>( + (copy.extent.width * resolution.up_scale) >> resolution.down_shift, 1); + if (both_2d) { + copy.extent.height = std::max<u32>( + (copy.extent.height * resolution.up_scale) >> resolution.down_shift, 1); + } + } + CopyImage(image_id, aliased->id, copies); } } @@ -1377,9 +1728,25 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi } template <class P> -void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { +void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) { Image& dst = slot_images[dst_id]; Image& src = slot_images[src_id]; + const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled); + if (is_rescaled) { + ASSERT(True(dst.flags & ImageFlagBits::Rescaled)); + const bool both_2d{src.info.type == ImageType::e2D && dst.info.type == ImageType::e2D}; + const auto& resolution = Settings::values.resolution_info; + for (auto& copy : copies) { + copy.src_offset.x = resolution.ScaleUp(copy.src_offset.x); + copy.dst_offset.x = resolution.ScaleUp(copy.dst_offset.x); + copy.extent.width = resolution.ScaleUp(copy.extent.width); + if (both_2d) { + copy.src_offset.y = resolution.ScaleUp(copy.src_offset.y); + copy.dst_offset.y = resolution.ScaleUp(copy.dst_offset.y); + copy.extent.height = resolution.ScaleUp(copy.extent.height); + } + } + } const auto dst_format_type = GetFormatType(dst.info.format); const auto src_format_type = GetFormatType(src.info.format); if (src_format_type == dst_format_type) { @@ -1392,6 +1759,9 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const } UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); + if constexpr (HAS_PIXEL_FORMAT_CONVERSIONS) { + return runtime.ConvertImage(dst, src, copies); + } for (const ImageCopy& copy : copies) { UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); @@ -1424,7 +1794,7 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const }; UNIMPLEMENTED_IF(copy.extent != expected_size); - runtime.ConvertImage(dst_framebuffer, dst_view, src_view); + runtime.ConvertImage(dst_framebuffer, dst_view, src_view, is_rescaled); } } @@ -1433,8 +1803,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) if (*old_id == new_id) { return; } - if (*old_id) { - const ImageViewBase& old_view = slot_image_views[*old_id]; + if (new_id) { + const ImageViewBase& old_view = slot_image_views[new_id]; if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { uncommitted_downloads.push_back(old_view.image_id); } @@ -1447,10 +1817,18 @@ std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( ImageId image_id, const ImageViewInfo& view_info) { const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); const ImageBase& image = slot_images[image_id]; + const bool is_rescaled = True(image.flags & ImageFlagBits::Rescaled); const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; - const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + if (is_rescaled) { + const auto& resolution = Settings::values.resolution_info; + extent.width = resolution.ScaleUp(extent.width); + if (image.info.type == ImageType::e2D) { + extent.height = resolution.ScaleUp(extent.height); + } + } const u32 num_samples = image.info.num_samples; const auto [samples_x, samples_y] = SamplesLog2(num_samples); const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 2d1893c1c..a9504c0e8 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -21,6 +21,7 @@ #include "video_core/texture_cache/descriptor_table.h" #include "video_core/texture_cache/image_base.h" #include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/image_view_info.h" #include "video_core/texture_cache/render_targets.h" #include "video_core/texture_cache/slot_vector.h" @@ -39,6 +40,12 @@ using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using namespace Common::Literals; +struct ImageViewInOut { + u32 index{}; + bool blacklist{}; + ImageViewId id{}; +}; + template <class P> class TextureCache { /// Address shift for caching images into a hash table @@ -52,11 +59,8 @@ class TextureCache { static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; /// True when the API can provide info about the memory of the device. static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; - - /// Image view ID for null descriptors - static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; - /// Sampler ID for bugged sampler ids - static constexpr SamplerId NULL_SAMPLER_ID{0}; + /// True when the API provides utilities for pixel format conversions. + static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = P::HAS_PIXEL_FORMAT_CONVERSIONS; static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; @@ -99,11 +103,11 @@ public: void MarkModification(ImageId id) noexcept; /// Fill image_view_ids with the graphics images in indices - void FillGraphicsImageViews(std::span<const u32> indices, - std::span<ImageViewId> image_view_ids); + template <bool has_blacklists> + void FillGraphicsImageViews(std::span<ImageViewInOut> views); /// Fill image_view_ids with the compute images in indices - void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); + void FillComputeImageViews(std::span<ImageViewInOut> views); /// Get the sampler from the graphics descriptor table in the specified index Sampler* GetGraphicsSampler(u32 index); @@ -117,6 +121,11 @@ public: /// Refresh the state for compute image view and sampler descriptors void SynchronizeComputeDescriptors(); + /// Updates the Render Targets if they can be rescaled + /// @param is_clear True when the render targets are being used for clears + /// @retval True if the Render Targets have been rescaled. + bool RescaleRenderTargets(bool is_clear); + /// Update bound render targets and upload memory if necessary /// @param is_clear True when the render targets are being used for clears void UpdateRenderTargets(bool is_clear); @@ -160,6 +169,10 @@ public: /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + [[nodiscard]] bool IsRescaling() const noexcept; + + [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; + std::mutex mutex; private: @@ -198,9 +211,10 @@ private: void RunGarbageCollector(); /// Fills image_view_ids in the image views in indices + template <bool has_blacklists> void FillImageViews(DescriptorTable<TICEntry>& table, - std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, - std::span<ImageViewId> image_view_ids); + std::span<ImageViewId> cached_image_view_ids, + std::span<ImageViewInOut> views); /// Find or create an image view in the guest descriptor table ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, @@ -285,7 +299,7 @@ private: void UntrackImage(ImageBase& image, ImageId image_id); /// Delete image from the cache - void DeleteImage(ImageId image); + void DeleteImage(ImageId image, bool immediate_delete = false); /// Remove image views references from the cache void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); @@ -306,7 +320,7 @@ private: void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); /// Execute copies from one image to the other, even if they are incompatible - void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies); + void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies); /// Bind an image view as render target, downloading resources preemtively if needed void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); @@ -318,6 +332,12 @@ private: /// Returns true if the current clear parameters clear the whole image of a given image view [[nodiscard]] bool IsFullClear(ImageViewId id); + bool ImageCanRescale(ImageBase& image); + void InvalidateScale(Image& image); + bool ScaleUp(Image& image); + bool ScaleDown(Image& image); + u64 GetScaledImageSizeBytes(ImageBase& image); + Runtime& runtime; VideoCore::RasterizerInterface& rasterizer; Tegra::Engines::Maxwell3D& maxwell3d; @@ -349,6 +369,7 @@ private: VAddr virtual_invalid_space{}; bool has_deleted_images = false; + bool is_rescaling = false; u64 total_used_memory = 0; u64 minimum_memory; u64 expected_memory; diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 47a11cb2f..5c274abdf 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -22,6 +22,13 @@ using ImageAllocId = SlotId; using SamplerId = SlotId; using FramebufferId = SlotId; +/// Fake image ID for null image views +constexpr ImageId NULL_IMAGE_ID{0}; +/// Image view ID for null descriptors +constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; +/// Sampler ID for bugged sampler ids +constexpr SamplerId NULL_SAMPLER_ID{0}; + enum class ImageType : u32 { e1D, e2D, diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 59cf2f561..ddc9fb13a 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -723,7 +723,7 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { } std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, - SubresourceBase base) { + SubresourceBase base, u32 up_scale, u32 down_shift) { ASSERT(dst.resources.levels >= src.resources.levels); ASSERT(dst.num_samples == src.num_samples); @@ -732,7 +732,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn ASSERT(src.type == ImageType::e3D); ASSERT(src.resources.levels == 1); } - + const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; std::vector<ImageCopy> copies; copies.reserve(src.resources.levels); for (s32 level = 0; level < src.resources.levels; ++level) { @@ -762,6 +762,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn if (is_dst_3d) { copy.extent.depth = src.size.depth; } + copy.extent.width = std::max<u32>((copy.extent.width * up_scale) >> down_shift, 1); + if (both_2d) { + copy.extent.height = std::max<u32>((copy.extent.height * up_scale) >> down_shift, 1); + } } return copies; } @@ -1153,10 +1157,10 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { dst_info.format = dst->info.format; } - if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { dst_info.format = src->info.format; } - if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { src_info.format = dst->info.format; } } diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 766502908..7af52de2e 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -55,7 +55,8 @@ struct OverlapResult { [[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, - SubresourceBase base); + SubresourceBase base, u32 up_scale = 1, + u32 down_shift = 0); [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); |