summaryrefslogtreecommitdiffstats
path: root/src/video_core/texture_cache/texture_cache.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/texture_cache/texture_cache.h')
-rw-r--r--src/video_core/texture_cache/texture_cache.h498
1 files changed, 438 insertions, 60 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index f70c1f764..241f71a91 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -7,6 +7,7 @@
#include <unordered_set>
#include "common/alignment.h"
+#include "common/settings.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/texture_cache/image_view_base.h"
@@ -44,21 +45,22 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
// Make sure the first index is reserved for the null resources
// This way the null resource becomes a compile time constant
- void(slot_image_views.insert(runtime, NullImageParams{}));
+ void(slot_images.insert(NullImageParams{}));
+ void(slot_image_views.insert(runtime, NullImageViewParams{}));
void(slot_samplers.insert(runtime, sampler_descriptor));
if constexpr (HAS_DEVICE_MEMORY_INFO) {
const auto device_memory = runtime.GetDeviceLocalMemory();
- const u64 possible_expected_memory = (device_memory * 3) / 10;
- const u64 possible_critical_memory = (device_memory * 6) / 10;
- expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
- critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
+ const u64 possible_expected_memory = (device_memory * 4) / 10;
+ const u64 possible_critical_memory = (device_memory * 7) / 10;
+ expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB);
+ critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB);
minimum_memory = 0;
} else {
- // on OGL we can be more conservatives as the driver takes care.
+ // On OpenGL we can be more conservatives as the driver takes care.
expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
- minimum_memory = expected_memory;
+ minimum_memory = 0;
}
}
@@ -67,7 +69,7 @@ void TextureCache<P>::RunGarbageCollector() {
const bool high_priority_mode = total_used_memory >= expected_memory;
const bool aggressive_mode = total_used_memory >= critical_memory;
const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL;
- size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5);
+ size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10);
const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) {
if (num_iterations == 0) {
return true;
@@ -89,7 +91,7 @@ void TextureCache<P>::RunGarbageCollector() {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
- DeleteImage(image_id);
+ DeleteImage(image_id, image.scale_tick > frame_tick + 5);
return false;
};
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
@@ -103,6 +105,7 @@ void TextureCache<P>::TickFrame() {
sentenced_images.Tick();
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
+ runtime.TickFrame();
++frame_tick;
}
@@ -122,15 +125,14 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
}
template <class P>
-void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
- std::span<ImageViewId> image_view_ids) {
- FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
+template <bool has_blacklists>
+void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
+ FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
}
template <class P>
-void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
- std::span<ImageViewId> image_view_ids) {
- FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
+void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
+ FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
}
template <class P>
@@ -190,6 +192,102 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
}
template <class P>
+bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
+ auto& flags = maxwell3d.dirty.flags;
+ u32 scale_rating = 0;
+ bool rescaled = false;
+ std::array<ImageId, NUM_RT> tmp_color_images{};
+ ImageId tmp_depth_image{};
+ do {
+ flags[Dirty::RenderTargets] = false;
+
+ has_deleted_images = false;
+ // Render target control is used on all render targets, so force look ups when this one is
+ // up
+ const bool force = flags[Dirty::RenderTargetControl];
+ flags[Dirty::RenderTargetControl] = false;
+
+ scale_rating = 0;
+ bool any_rescaled = false;
+ bool can_rescale = true;
+ const auto check_rescale = [&](ImageViewId view_id, ImageId& id_save) {
+ if (view_id != NULL_IMAGE_VIEW_ID && view_id != ImageViewId{}) {
+ const auto& view = slot_image_views[view_id];
+ const auto image_id = view.image_id;
+ id_save = image_id;
+ auto& image = slot_images[image_id];
+ can_rescale &= ImageCanRescale(image);
+ any_rescaled |= True(image.flags & ImageFlagBits::Rescaled) ||
+ GetFormatType(image.info.format) != SurfaceType::ColorTexture;
+ scale_rating = std::max<u32>(scale_rating, image.scale_tick <= frame_tick
+ ? image.scale_rating + 1U
+ : image.scale_rating);
+ } else {
+ id_save = CORRUPT_ID;
+ }
+ };
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+ if (flags[Dirty::ColorBuffer0 + index] || force) {
+ flags[Dirty::ColorBuffer0 + index] = false;
+ BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
+ }
+ check_rescale(color_buffer_id, tmp_color_images[index]);
+ }
+ if (flags[Dirty::ZetaBuffer] || force) {
+ flags[Dirty::ZetaBuffer] = false;
+ BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
+ }
+ check_rescale(render_targets.depth_buffer_id, tmp_depth_image);
+
+ if (can_rescale) {
+ rescaled = any_rescaled || scale_rating >= 2;
+ const auto scale_up = [this](ImageId image_id) {
+ if (image_id != CORRUPT_ID) {
+ Image& image = slot_images[image_id];
+ ScaleUp(image);
+ }
+ };
+ if (rescaled) {
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ scale_up(tmp_color_images[index]);
+ }
+ scale_up(tmp_depth_image);
+ scale_rating = 2;
+ }
+ } else {
+ rescaled = false;
+ const auto scale_down = [this](ImageId image_id) {
+ if (image_id != CORRUPT_ID) {
+ Image& image = slot_images[image_id];
+ ScaleDown(image);
+ }
+ };
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ scale_down(tmp_color_images[index]);
+ }
+ scale_down(tmp_depth_image);
+ scale_rating = 1;
+ }
+ } while (has_deleted_images);
+ const auto set_rating = [this, scale_rating](ImageId image_id) {
+ if (image_id != CORRUPT_ID) {
+ Image& image = slot_images[image_id];
+ image.scale_rating = scale_rating;
+ if (image.scale_tick <= frame_tick) {
+ image.scale_tick = frame_tick + 1;
+ }
+ }
+ };
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ set_rating(tmp_color_images[index]);
+ }
+ set_rating(tmp_depth_image);
+
+ return rescaled;
+}
+
+template <class P>
void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
using namespace VideoCommon::Dirty;
auto& flags = maxwell3d.dirty.flags;
@@ -202,24 +300,18 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
return;
}
- flags[Dirty::RenderTargets] = false;
- // Render target control is used on all render targets, so force look ups when this one is up
- const bool force = flags[Dirty::RenderTargetControl];
- flags[Dirty::RenderTargetControl] = false;
+ const bool rescaled = RescaleRenderTargets(is_clear);
+ if (is_rescaling != rescaled) {
+ flags[Dirty::RescaleViewports] = true;
+ flags[Dirty::RescaleScissors] = true;
+ is_rescaling = rescaled;
+ }
for (size_t index = 0; index < NUM_RT; ++index) {
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
- if (flags[Dirty::ColorBuffer0 + index] || force) {
- flags[Dirty::ColorBuffer0 + index] = false;
- BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
- }
PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
}
- if (flags[Dirty::ZetaBuffer] || force) {
- flags[Dirty::ZetaBuffer] = false;
- BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
- }
const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
@@ -227,9 +319,15 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
for (size_t index = 0; index < NUM_RT; ++index) {
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
}
+ u32 up_scale = 1;
+ u32 down_shift = 0;
+ if (is_rescaling) {
+ up_scale = Settings::values.resolution_info.up_scale;
+ down_shift = Settings::values.resolution_info.down_shift;
+ }
render_targets.size = Extent2D{
- maxwell3d.regs.render_area.width,
- maxwell3d.regs.render_area.height,
+ (maxwell3d.regs.render_area.width * up_scale) >> down_shift,
+ (maxwell3d.regs.render_area.height * up_scale) >> down_shift,
};
flags[Dirty::DepthBiasGlobal] = true;
@@ -241,17 +339,28 @@ typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
}
template <class P>
+template <bool has_blacklists>
void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids,
- std::span<const u32> indices,
- std::span<ImageViewId> image_view_ids) {
- ASSERT(indices.size() <= image_view_ids.size());
+ std::span<ImageViewInOut> views) {
+ bool has_blacklisted;
do {
has_deleted_images = false;
- std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
- return VisitImageView(table, cached_image_view_ids, index);
- });
- } while (has_deleted_images);
+ if constexpr (has_blacklists) {
+ has_blacklisted = false;
+ }
+ for (ImageViewInOut& view : views) {
+ view.id = VisitImageView(table, cached_image_view_ids, view.index);
+ if constexpr (has_blacklists) {
+ if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) {
+ const ImageViewBase& image_view{slot_image_views[view.id]};
+ auto& image = slot_images[image_view.image_id];
+ has_blacklisted |= ScaleDown(image);
+ image.scale_rating = 0;
+ }
+ }
+ }
+ } while (has_deleted_images || (has_blacklists && has_blacklisted));
}
template <class P>
@@ -369,8 +478,43 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
PrepareImage(src_id, false, false);
PrepareImage(dst_id, true, false);
- ImageBase& dst_image = slot_images[dst_id];
- const ImageBase& src_image = slot_images[src_id];
+ Image& dst_image = slot_images[dst_id];
+ Image& src_image = slot_images[src_id];
+ bool is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled);
+ bool is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled);
+
+ const bool is_resolve = src_image.info.num_samples != 1 && dst_image.info.num_samples == 1;
+ if (is_src_rescaled != is_dst_rescaled) {
+ if (ImageCanRescale(src_image)) {
+ ScaleUp(src_image);
+ is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled);
+ if (is_resolve) {
+ dst_image.info.rescaleable = true;
+ for (const auto& alias : dst_image.aliased_images) {
+ Image& other_image = slot_images[alias.id];
+ other_image.info.rescaleable = true;
+ }
+ }
+ }
+ if (ImageCanRescale(dst_image)) {
+ ScaleUp(dst_image);
+ is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled);
+ }
+ }
+ if (is_resolve && (is_src_rescaled != is_dst_rescaled)) {
+ // A resolve requires both images to be the same dimensions. Resize down if needed.
+ ScaleDown(src_image);
+ ScaleDown(dst_image);
+ is_src_rescaled = True(src_image.flags & ImageFlagBits::Rescaled);
+ is_dst_rescaled = True(dst_image.flags & ImageFlagBits::Rescaled);
+ }
+ const auto& resolution = Settings::values.resolution_info;
+ const auto scale_region = [&](Region2D& region) {
+ region.start.x = resolution.ScaleUp(region.start.x);
+ region.start.y = resolution.ScaleUp(region.start.y);
+ region.end.x = resolution.ScaleUp(region.end.x);
+ region.end.y = resolution.ScaleUp(region.end.y);
+ };
// TODO: Deduplicate
const std::optional src_base = src_image.TryFindBase(src.Address());
@@ -378,20 +522,26 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
- const Region2D src_region{
+ Region2D src_region{
Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
};
+ if (is_src_rescaled) {
+ scale_region(src_region);
+ }
const std::optional dst_base = dst_image.TryFindBase(dst.Address());
const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
- const Region2D dst_region{
+ Region2D dst_region{
Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
};
+ if (is_dst_rescaled) {
+ scale_region(dst_region);
+ }
// Always call this after src_framebuffer_id was queried, as the address might be invalidated.
Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
@@ -487,6 +637,20 @@ void TextureCache<P>::PopAsyncFlushes() {
}
template <class P>
+bool TextureCache<P>::IsRescaling() const noexcept {
+ return is_rescaling;
+}
+
+template <class P>
+bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcept {
+ if (image_view.type == ImageViewType::Buffer) {
+ return false;
+ }
+ const ImageBase& image = slot_images[image_view.image_id];
+ return True(image.flags & ImageFlagBits::Rescaled);
+}
+
+template <class P>
bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
bool is_modified = false;
ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
@@ -624,6 +788,105 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
}
template <class P>
+bool TextureCache<P>::ImageCanRescale(ImageBase& image) {
+ if (!image.info.rescaleable) {
+ return false;
+ }
+ if (Settings::values.resolution_info.downscale && !image.info.downscaleable) {
+ return false;
+ }
+ if (True(image.flags & (ImageFlagBits::Rescaled | ImageFlagBits::CheckingRescalable))) {
+ return true;
+ }
+ if (True(image.flags & ImageFlagBits::IsRescalable)) {
+ return true;
+ }
+ image.flags |= ImageFlagBits::CheckingRescalable;
+ for (const auto& alias : image.aliased_images) {
+ Image& other_image = slot_images[alias.id];
+ if (!ImageCanRescale(other_image)) {
+ image.flags &= ~ImageFlagBits::CheckingRescalable;
+ return false;
+ }
+ }
+ image.flags &= ~ImageFlagBits::CheckingRescalable;
+ image.flags |= ImageFlagBits::IsRescalable;
+ return true;
+}
+
+template <class P>
+void TextureCache<P>::InvalidateScale(Image& image) {
+ if (image.scale_tick <= frame_tick) {
+ image.scale_tick = frame_tick + 1;
+ }
+ const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
+ auto& dirty = maxwell3d.dirty.flags;
+ dirty[Dirty::RenderTargets] = true;
+ dirty[Dirty::ZetaBuffer] = true;
+ for (size_t rt = 0; rt < NUM_RT; ++rt) {
+ dirty[Dirty::ColorBuffer0 + rt] = true;
+ }
+ for (const ImageViewId image_view_id : image_view_ids) {
+ std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
+ if (render_targets.depth_buffer_id == image_view_id) {
+ render_targets.depth_buffer_id = ImageViewId{};
+ }
+ }
+ RemoveImageViewReferences(image_view_ids);
+ RemoveFramebuffers(image_view_ids);
+ for (const ImageViewId image_view_id : image_view_ids) {
+ sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
+ slot_image_views.erase(image_view_id);
+ }
+ image.image_view_ids.clear();
+ image.image_view_infos.clear();
+ if constexpr (ENABLE_VALIDATION) {
+ std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
+ std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+ }
+ graphics_image_table.Invalidate();
+ compute_image_table.Invalidate();
+ has_deleted_images = true;
+}
+
+template <class P>
+u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) {
+ const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale *
+ Settings::values.resolution_info.up_scale);
+ const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift +
+ Settings::values.resolution_info.down_shift);
+ const u64 image_size_bytes =
+ static_cast<u64>(std::max(image.guest_size_bytes, image.unswizzled_size_bytes));
+ const u64 tentative_size = (image_size_bytes * scale_up) >> down_shift;
+ const u64 fitted_size = Common::AlignUp(tentative_size, 1024);
+ return fitted_size;
+}
+
+template <class P>
+bool TextureCache<P>::ScaleUp(Image& image) {
+ const bool has_copy = image.HasScaled();
+ const bool rescaled = image.ScaleUp();
+ if (!rescaled) {
+ return false;
+ }
+ if (!has_copy) {
+ total_used_memory += GetScaledImageSizeBytes(image);
+ }
+ InvalidateScale(image);
+ return true;
+}
+
+template <class P>
+bool TextureCache<P>::ScaleDown(Image& image) {
+ const bool rescaled = image.ScaleDown();
+ if (!rescaled) {
+ return false;
+ }
+ InvalidateScale(image);
+ return true;
+}
+
+template <class P>
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
@@ -660,12 +923,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
std::vector<ImageId> right_aliased_ids;
std::unordered_set<ImageId> ignore_textures;
std::vector<ImageId> bad_overlap_ids;
+ std::vector<ImageId> all_siblings;
+ const bool this_is_linear = info.type == ImageType::Linear;
const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
if (True(overlap.flags & ImageFlagBits::Remapped)) {
ignore_textures.insert(overlap_id);
return;
}
- if (info.type == ImageType::Linear) {
+ const bool overlap_is_linear = overlap.info.type == ImageType::Linear;
+ if (this_is_linear != overlap_is_linear) {
+ return;
+ }
+ if (this_is_linear && overlap_is_linear) {
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
// Alias linear images with the same pitch
left_aliased_ids.push_back(overlap_id);
@@ -681,6 +950,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
cpu_addr = solution->cpu_addr;
new_info.resources = solution->resources;
overlap_ids.push_back(overlap_id);
+ all_siblings.push_back(overlap_id);
return;
}
static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
@@ -688,10 +958,12 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
left_aliased_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::Alias;
+ all_siblings.push_back(overlap_id);
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
broken_views, native_bgr)) {
right_aliased_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::Alias;
+ all_siblings.push_back(overlap_id);
} else {
bad_overlap_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::BadOverlap;
@@ -709,6 +981,32 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
}
};
ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
+
+ bool can_rescale = info.rescaleable;
+ bool any_rescaled = false;
+ for (const ImageId sibling_id : all_siblings) {
+ if (!can_rescale) {
+ break;
+ }
+ Image& sibling = slot_images[sibling_id];
+ can_rescale &= ImageCanRescale(sibling);
+ any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled);
+ }
+
+ can_rescale &= any_rescaled;
+
+ if (can_rescale) {
+ for (const ImageId sibling_id : all_siblings) {
+ Image& sibling = slot_images[sibling_id];
+ ScaleUp(sibling);
+ }
+ } else {
+ for (const ImageId sibling_id : all_siblings) {
+ Image& sibling = slot_images[sibling_id];
+ ScaleDown(sibling);
+ }
+ }
+
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
Image& new_image = slot_images[new_image_id];
@@ -731,14 +1029,23 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
// TODO: Only upload what we need
RefreshContents(new_image, new_image_id);
+ if (can_rescale) {
+ ScaleUp(new_image);
+ } else {
+ ScaleDown(new_image);
+ }
+
for (const ImageId overlap_id : overlap_ids) {
Image& overlap = slot_images[overlap_id];
if (overlap.info.num_samples != new_image.info.num_samples) {
LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
} else {
+ const auto& resolution = Settings::values.resolution_info;
const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
- const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
- runtime.CopyImage(new_image, overlap, copies);
+ const u32 up_scale = can_rescale ? resolution.up_scale : 1;
+ const u32 down_shift = can_rescale ? resolution.down_shift : 0;
+ auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
+ runtime.CopyImage(new_image, overlap, std::move(copies));
}
if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id);
@@ -1083,13 +1390,6 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
"Trying to unregister an already registered image");
image.flags &= ~ImageFlagBits::Registered;
image.flags &= ~ImageFlagBits::BadOverlap;
- u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
- if ((IsPixelFormatASTC(image.info.format) &&
- True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
- True(image.flags & ImageFlagBits::Converted)) {
- tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
- }
- total_used_memory -= Common::AlignUp(tentative_size, 1024);
lru_cache.Free(image.lru_index);
const auto& clear_page_table =
[this, image_id](
@@ -1213,8 +1513,18 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
}
template <class P>
-void TextureCache<P>::DeleteImage(ImageId image_id) {
+void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
ImageBase& image = slot_images[image_id];
+ if (image.HasScaled()) {
+ total_used_memory -= GetScaledImageSizeBytes(image);
+ }
+ u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
+ if ((IsPixelFormatASTC(image.info.format) &&
+ True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
+ True(image.flags & ImageFlagBits::Converted)) {
+ tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
+ }
+ total_used_memory -= Common::AlignUp(tentative_size, 1024);
const GPUVAddr gpu_addr = image.gpu_addr;
const auto alloc_it = image_allocs_table.find(gpu_addr);
if (alloc_it == image_allocs_table.end()) {
@@ -1269,10 +1579,14 @@ void TextureCache<P>::DeleteImage(ImageId image_id) {
num_removed_overlaps);
}
for (const ImageViewId image_view_id : image_view_ids) {
- sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
+ if (!immediate_delete) {
+ sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
+ }
slot_image_views.erase(image_view_id);
}
- sentenced_images.Push(std::move(slot_images[image_id]));
+ if (!immediate_delete) {
+ sentenced_images.Push(std::move(slot_images[image_id]));
+ }
slot_images.erase(image_id);
alloc_images.erase(alloc_image_it);
@@ -1306,6 +1620,9 @@ void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_vi
auto it = framebuffers.begin();
while (it != framebuffers.end()) {
if (it->first.Contains(removed_views)) {
+ auto framebuffer_id = it->second;
+ ASSERT(framebuffer_id);
+ sentenced_framebuffers.Push(std::move(slot_framebuffers[framebuffer_id]));
it = framebuffers.erase(it);
} else {
++it;
@@ -1322,26 +1639,60 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
template <class P>
void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
boost::container::small_vector<const AliasedImage*, 1> aliased_images;
- ImageBase& image = slot_images[image_id];
+ Image& image = slot_images[image_id];
+ bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
u64 most_recent_tick = image.modification_tick;
for (const AliasedImage& aliased : image.aliased_images) {
ImageBase& aliased_image = slot_images[aliased.id];
if (image.modification_tick < aliased_image.modification_tick) {
most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
aliased_images.push_back(&aliased);
+ any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled);
}
}
if (aliased_images.empty()) {
return;
}
+ const bool can_rescale = ImageCanRescale(image);
+ if (any_rescaled) {
+ if (can_rescale) {
+ ScaleUp(image);
+ } else {
+ ScaleDown(image);
+ }
+ }
image.modification_tick = most_recent_tick;
std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
const ImageBase& lhs_image = slot_images[lhs->id];
const ImageBase& rhs_image = slot_images[rhs->id];
return lhs_image.modification_tick < rhs_image.modification_tick;
});
+ const auto& resolution = Settings::values.resolution_info;
for (const AliasedImage* const aliased : aliased_images) {
- CopyImage(image_id, aliased->id, aliased->copies);
+ if (!resolution.active | !any_rescaled) {
+ CopyImage(image_id, aliased->id, aliased->copies);
+ continue;
+ }
+ Image& aliased_image = slot_images[aliased->id];
+ if (!can_rescale) {
+ ScaleDown(aliased_image);
+ CopyImage(image_id, aliased->id, aliased->copies);
+ continue;
+ }
+ ScaleUp(aliased_image);
+
+ const bool both_2d{image.info.type == ImageType::e2D &&
+ aliased_image.info.type == ImageType::e2D};
+ auto copies = aliased->copies;
+ for (auto copy : copies) {
+ copy.extent.width = std::max<u32>(
+ (copy.extent.width * resolution.up_scale) >> resolution.down_shift, 1);
+ if (both_2d) {
+ copy.extent.height = std::max<u32>(
+ (copy.extent.height * resolution.up_scale) >> resolution.down_shift, 1);
+ }
+ }
+ CopyImage(image_id, aliased->id, copies);
}
}
@@ -1377,9 +1728,25 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi
}
template <class P>
-void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
+void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) {
Image& dst = slot_images[dst_id];
Image& src = slot_images[src_id];
+ const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled);
+ if (is_rescaled) {
+ ASSERT(True(dst.flags & ImageFlagBits::Rescaled));
+ const bool both_2d{src.info.type == ImageType::e2D && dst.info.type == ImageType::e2D};
+ const auto& resolution = Settings::values.resolution_info;
+ for (auto& copy : copies) {
+ copy.src_offset.x = resolution.ScaleUp(copy.src_offset.x);
+ copy.dst_offset.x = resolution.ScaleUp(copy.dst_offset.x);
+ copy.extent.width = resolution.ScaleUp(copy.extent.width);
+ if (both_2d) {
+ copy.src_offset.y = resolution.ScaleUp(copy.src_offset.y);
+ copy.dst_offset.y = resolution.ScaleUp(copy.dst_offset.y);
+ copy.extent.height = resolution.ScaleUp(copy.extent.height);
+ }
+ }
+ }
const auto dst_format_type = GetFormatType(dst.info.format);
const auto src_format_type = GetFormatType(src.info.format);
if (src_format_type == dst_format_type) {
@@ -1392,6 +1759,9 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const
}
UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
+ if constexpr (HAS_PIXEL_FORMAT_CONVERSIONS) {
+ return runtime.ConvertImage(dst, src, copies);
+ }
for (const ImageCopy& copy : copies) {
UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
@@ -1424,7 +1794,7 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const
};
UNIMPLEMENTED_IF(copy.extent != expected_size);
- runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
+ runtime.ConvertImage(dst_framebuffer, dst_view, src_view, is_rescaled);
}
}
@@ -1433,8 +1803,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
if (*old_id == new_id) {
return;
}
- if (*old_id) {
- const ImageViewBase& old_view = slot_image_views[*old_id];
+ if (new_id) {
+ const ImageViewBase& old_view = slot_image_views[new_id];
if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
uncommitted_downloads.push_back(old_view.image_id);
}
@@ -1447,10 +1817,18 @@ std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
ImageId image_id, const ImageViewInfo& view_info) {
const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
const ImageBase& image = slot_images[image_id];
+ const bool is_rescaled = True(image.flags & ImageFlagBits::Rescaled);
const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
- const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
+ Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
+ if (is_rescaled) {
+ const auto& resolution = Settings::values.resolution_info;
+ extent.width = resolution.ScaleUp(extent.width);
+ if (image.info.type == ImageType::e2D) {
+ extent.height = resolution.ScaleUp(extent.height);
+ }
+ }
const u32 num_samples = image.info.num_samples;
const auto [samples_x, samples_y] = SamplesLog2(num_samples);
const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{