summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/texture_cache/texture_cache.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/texture_cache/texture_cache.h')
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 139
1 files changed, 139 insertions, 0 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 3e2cbb0b0..335338434 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -85,6 +85,11 @@ void TextureCache<P>::RunGarbageCollector() {
}
--num_iterations;
auto& image = slot_images[image_id];
+ if (True(image.flags & ImageFlagBits::IsDecoding)) {
+ // This image is still being decoded, deleting it will invalidate the slot
+ // used by the async decoder thread.
+ return false;
+ }
const bool must_download =
image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
if (!high_priority_mode &&
@@ -133,6 +138,8 @@ void TextureCache<P>::TickFrame() {
sentenced_images.Tick();
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
+ TickAsyncDecode();
+
runtime.TickFrame();
critical_gc = 0;
++frame_tick;
@@ -777,6 +784,10 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
return;
}
+ if (True(image.flags & ImageFlagBits::AsynchronousDecode)) {
+ QueueAsyncDecode(image, image_id);
+ return;
+ }
auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
UploadImageContents(image, staging);
runtime.InsertUploadMemoryBarrier();
@@ -990,6 +1001,65 @@ u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) {
}
+/// Starts an asynchronous decode of an image.
+///
+/// The guest data at image.gpu_addr is read and unswizzled on the calling thread; the
+/// ConvertImage step is queued on texture_decode_worker. The result is written into an
+/// AsyncDecodeContext stored in async_decodes, which TickAsyncDecode consumes once the
+/// context has been marked complete.
+///
+/// @param image    Image to decode. It is flagged IsDecoding here; TickAsyncDecode clears the
+///                 flag after uploading the decoded data.
+/// @param image_id Slot id of the image, recorded in the context so the image can be looked
+///                 up again when the decode has finished.
template <class P>
+void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
+    UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted));
+    LOG_INFO(HW_GPU, "Queuing async texture decode");
+
+    image.flags |= ImageFlagBits::IsDecoding;
+    // Ownership of the context goes to async_decodes; the worker only gets a raw pointer to it.
+    auto decode = std::make_unique<AsyncDecodeContext>();
+    auto* decode_ptr = decode.get();
+    decode->image_id = image_id;
+    async_decodes.push_back(std::move(decode));
+
+    // The unswizzled data gets its own buffer because it is handed over to the worker below.
+    Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes);
+    const size_t guest_size_bytes = image.guest_size_bytes;
+    swizzle_data_buffer.resize_destructive(guest_size_bytes);
+    gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes);
+    auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer,
+                                 local_unswizzle_data_buffer);
+    const size_t out_size = MapSizeBytes(image);
+
+    // `copies` is not used again in this function, so it is moved into the closure instead of
+    // being copied. image.info is captured by value so the worker never touches `image`.
+    auto func = [out_size, copies = std::move(copies), info = image.info,
+                 input = std::move(local_unswizzle_data_buffer),
+                 async_decode = decode_ptr]() mutable {
+        async_decode->decoded_data.resize_destructive(out_size);
+        std::span copies_span{copies.data(), copies.size()};
+        ConvertImage(input, info, async_decode->decoded_data, copies_span);
+
+        // TODO: Do we need this lock?
+        // TickAsyncDecode takes the same mutex before it reads `complete`.
+        std::unique_lock lock{async_decode->mutex};
+        async_decode->copies = std::move(copies);
+        async_decode->complete = true;
+    };
+    texture_decode_worker.QueueWork(std::move(func));
+}
+
+/// Uploads the results of finished asynchronous decodes.
+///
+/// Walks async_decodes. For every context marked complete, the decoded data is copied into a
+/// staging buffer and uploaded to the image, the image's IsDecoding flag is cleared and the
+/// context is removed from the list. Contexts that are not complete yet are left in place.
+/// One upload memory barrier is inserted at the end if at least one image was uploaded.
+template <class P>
+void TextureCache<P>::TickAsyncDecode() {
+    bool has_uploads{};
+    auto i = async_decodes.begin();
+    while (i != async_decodes.end()) {
+        auto* async_decode = i->get();
+        // The worker sets `complete` while holding this mutex, so take it before reading.
+        std::unique_lock lock{async_decode->mutex};
+        if (!async_decode->complete) {
+            ++i;
+            continue;
+        }
+        Image& image = slot_images[async_decode->image_id];
+        auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
+        std::memcpy(staging.mapped_span.data(), async_decode->decoded_data.data(),
+                    async_decode->decoded_data.size());
+        image.UploadMemory(staging, async_decode->copies);
+        image.flags &= ~ImageFlagBits::IsDecoding;
+        has_uploads = true;
+        // Release the mutex before erasing. Erasing drops the owning pointer that
+        // QueueAsyncDecode pushed, which destroys the context and its mutex; destroying a
+        // mutex that is still owned, and unlocking it afterwards from the unique_lock
+        // destructor, is undefined behaviour.
+        lock.unlock();
+        i = async_decodes.erase(i);
+    }
+    if (has_uploads) {
+        runtime.InsertUploadMemoryBarrier();
+    }
+}
+
+template <class P>
bool TextureCache<P>::ScaleUp(Image& image) {
const bool has_copy = image.HasScaled();
const bool rescaled = image.ScaleUp();
@@ -1289,6 +1359,75 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag
}
+/// Searches the cache for an existing image matching `info` at `gpu_addr`.
+///
+/// Images flagged Remapped are ignored. When either the request or the cached image is
+/// linear, the cached image must have the same GPU address, type and pitch, pass
+/// IsPitchLinearSameSize and be view compatible; otherwise IsSubCopy decides.
+///
+/// @param info     Description of the image being looked for.
+/// @param gpu_addr GPU virtual address of the image being looked for.
+/// @return Id of the matching image; when several were collected, the one with the highest
+///         modification_tick. A default-constructed ImageId if the address cannot be
+///         translated or nothing matches.
template <class P>
+ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) {
+    std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+    if (!cpu_addr) {
+        // Retry the translation with the guest size of the image.
+        cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+    }
+    if (!cpu_addr) {
+        return ImageId{};
+    }
+
+    ImageId newest_hit{};
+    boost::container::small_vector<ImageId, 1> hits;
+    // NOTE(review): the callback returns true on a hit; ForEachImageInRegion presumably
+    // treats that as "stop iterating" - confirm against its definition.
+    const auto visit = [&](ImageId candidate_id, ImageBase& candidate) {
+        if (True(candidate.flags & ImageFlagBits::Remapped)) {
+            return false;
+        }
+        const ImageInfo& candidate_info = candidate.info;
+        bool accepted = false;
+        if (info.type == ImageType::Linear || candidate_info.type == ImageType::Linear)
+            [[unlikely]] {
+            const bool strict_size = True(candidate.flags & ImageFlagBits::Strong);
+            accepted = candidate.gpu_addr == gpu_addr && candidate_info.type == info.type &&
+                       candidate_info.pitch == info.pitch &&
+                       IsPitchLinearSameSize(candidate_info, info, strict_size) &&
+                       IsViewCompatible(candidate_info.format, info.format, false, true);
+        } else {
+            accepted = IsSubCopy(info, candidate, gpu_addr);
+        }
+        if (!accepted) {
+            return false;
+        }
+        newest_hit = candidate_id;
+        hits.push_back(candidate_id);
+        return true;
+    };
+    ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), visit);
+
+    if (hits.size() > 1) [[unlikely]] {
+        // Several candidates: pick the most recently modified one.
+        const auto older_than = [this](ImageId lhs, ImageId rhs) {
+            return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
+        };
+        return *std::ranges::max_element(hits, older_than);
+    }
+    return newest_hit;
+}
+
+/// Looks up and prepares the cached image that backs a DMA operand.
+///
+/// @param operand          DMA image operand; an ImageInfo is built from it and its address
+///                         is used for the lookup.
+/// @param mark_as_modified Forwarded to PrepareImage.
+/// @return Pointer to the image together with the (level, layer) of the subresource found at
+///         the operand address, or std::nullopt when no image is found, the address does not
+///         map to a subresource, or the image is not flagged GpuModified.
+template <class P>
+std::optional<std::pair<typename TextureCache<P>::Image*, std::pair<u32, u32>>>
+TextureCache<P>::ObtainImage(const Tegra::DMA::ImageOperand& operand, bool mark_as_modified) {
+    const ImageInfo operand_info(operand);
+    ImageId found_id = FindDMAImage(operand_info, operand.address);
+    if (!found_id) {
+        return std::nullopt;
+    }
+
+    auto& cached = slot_images[found_id];
+    const auto base = cached.TryFindBase(operand.address);
+    // An image without GpuModified is already in sync with guest memory, so there is nothing
+    // to gain from handing it out.
+    if (!base || False(cached.flags & ImageFlagBits::GpuModified)) {
+        return std::nullopt;
+    }
+
+    PrepareImage(found_id, mark_as_modified, false);
+    // Look the image up again after PrepareImage rather than reusing the earlier reference.
+    auto& prepared = slot_images[found_id];
+    lru_cache.Touch(prepared.lru_index, frame_tick);
+    return std::make_pair(&prepared, std::make_pair(base->level, base->layer));
+}
+
+template <class P>
SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
return NULL_SAMPLER_ID;