diff options
Diffstat (limited to 'src/video_core/texture_cache/texture_cache.h')
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 139 |
1 files changed, 139 insertions, 0 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3e2cbb0b0..335338434 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -85,6 +85,11 @@ void TextureCache<P>::RunGarbageCollector() { } --num_iterations; auto& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::IsDecoding)) { + // This image is still being decoded, deleting it will invalidate the slot + // used by the async decoder thread. + return false; + } const bool must_download = image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); if (!high_priority_mode && @@ -133,6 +138,8 @@ void TextureCache<P>::TickFrame() { sentenced_images.Tick(); sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); + TickAsyncDecode(); + runtime.TickFrame(); critical_gc = 0; ++frame_tick; @@ -777,6 +784,10 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); return; } + if (True(image.flags & ImageFlagBits::AsynchronousDecode)) { + QueueAsyncDecode(image, image_id); + return; + } auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); UploadImageContents(image, staging); runtime.InsertUploadMemoryBarrier(); @@ -990,6 +1001,65 @@ u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) { } template <class P> +void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) { + UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted)); + LOG_INFO(HW_GPU, "Queuing async texture decode"); + + image.flags |= ImageFlagBits::IsDecoding; + auto decode = std::make_unique<AsyncDecodeContext>(); + auto* decode_ptr = decode.get(); + decode->image_id = image_id; + async_decodes.push_back(std::move(decode)); + + Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes); + const size_t guest_size_bytes = image.guest_size_bytes; + swizzle_data_buffer.resize_destructive(guest_size_bytes); + gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); + auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer, + local_unswizzle_data_buffer); + const size_t out_size = MapSizeBytes(image); + + auto func = [out_size, copies, info = image.info, + input = std::move(local_unswizzle_data_buffer), + async_decode = decode_ptr]() mutable { + async_decode->decoded_data.resize_destructive(out_size); + std::span copies_span{copies.data(), copies.size()}; + ConvertImage(input, info, async_decode->decoded_data, copies_span); + + // TODO: Do we need this lock? + std::unique_lock lock{async_decode->mutex}; + async_decode->copies = std::move(copies); + async_decode->complete = true; + }; + texture_decode_worker.QueueWork(std::move(func)); +} + +template <class P> +void TextureCache<P>::TickAsyncDecode() { + bool has_uploads{}; + auto i = async_decodes.begin(); + while (i != async_decodes.end()) { + auto* async_decode = i->get(); + std::unique_lock lock{async_decode->mutex}; + if (!async_decode->complete) { + ++i; + continue; + } + Image& image = slot_images[async_decode->image_id]; + auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); + std::memcpy(staging.mapped_span.data(), async_decode->decoded_data.data(), + async_decode->decoded_data.size()); + image.UploadMemory(staging, async_decode->copies); + image.flags &= ~ImageFlagBits::IsDecoding; + has_uploads = true; + i = async_decodes.erase(i); + } + if (has_uploads) { + runtime.InsertUploadMemoryBarrier(); + } +} + +template <class P> bool TextureCache<P>::ScaleUp(Image& image) { const bool has_copy = image.HasScaled(); const bool rescaled = image.ScaleUp(); @@ -1289,6 +1359,75 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag } template <class P> +ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) { + std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); + if (!cpu_addr) { + return ImageId{}; + } + } + ImageId image_id{}; + boost::container::small_vector<ImageId, 1> image_ids; + const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (True(existing_image.flags & ImageFlagBits::Remapped)) { + return false; + } + if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) + [[unlikely]] { + const bool strict_size = True(existing_image.flags & ImageFlagBits::Strong); + const ImageInfo& existing = existing_image.info; + if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && + existing.pitch == info.pitch && + IsPitchLinearSameSize(existing, info, strict_size) && + IsViewCompatible(existing.format, info.format, false, true)) { + image_id = existing_image_id; + image_ids.push_back(existing_image_id); + return true; + } + } else if (IsSubCopy(info, existing_image, gpu_addr)) { + image_id = existing_image_id; + image_ids.push_back(existing_image_id); + return true; + } + return false; + }; + ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); + if (image_ids.size() <= 1) [[likely]] { + return image_id; + } + auto image_ids_compare = [this](ImageId a, ImageId b) { + auto& image_a = slot_images[a]; + auto& image_b = slot_images[b]; + return image_a.modification_tick < image_b.modification_tick; + }; + return *std::ranges::max_element(image_ids, image_ids_compare); +} + +template <class P> +std::optional<std::pair<typename TextureCache<P>::Image*, std::pair<u32, u32>>> +TextureCache<P>::ObtainImage(const Tegra::DMA::ImageOperand& operand, bool mark_as_modified) { + ImageInfo dst_info(operand); + ImageId dst_id = FindDMAImage(dst_info, operand.address); + if (!dst_id) { + return std::nullopt; + } + auto& image = slot_images[dst_id]; + auto base = image.TryFindBase(operand.address); + if (!base) { + return std::nullopt; + } + if (False(image.flags & ImageFlagBits::GpuModified)) { + // No need to waste time on an image that's synced with guest + return std::nullopt; + } + PrepareImage(dst_id, mark_as_modified, false); + auto& new_image = slot_images[dst_id]; + lru_cache.Touch(new_image.lru_index, frame_tick); + return std::make_pair(&new_image, std::make_pair(base->level, base->layer)); +} + +template <class P> SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { return NULL_SAMPLER_ID; |