// SPDX-FileCopyrightText: 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once #include #include #include #include #include #include #include #include #include #include "common/common_types.h" #include "common/hash.h" #include "common/literals.h" #include "common/lru_cache.h" #include "common/polyfill_ranges.h" #include "common/scratch_buffer.h" #include "common/thread_worker.h" #include "video_core/compatible_formats.h" #include "video_core/control/channel_state_cache.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/engines/fermi_2d.h" #include "video_core/surface.h" #include "video_core/texture_cache/descriptor_table.h" #include "video_core/texture_cache/image_base.h" #include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/render_targets.h" #include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" namespace Tegra::Control { struct ChannelState; } namespace VideoCommon { using Tegra::Texture::SwizzleSource; using Tegra::Texture::TICEntry; using Tegra::Texture::TSCEntry; using VideoCore::Surface::GetFormatType; using VideoCore::Surface::IsCopyCompatible; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using namespace Common::Literals; struct ImageViewInOut { u32 index{}; bool blacklist{}; ImageViewId id{}; }; struct AsyncDecodeContext { ImageId image_id; Common::ScratchBuffer decoded_data; std::vector copies; std::mutex mutex; std::atomic_bool complete; }; using TextureCacheGPUMap = std::unordered_map, Common::IdentityHash>; class TextureCacheChannelInfo : public ChannelInfo { public: TextureCacheChannelInfo() = delete; TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept; TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete; TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete; DescriptorTable graphics_image_table{gpu_memory}; DescriptorTable graphics_sampler_table{gpu_memory}; std::vector graphics_sampler_ids; std::vector graphics_image_view_ids; DescriptorTable compute_image_table{gpu_memory}; DescriptorTable compute_sampler_table{gpu_memory}; std::vector compute_sampler_ids; std::vector compute_image_view_ids; std::unordered_map image_views; std::unordered_map samplers; TextureCacheGPUMap* gpu_page_table; }; template class TextureCache : public VideoCommon::ChannelSetupCaches { /// Address shift for caching images into a hash table static constexpr u64 YUZU_PAGEBITS = 20; /// Enables debugging features to the texture cache static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; /// Implement blits as copies between framebuffers static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; /// True when some copies have to be emulated static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; /// True when the API can provide info about the memory of the device. static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; /// True when the API can do asynchronous texture downloads. static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; static constexpr size_t UNSET_CHANNEL{std::numeric_limits::max()}; static constexpr s64 TARGET_THRESHOLD = 4_GiB; static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; static constexpr size_t GC_EMERGENCY_COUNTS = 2; using Runtime = typename P::Runtime; using Image = typename P::Image; using ImageAlloc = typename P::ImageAlloc; using ImageView = typename P::ImageView; using Sampler = typename P::Sampler; using Framebuffer = typename P::Framebuffer; using AsyncBuffer = typename P::AsyncBuffer; struct BlitImages { ImageId dst_id; ImageId src_id; PixelFormat dst_format; PixelFormat src_format; }; public: explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&); /// Notify the cache that a new frame has been queued void TickFrame(); /// Return a constant reference to the given image view id [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; /// Return a reference to the given image view id [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; /// Get the imageview from the graphics descriptor table in the specified index [[nodiscard]] ImageView& GetImageView(u32 index) noexcept; /// Mark an image as modified from the GPU void MarkModification(ImageId id) noexcept; /// Fill image_view_ids with the graphics images in indices template void FillGraphicsImageViews(std::span views); /// Fill image_view_ids with the compute images in indices void FillComputeImageViews(std::span views); /// Get the sampler from the graphics descriptor table in the specified index Sampler* GetGraphicsSampler(u32 index); /// Get the sampler from the compute descriptor table in the specified index Sampler* GetComputeSampler(u32 index); /// Refresh the state for graphics image view and sampler descriptors void SynchronizeGraphicsDescriptors(); /// Refresh the state for compute image view and sampler descriptors void SynchronizeComputeDescriptors(); /// Updates the Render Targets if they can be rescaled /// @param is_clear True when the render targets are being used for clears /// @retval True if the Render Targets have been rescaled. bool RescaleRenderTargets(bool is_clear); /// Update bound render targets and upload memory if necessary /// @param is_clear True when the render targets are being used for clears void UpdateRenderTargets(bool is_clear); /// Find a framebuffer with the currently bound render targets /// UpdateRenderTargets should be called before this Framebuffer* GetFramebuffer(); /// Mark images in a range as modified from the CPU void WriteMemory(VAddr cpu_addr, size_t size); /// Download contents of host images to guest memory in a region void DownloadMemory(VAddr cpu_addr, size_t size); /// Remove images in a region void UnmapMemory(VAddr cpu_addr, size_t size); /// Remove images in a region void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); /// Blit an image with the given parameters bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy); /// Try to find a cached image view in the given CPU address [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); /// Return true when there are uncommitted images to be downloaded [[nodiscard]] bool HasUncommittedFlushes() const noexcept; /// Return true when the caller should wait for async downloads [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; /// Commit asynchronous downloads void CommitAsyncFlushes(); /// Pop asynchronous downloads void PopAsyncFlushes(); [[nodiscard]] std::optional>> ObtainImage( const Tegra::DMA::ImageOperand& operand, bool mark_as_modified); /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); [[nodiscard]] bool IsRescaling() const noexcept; [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; /// Create channel state. void CreateChannel(Tegra::Control::ChannelState& channel) final override; std::recursive_mutex mutex; private: /// Iterate over all page indices in a range template static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS; for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) { if constexpr (RETURNS_BOOL) { if (func(page)) { break; } } else { func(page); } } } template static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS; for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) { if constexpr (RETURNS_BOOL) { if (func(page)) { break; } } else { func(page); } } } void OnGPUASRegister(size_t map_id) final override; /// Runs the Garbage Collector. void RunGarbageCollector(); /// Fills image_view_ids in the image views in indices template void FillImageViews(DescriptorTable& table, std::span cached_image_view_ids, std::span views); /// Find or create an image view in the guest descriptor table ImageViewId VisitImageView(DescriptorTable& table, std::span cached_image_view_ids, u32 index); /// Find or create a framebuffer with the given render target parameters FramebufferId GetFramebufferId(const RenderTargets& key); /// Refresh the contents (pixel data) of an image void RefreshContents(Image& image, ImageId image_id); /// Upload data from guest to an image template void UploadImageContents(Image& image, StagingBuffer& staging_buffer); /// Find or create an image view from a guest descriptor [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); /// Create a new image view from a guest descriptor [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); /// Find or create an image from the given parameters [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options = RelaxedOptions{}); /// Find an image from the given parameters [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options); /// Create an image from the given parameters [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options); /// Create a new image and join perfectly matching existing images /// Remove joined images from the cache [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr); /// Return a blit image pair from the given guest blit parameters [[nodiscard]] std::optional GetBlitImages( const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Config& copy); /// Find or create a sampler from a guest descriptor sampler [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); /// Find or create an image view for the given color buffer index [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); /// Find or create an image view for the depth buffer [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); /// Find or create a view for a render target with the given image parameters [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, bool is_clear); /// Iterates over all the images in a region calling func template void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); template void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); template void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); /// Iterates over all the images in a region calling func template void ForEachSparseSegment(ImageBase& image, Func&& func); /// Find or create an image view in the given image with the passed parameters [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); /// Register image in the page table void RegisterImage(ImageId image); /// Unregister image from the page table void UnregisterImage(ImageId image); /// Track CPU reads and writes for image void TrackImage(ImageBase& image, ImageId image_id); /// Stop tracking CPU reads and writes for image void UntrackImage(ImageBase& image, ImageId image_id); /// Delete image from the cache void DeleteImage(ImageId image, bool immediate_delete = false); /// Remove image views references from the cache void RemoveImageViewReferences(std::span removed_views); /// Remove framebuffers using the given image views from the cache void RemoveFramebuffers(std::span removed_views); /// Mark an image as modified from the GPU void MarkModification(ImageBase& image) noexcept; /// Synchronize image aliases, copying data if needed void SynchronizeAliases(ImageId image_id); /// Prepare an image to be used void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); /// Prepare an image view to be used void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); /// Execute copies from one image to the other, even if they are incompatible void CopyImage(ImageId dst_id, ImageId src_id, std::vector copies); /// Bind an image view as render target, downloading resources preemtively if needed void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); /// Create a render target from a given image and image view parameters [[nodiscard]] std::pair RenderTargetFromImage( ImageId, const ImageViewInfo& view_info); /// Returns true if the current clear parameters clear the whole image of a given image view [[nodiscard]] bool IsFullClear(ImageViewId id); bool ImageCanRescale(ImageBase& image); void InvalidateScale(Image& image); bool ScaleUp(Image& image); bool ScaleDown(Image& image); u64 GetScaledImageSizeBytes(const ImageBase& image); void QueueAsyncDecode(Image& image, ImageId image_id); void TickAsyncDecode(); Runtime& runtime; VideoCore::RasterizerInterface& rasterizer; std::deque gpu_page_table_storage; RenderTargets render_targets; std::unordered_map framebuffers; std::unordered_map, Common::IdentityHash> page_table; std::unordered_map, Common::IdentityHash> sparse_page_table; std::unordered_map> sparse_views; VAddr virtual_invalid_space{}; bool has_deleted_images = false; bool is_rescaling = false; u64 total_used_memory = 0; u64 minimum_memory; u64 expected_memory; u64 critical_memory; size_t critical_gc; SlotVector slot_images; SlotVector slot_map_views; SlotVector slot_image_views; SlotVector slot_image_allocs; SlotVector slot_samplers; SlotVector slot_framebuffers; // TODO: This data structure is not optimal and it should be reworked std::vector uncommitted_downloads; std::deque> committed_downloads; std::deque> async_buffers; struct LRUItemParams { using ObjectType = ImageId; using TickType = u64; }; Common::LeastRecentlyUsedCache lru_cache; static constexpr size_t TICKS_TO_DESTROY = 6; DelayedDestructionRing sentenced_images; DelayedDestructionRing sentenced_image_view; DelayedDestructionRing sentenced_framebuffers; std::unordered_map image_allocs_table; Common::ScratchBuffer swizzle_data_buffer; Common::ScratchBuffer unswizzle_data_buffer; u64 modification_tick = 0; u64 frame_tick = 0; Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"}; std::vector> async_decodes; }; } // namespace VideoCommon