diff options
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- | src/video_core/texture_cache/format_lookup_table.cpp | 6 | ||||
-rw-r--r-- | src/video_core/texture_cache/formatter.cpp | 22 | ||||
-rw-r--r-- | src/video_core/texture_cache/formatter.h | 8 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_info.cpp | 12 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_info.h | 2 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_view_base.cpp | 12 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_view_base.h | 7 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 189 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 37 | ||||
-rw-r--r-- | src/video_core/texture_cache/util.cpp | 10 |
10 files changed, 229 insertions, 76 deletions
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 5fc2b2fec..11ced6c38 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -210,6 +210,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, return PixelFormat::ASTC_2D_6X6_SRGB; case Hash(TextureFormat::ASTC_2D_10X6, UNORM, LINEAR): return PixelFormat::ASTC_2D_10X6_UNORM; + case Hash(TextureFormat::ASTC_2D_10X6, UNORM, SRGB): + return PixelFormat::ASTC_2D_10X6_SRGB; case Hash(TextureFormat::ASTC_2D_10X5, UNORM, LINEAR): return PixelFormat::ASTC_2D_10X5_UNORM; case Hash(TextureFormat::ASTC_2D_10X5, UNORM, SRGB): @@ -218,6 +220,10 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, return PixelFormat::ASTC_2D_10X10_UNORM; case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB): return PixelFormat::ASTC_2D_10X10_SRGB; + case Hash(TextureFormat::ASTC_2D_12X10, UNORM, LINEAR): + return PixelFormat::ASTC_2D_12X10_UNORM; + case Hash(TextureFormat::ASTC_2D_12X10, UNORM, SRGB): + return PixelFormat::ASTC_2D_12X10_SRGB; case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR): return PixelFormat::ASTC_2D_12X12_UNORM; case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB): diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index 30f72361d..6279d8e9e 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -46,7 +46,7 @@ std::string Name(const ImageBase& image) { return "Invalid"; } -std::string Name(const ImageViewBase& image_view) { +std::string Name(const ImageViewBase& image_view, GPUVAddr addr) { const u32 width = image_view.size.width; const u32 height = image_view.size.height; const u32 depth = image_view.size.depth; @@ -56,23 +56,25 @@ std::string Name(const ImageViewBase& image_view) { const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; switch (image_view.type) { case ImageViewType::e1D: - return fmt::format("ImageView 1D {}{}", width, level); + return fmt::format("ImageView 1D 0x{:X} {}{}", addr, width, level); case ImageViewType::e2D: - return fmt::format("ImageView 2D {}x{}{}", width, height, level); + return fmt::format("ImageView 2D 0x{:X} {}x{}{}", addr, width, height, level); case ImageViewType::Cube: - return fmt::format("ImageView Cube {}x{}{}", width, height, level); + return fmt::format("ImageView Cube 0x{:X} {}x{}{}", addr, width, height, level); case ImageViewType::e3D: - return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level); + return fmt::format("ImageView 3D 0x{:X} {}x{}x{}{}", addr, width, height, depth, level); case ImageViewType::e1DArray: - return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers); + return fmt::format("ImageView 1DArray 0x{:X} {}{}|{}", addr, width, level, num_layers); case ImageViewType::e2DArray: - return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers); + return fmt::format("ImageView 2DArray 0x{:X} {}x{}{}|{}", addr, width, height, level, + num_layers); case ImageViewType::CubeArray: - return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers); + return fmt::format("ImageView CubeArray 0x{:X} {}x{}{}|{}", addr, width, height, level, + num_layers); case ImageViewType::Rect: - return fmt::format("ImageView Rect {}x{}{}", width, height, level); + return fmt::format("ImageView Rect 0x{:X} {}x{}{}", addr, width, height, level); case ImageViewType::Buffer: - return fmt::format("BufferView {}", width); + return fmt::format("BufferView 0x{:X} {}", addr, width); } return "Invalid"; } diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index f1f0a057b..9ee57a076 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -179,6 +179,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str return "ASTC_2D_6X6_SRGB"; case PixelFormat::ASTC_2D_10X6_UNORM: return "ASTC_2D_10X6_UNORM"; + case PixelFormat::ASTC_2D_10X6_SRGB: + return "ASTC_2D_10X6_SRGB"; case PixelFormat::ASTC_2D_10X5_UNORM: return "ASTC_2D_10X5_UNORM"; case PixelFormat::ASTC_2D_10X5_SRGB: @@ -187,6 +189,10 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str return "ASTC_2D_10X10_UNORM"; case PixelFormat::ASTC_2D_10X10_SRGB: return "ASTC_2D_10X10_SRGB"; + case PixelFormat::ASTC_2D_12X10_UNORM: + return "ASTC_2D_12X10_UNORM"; + case PixelFormat::ASTC_2D_12X10_SRGB: + return "ASTC_2D_12X10_SRGB"; case PixelFormat::ASTC_2D_12X12_UNORM: return "ASTC_2D_12X12_UNORM"; case PixelFormat::ASTC_2D_12X12_SRGB: @@ -268,7 +274,7 @@ struct RenderTargets; [[nodiscard]] std::string Name(const ImageBase& image); -[[nodiscard]] std::string Name(const ImageViewBase& image_view); +[[nodiscard]] std::string Name(const ImageViewBase& image_view, GPUVAddr addr); [[nodiscard]] std::string Name(const RenderTargets& render_targets); diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 11f3f78a1..e8ddde691 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -4,6 +4,7 @@ #include <fmt/format.h> #include "common/assert.h" +#include "common/settings.h" #include "video_core/surface.h" #include "video_core/texture_cache/format_lookup_table.h" #include "video_core/texture_cache/image_info.h" @@ -22,6 +23,8 @@ using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceType; ImageInfo::ImageInfo(const TICEntry& config) noexcept { + forced_flushed = config.IsPitchLinear() && !Settings::values.use_reactive_flushing.GetValue(); + dma_downloaded = forced_flushed; format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, config.a_type, config.srgb_conversion); num_samples = NumSamples(config.msaa_mode); @@ -117,6 +120,9 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { ImageInfo::ImageInfo(const Maxwell3D::Regs::RenderTargetConfig& ct, Tegra::Texture::MsaaMode msaa_mode) noexcept { + forced_flushed = + ct.tile_mode.is_pitch_linear && !Settings::values.use_reactive_flushing.GetValue(); + dma_downloaded = forced_flushed; format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(ct.format); rescaleable = false; if (ct.tile_mode.is_pitch_linear) { @@ -155,6 +161,9 @@ ImageInfo::ImageInfo(const Maxwell3D::Regs::RenderTargetConfig& ct, ImageInfo::ImageInfo(const Maxwell3D::Regs::Zeta& zt, const Maxwell3D::Regs::ZetaSize& zt_size, Tegra::Texture::MsaaMode msaa_mode) noexcept { + forced_flushed = + zt.tile_mode.is_pitch_linear && !Settings::values.use_reactive_flushing.GetValue(); + dma_downloaded = forced_flushed; format = VideoCore::Surface::PixelFormatFromDepthFormat(zt.format); size.width = zt_size.width; size.height = zt_size.height; @@ -195,6 +204,9 @@ ImageInfo::ImageInfo(const Maxwell3D::Regs::Zeta& zt, const Maxwell3D::Regs::Zet ImageInfo::ImageInfo(const Fermi2D::Surface& config) noexcept { UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero"); + forced_flushed = config.linear == Fermi2D::MemoryLayout::Pitch && + !Settings::values.use_reactive_flushing.GetValue(); + dma_downloaded = forced_flushed; format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format); rescaleable = false; if (config.linear == Fermi2D::MemoryLayout::Pitch) { diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 4b7dfa315..8a4cb0cbd 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -39,6 +39,8 @@ struct ImageInfo { u32 tile_width_spacing = 0; bool rescaleable = false; bool downscaleable = false; + bool forced_flushed = false; + bool dma_downloaded = false; }; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index 04fb84bfa..d134b6738 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -4,7 +4,6 @@ #include <algorithm> #include "common/assert.h" -#include "common/settings.h" #include "video_core/compatible_formats.h" #include "video_core/surface.h" #include "video_core/texture_cache/formatter.h" @@ -16,8 +15,8 @@ namespace VideoCommon { ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, - ImageId image_id_) - : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range}, + ImageId image_id_, GPUVAddr addr) + : image_id{image_id_}, gpu_addr{addr}, format{info.format}, type{info.type}, range{info.range}, size{ .width = std::max(image_info.size.width >> range.base.level, 1u), .height = std::max(image_info.size.height >> range.base.level, 1u), @@ -26,8 +25,7 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false, true), "Image view format {} is incompatible with image format {}", info.format, image_info.format); - const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); - if (image_info.type == ImageType::Linear && is_async) { + if (image_info.forced_flushed) { flags |= ImageViewFlagBits::PreemtiveDownload; } if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) { @@ -35,8 +33,8 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i } } -ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) - : image_id{NULL_IMAGE_ID}, format{info.format}, type{ImageViewType::Buffer}, +ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info, GPUVAddr addr) + : image_id{NULL_IMAGE_ID}, gpu_addr{addr}, format{info.format}, type{ImageViewType::Buffer}, size{ .width = info.size.width, .height = 1, diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 69c9776e7..a25ae1d4a 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h @@ -24,9 +24,9 @@ enum class ImageViewFlagBits : u16 { DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) struct ImageViewBase { - explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, - ImageId image_id); - explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); + explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id, + GPUVAddr addr); + explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info, GPUVAddr addr); explicit ImageViewBase(const NullImageViewParams&); [[nodiscard]] bool IsBuffer() const noexcept { @@ -34,6 +34,7 @@ struct ImageViewBase { } ImageId image_id{}; + GPUVAddr gpu_addr = 0; PixelFormat format{}; ImageViewType type{}; SubresourceRange range; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ed5c768d8..b24086fce 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1,9 +1,10 @@ -// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once #include <unordered_set> +#include <boost/container/small_vector.hpp> #include "common/alignment.h" #include "common/settings.h" @@ -17,15 +18,10 @@ namespace VideoCommon { -using Tegra::Texture::SwizzleSource; -using Tegra::Texture::TextureType; using Tegra::Texture::TICEntry; using Tegra::Texture::TSCEntry; using VideoCore::Surface::GetFormatType; -using VideoCore::Surface::IsCopyCompatible; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using VideoCore::Surface::SurfaceType; using namespace Common::Literals; @@ -143,6 +139,13 @@ void TextureCache<P>::TickFrame() { runtime.TickFrame(); critical_gc = 0; ++frame_tick; + + if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { + for (auto& buffer : async_buffers_death_ring) { + runtime.FreeDeferredStagingBuffer(buffer); + } + async_buffers_death_ring.clear(); + } } template <class P> @@ -488,6 +491,32 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { } template <class P> +std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(VAddr cpu_addr, + u64 size) { + std::optional<VideoCore::RasterizerDownloadArea> area{}; + ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) { + if (False(image.flags & ImageFlagBits::GpuModified)) { + return; + } + if (!area) { + area.emplace(); + area->start_address = cpu_addr; + area->end_address = cpu_addr + size; + area->preemtive = true; + } + area->start_address = std::min(area->start_address, image.cpu_addr); + area->end_address = std::max(area->end_address, image.cpu_addr_end); + for (auto image_view_id : image.image_view_ids) { + auto& image_view = slot_image_views[image_view_id]; + image_view.flags |= ImageViewFlagBits::PreemtiveDownload; + } + area->preemtive &= image.info.forced_flushed; + image.info.forced_flushed = true; + }); + return area; +} + +template <class P> void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { std::vector<ImageId> deleted_images; ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); @@ -661,25 +690,41 @@ template <class P> void TextureCache<P>::CommitAsyncFlushes() { // This is intentionally passing the value by copy if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - const std::span<const ImageId> download_ids = uncommitted_downloads; + auto& download_ids = uncommitted_downloads; if (download_ids.empty()) { committed_downloads.emplace_back(std::move(uncommitted_downloads)); uncommitted_downloads.clear(); - async_buffers.emplace_back(std::optional<AsyncBuffer>{}); + async_buffers.emplace_back(std::move(uncommitted_async_buffers)); + uncommitted_async_buffers.clear(); return; } size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + size_t last_async_buffer_id = uncommitted_async_buffers.size(); + bool any_none_dma = false; + for (PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + total_size_bytes += + Common::AlignUp(slot_images[download_info.object_id].unswizzled_size_bytes, 64); + any_none_dma = true; + download_info.async_buffer_id = last_async_buffer_id; + } } - auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, copies); - download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); + + if (any_none_dma) { + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); + for (const PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + Image& image = slot_images[download_info.object_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, copies); + download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); + } + } + uncommitted_async_buffers.emplace_back(download_map); } - async_buffers.emplace_back(download_map); + + async_buffers.emplace_back(std::move(uncommitted_async_buffers)); + uncommitted_async_buffers.clear(); } committed_downloads.emplace_back(std::move(uncommitted_downloads)); uncommitted_downloads.clear(); @@ -691,39 +736,57 @@ void TextureCache<P>::PopAsyncFlushes() { return; } if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - const std::span<const ImageId> download_ids = committed_downloads.front(); + const auto& download_ids = committed_downloads.front(); if (download_ids.empty()) { committed_downloads.pop_front(); async_buffers.pop_front(); return; } - auto download_map = *async_buffers.front(); - std::span<u8> download_span = download_map.mapped_span; + auto download_map = std::move(async_buffers.front()); for (size_t i = download_ids.size(); i > 0; i--) { - const ImageBase& image = slot_images[download_ids[i - 1]]; - const auto copies = FullDownloadCopies(image.info); - download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); - std::span<u8> download_span_alt = download_span.subspan(download_map.offset); - SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, - swizzle_data_buffer); + auto& download_info = download_ids[i - 1]; + auto& download_buffer = download_map[download_info.async_buffer_id]; + if (download_info.is_swizzle) { + const ImageBase& image = slot_images[download_info.object_id]; + const auto copies = FullDownloadCopies(image.info); + download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); + std::span<u8> download_span = + download_buffer.mapped_span.subspan(download_buffer.offset); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, + swizzle_data_buffer); + } else { + const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id]; + std::span<u8> download_span = + download_buffer.mapped_span.subspan(download_buffer.offset); + gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(), + buffer_info.size); + slot_buffer_downloads.erase(download_info.object_id); + } + } + for (auto& download_buffer : download_map) { + async_buffers_death_ring.emplace_back(download_buffer); } - runtime.FreeDeferredStagingBuffer(download_map); committed_downloads.pop_front(); async_buffers.pop_front(); } else { - const std::span<const ImageId> download_ids = committed_downloads.front(); + const auto& download_ids = committed_downloads.front(); if (download_ids.empty()) { committed_downloads.pop_front(); return; } size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + for (const PendingDownload& download_info : download_ids) { + if (download_info.is_swizzle) { + total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes; + } } auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); const size_t original_offset = download_map.offset; - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; + for (const PendingDownload& download_info : download_ids) { + if (!download_info.is_swizzle) { + continue; + } + Image& image = slot_images[download_info.object_id]; const auto copies = FullDownloadCopies(image.info); image.DownloadMemory(download_map, copies); download_map.offset += image.unswizzled_size_bytes; @@ -732,8 +795,11 @@ void TextureCache<P>::PopAsyncFlushes() { runtime.Finish(); download_map.offset = original_offset; std::span<u8> download_span = download_map.mapped_span; - for (const ImageId image_id : download_ids) { - const ImageBase& image = slot_images[image_id]; + for (const PendingDownload& download_info : download_ids) { + if (!download_info.is_swizzle) { + continue; + } + const ImageBase& image = slot_images[download_info.object_id]; const auto copies = FullDownloadCopies(image.info); SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, swizzle_data_buffer); @@ -745,17 +811,22 @@ void TextureCache<P>::PopAsyncFlushes() { } template <class P> -ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand) { +ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload) { const ImageInfo dst_info(operand); const ImageId dst_id = FindDMAImage(dst_info, operand.address); if (!dst_id) { return NULL_IMAGE_ID; } - const auto& image = slot_images[dst_id]; + auto& image = slot_images[dst_id]; if (False(image.flags & ImageFlagBits::GpuModified)) { // No need to waste time on an image that's synced with guest return NULL_IMAGE_ID; } + if (!is_upload && !image.info.dma_downloaded) { + // Force a full sync. + image.info.dma_downloaded = true; + return NULL_IMAGE_ID; + } const auto base = image.TryFindBase(operand.address); if (!base) { return NULL_IMAGE_ID; @@ -834,6 +905,33 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm } template <class P> +void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* image, + typename TextureCache<P>::BufferType buffer, + size_t buffer_offset, + std::span<const VideoCommon::BufferImageCopy> copies, + GPUVAddr address, size_t size) { + if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { + const BufferDownload new_buffer_download{address, size}; + auto slot = slot_buffer_downloads.insert(new_buffer_download); + const PendingDownload new_download{false, uncommitted_async_buffers.size(), slot}; + uncommitted_downloads.emplace_back(new_download); + auto download_map = runtime.DownloadStagingBuffer(size, true); + uncommitted_async_buffers.emplace_back(download_map); + std::array buffers{ + buffer, + download_map.buffer, + }; + std::array<u64, 2> buffer_offsets{ + buffer_offset, + download_map.offset, + }; + image->DownloadMemory(buffers, buffer_offsets, copies); + } else { + image->DownloadMemory(buffer, buffer_offset, copies); + } +} + +template <class P> void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { if (False(image.flags & ImageFlagBits::CpuModified)) { // Only upload modified images @@ -1225,7 +1323,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA all_siblings.push_back(overlap_id); } else { bad_overlap_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::BadOverlap; } }; ForEachImageInRegion(cpu_addr, size_bytes, region_check); @@ -1294,6 +1391,12 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA ScaleDown(new_image); } + std::ranges::sort(overlap_ids, [this](const ImageId lhs, const ImageId rhs) { + const ImageBase& lhs_image = slot_images[lhs]; + const ImageBase& rhs_image = slot_images[rhs]; + return lhs_image.modification_tick < rhs_image.modification_tick; + }); + for (const ImageId overlap_id : overlap_ids) { Image& overlap = slot_images[overlap_id]; if (True(overlap.flags & ImageFlagBits::GpuModified)) { @@ -1330,7 +1433,12 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA ImageBase& aliased = slot_images[aliased_id]; aliased.overlapping_images.push_back(new_image_id); new_image.overlapping_images.push_back(aliased_id); - new_image.flags |= ImageFlagBits::BadOverlap; + if (aliased.info.resources.levels == 1 && aliased.overlapping_images.size() > 1) { + aliased.flags |= ImageFlagBits::BadOverlap; + } + if (new_image.info.resources.levels == 1 && new_image.overlapping_images.size() > 1) { + new_image.flags |= ImageFlagBits::BadOverlap; + } } RegisterImage(new_image_id); return new_image_id; @@ -1361,7 +1469,7 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag if (!copy.must_accelerate) { do { if (!src_id && !dst_id) { - return std::nullopt; + break; } if (src_id && True(slot_images[src_id].flags & ImageFlagBits::GpuModified)) { break; @@ -2209,7 +2317,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) if (new_id) { const ImageViewBase& old_view = slot_image_views[new_id]; if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { - uncommitted_downloads.push_back(old_view.image_id); + const PendingDownload new_download{true, 0, old_view.image_id}; + uncommitted_downloads.emplace_back(new_download); } } *old_id = new_id; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 5a5b4179c..0720494e5 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once @@ -40,14 +40,9 @@ struct ChannelState; namespace VideoCommon { -using Tegra::Texture::SwizzleSource; using Tegra::Texture::TICEntry; using Tegra::Texture::TSCEntry; -using VideoCore::Surface::GetFormatType; -using VideoCore::Surface::IsCopyCompatible; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using namespace Common::Literals; struct ImageViewInOut { @@ -119,6 +114,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI using Sampler = typename P::Sampler; using Framebuffer = typename P::Framebuffer; using AsyncBuffer = typename P::AsyncBuffer; + using BufferType = typename P::BufferType; struct BlitImages { ImageId dst_id; @@ -183,6 +179,8 @@ public: /// Download contents of host images to guest memory in a region void DownloadMemory(VAddr cpu_addr, size_t size); + std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); + /// Remove images in a region void UnmapMemory(VAddr cpu_addr, size_t size); @@ -209,12 +207,16 @@ public: /// Pop asynchronous downloads void PopAsyncFlushes(); - [[nodiscard]] ImageId DmaImageId(const Tegra::DMA::ImageOperand& operand); + [[nodiscard]] ImageId DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload); [[nodiscard]] std::pair<Image*, BufferImageCopy> DmaBufferImageCopy( const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand, const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); + void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset, + std::span<const VideoCommon::BufferImageCopy> copies, + GPUVAddr address = 0, size_t size = 0); + /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); @@ -424,17 +426,32 @@ private: u64 critical_memory; size_t critical_gc; + struct BufferDownload { + GPUVAddr address; + size_t size; + }; + + struct PendingDownload { + bool is_swizzle; + size_t async_buffer_id; + SlotId object_id; + }; + SlotVector<Image> slot_images; SlotVector<ImageMapView> slot_map_views; SlotVector<ImageView> slot_image_views; SlotVector<ImageAlloc> slot_image_allocs; SlotVector<Sampler> slot_samplers; SlotVector<Framebuffer> slot_framebuffers; + SlotVector<BufferDownload> slot_buffer_downloads; // TODO: This data structure is not optimal and it should be reworked - std::vector<ImageId> uncommitted_downloads; - std::deque<std::vector<ImageId>> committed_downloads; - std::deque<std::optional<AsyncBuffer>> async_buffers; + + std::vector<PendingDownload> uncommitted_downloads; + std::deque<std::vector<PendingDownload>> committed_downloads; + std::vector<AsyncBuffer> uncommitted_async_buffers; + std::deque<std::vector<AsyncBuffer>> async_buffers; + std::deque<AsyncBuffer> async_buffers_death_ring; struct LRUItemParams { using ObjectType = ImageId; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index de37db684..f1071aa23 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -896,11 +896,11 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); if (IsPixelFormatASTC(info.format)) { - ASSERT(copy.image_extent.depth == 1); - Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset), - copy.image_extent.width, copy.image_extent.height, - copy.image_subresource.num_layers, tile_size.width, - tile_size.height, output.subspan(output_offset)); + Tegra::Texture::ASTC::Decompress( + input.subspan(copy.buffer_offset), copy.image_extent.width, + copy.image_extent.height, + copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width, + tile_size.height, output.subspan(output_offset)); } else { DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, output.subspan(output_offset)); |