diff options
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 40 | ||||
-rw-r--r-- | src/video_core/texture_cache/types.h | 1 | ||||
-rw-r--r-- | src/video_core/texture_cache/util.cpp | 35 | ||||
-rw-r--r-- | src/video_core/texture_cache/util.h | 3 |
4 files changed, 35 insertions, 44 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8190f3ba1..4457b366f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -8,6 +8,7 @@ #include "common/alignment.h" #include "common/settings.h" +#include "core/memory.h" #include "video_core/control/channel_state.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" @@ -598,6 +599,10 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::CpuModified)) { + continue; + } + image.flags |= ImageFlagBits::CpuModified; if (True(image.flags & ImageFlagBits::Remapped)) { continue; } @@ -865,11 +870,15 @@ void TextureCache<P>::PopAsyncFlushes() { template <class P> ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload) { const ImageInfo dst_info(operand); - const ImageId image_id = FindDMAImage(dst_info, operand.address); - if (!image_id) { + const ImageId dst_id = FindDMAImage(dst_info, operand.address); + if (!dst_id) { + return NULL_IMAGE_ID; + } + auto& image = slot_images[dst_id]; + if (False(image.flags & ImageFlagBits::GpuModified)) { + // No need to waste time on an image that's synced with guest return NULL_IMAGE_ID; } - auto& image = slot_images[image_id]; if (image.info.type == ImageType::e3D) { // Don't accelerate 3D images. return NULL_IMAGE_ID; @@ -883,7 +892,7 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo if (!base) { return NULL_IMAGE_ID; } - return image_id; + return dst_id; } template <class P> @@ -1018,19 +1027,19 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) runtime.AccelerateImageUpload(image, staging, uploads); return; } - const size_t guest_size_bytes = image.guest_size_bytes; - swizzle_data_buffer.resize_destructive(guest_size_bytes); - gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); + + Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( + *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); if (True(image.flags & ImageFlagBits::Converted)) { unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); - auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, - unswizzle_data_buffer); + auto copies = + UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer); ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); image.UploadMemory(staging, copies); } else { const auto copies = - UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span); + UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span); image.UploadMemory(staging, copies); } } @@ -1223,11 +1232,12 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) { decode->image_id = image_id; async_decodes.push_back(std::move(decode)); - Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes); - const size_t guest_size_bytes = image.guest_size_bytes; - swizzle_data_buffer.resize_destructive(guest_size_bytes); - gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); - auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer, + static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; + local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); + Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( + *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); + + auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, local_unswizzle_data_buffer); const size_t out_size = MapSizeBytes(image); diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index a0e10643f..0453456b4 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -54,7 +54,6 @@ enum class RelaxedOptions : u32 { Format = 1 << 1, Samples = 1 << 2, ForceBrokenViews = 1 << 3, - FormatBpp = 1 << 4, }; DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 9a618a57a..a83f5d41c 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -20,6 +20,7 @@ #include "common/div_ceil.h" #include "common/scratch_buffer.h" #include "common/settings.h" +#include "core/memory.h" #include "video_core/compatible_formats.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -544,17 +545,15 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr tile_size.height, info.tile_width_spacing); const size_t subresource_size = sizes[level]; - tmp_buffer.resize_destructive(subresource_size); - const std::span<u8> dst(tmp_buffer); - for (s32 layer = 0; layer < info.resources.layers; ++layer) { const std::span<const u8> src = input.subspan(host_offset); - gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); - - SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, - num_tiles.depth, block.height, block.depth); + { + Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> + dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); - gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); + SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, + num_tiles.depth, block.height, block.depth); + } host_offset += host_bytes_per_layer; guest_offset += layer_stride; @@ -837,6 +836,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory const Extent3D size = info.size; if (info.type == ImageType::Linear) { + ASSERT(output.size_bytes() >= guest_size_bytes); gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); @@ -904,16 +904,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory return copies; } -BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, - const ImageBase& image, std::span<u8> output) { - gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes); - return BufferCopy{ - .src_offset = 0, - .dst_offset = 0, - .size = image.guest_size_bytes, - }; -} - void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, std::span<BufferImageCopy> copies) { u32 output_offset = 0; @@ -1201,8 +1191,7 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const // Format checking is relaxed, but we still have to check for matching bytes per block. // This avoids creating a view for blits on UE4 titles where formats with different bytes // per block are aliased. - if (BytesPerBlock(existing.format) != BytesPerBlock(candidate.format) && - False(options & RelaxedOptions::FormatBpp)) { + if (BytesPerBlock(existing.format) != BytesPerBlock(candidate.format)) { return std::nullopt; } } else { @@ -1233,11 +1222,7 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const } const bool strict_size = False(options & RelaxedOptions::Size); if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) { - if (False(options & RelaxedOptions::FormatBpp)) { - return std::nullopt; - } else if (!IsBlockLinearSizeCompatibleBPPRelaxed(existing, candidate, base->level, 0)) { - return std::nullopt; - } + return std::nullopt; } // TODO: compare block sizes return base; diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index ab45a43c4..5a0649d24 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -66,9 +66,6 @@ struct OverlapResult { Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, std::span<const u8> input, std::span<u8> output); -[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, - const ImageBase& image, std::span<u8> output); - void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, std::span<BufferImageCopy> copies); |