From 8d6aefdcc452b602d94a84d13bbbc15f806b689c Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 14 Jun 2023 14:11:46 -0400 Subject: video_core: optionally skip barriers on feedback loops --- src/video_core/texture_cache/texture_cache.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c7f7448e9..43b7ac0a6 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -186,6 +186,10 @@ void TextureCache

::FillComputeImageViews(std::span views) { template void TextureCache

::CheckFeedbackLoop(std::span views) { + if (!Settings::values.barrier_feedback_loops.GetValue()) { + return; + } + const bool requires_barrier = [&] { for (const auto& view : views) { if (!view.id) { -- cgit v1.2.3 From 76a676883a17523fb12eeac6f2b9702e4916b2c2 Mon Sep 17 00:00:00 2001 From: FengChen Date: Sat, 17 Jun 2023 23:26:39 +0800 Subject: video_core: add samples check when find render target --- src/video_core/texture_cache/texture_cache.h | 22 ++++++++++------------ src/video_core/texture_cache/texture_cache_base.h | 10 ++++------ 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c7f7448e9..f11998e20 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -280,7 +280,7 @@ void TextureCache

::SynchronizeComputeDescriptors() { } template -bool TextureCache

::RescaleRenderTargets(bool is_clear) { +bool TextureCache

::RescaleRenderTargets() { auto& flags = maxwell3d->dirty.flags; u32 scale_rating = 0; bool rescaled = false; @@ -318,13 +318,13 @@ bool TextureCache

::RescaleRenderTargets(bool is_clear) { ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; if (flags[Dirty::ColorBuffer0 + index] || force) { flags[Dirty::ColorBuffer0 + index] = false; - BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + BindRenderTarget(&color_buffer_id, FindColorBuffer(index)); } check_rescale(color_buffer_id, tmp_color_images[index]); } if (flags[Dirty::ZetaBuffer] || force) { flags[Dirty::ZetaBuffer] = false; - BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer()); } check_rescale(render_targets.depth_buffer_id, tmp_depth_image); @@ -389,7 +389,7 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { return; } - const bool rescaled = RescaleRenderTargets(is_clear); + const bool rescaled = RescaleRenderTargets(); if (is_rescaling != rescaled) { flags[Dirty::RescaleViewports] = true; flags[Dirty::RescaleScissors] = true; @@ -1658,7 +1658,7 @@ SamplerId TextureCache

::FindSampler(const TSCEntry& config) { } template -ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { +ImageViewId TextureCache

::FindColorBuffer(size_t index) { const auto& regs = maxwell3d->regs; if (index >= regs.rt_control.count) { return ImageViewId{}; @@ -1672,11 +1672,11 @@ ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { return ImageViewId{}; } const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode); - return FindRenderTargetView(info, gpu_addr, is_clear); + return FindRenderTargetView(info, gpu_addr); } template -ImageViewId TextureCache

::FindDepthBuffer(bool is_clear) { +ImageViewId TextureCache

::FindDepthBuffer() { const auto& regs = maxwell3d->regs; if (!regs.zeta_enable) { return ImageViewId{}; @@ -1686,18 +1686,16 @@ ImageViewId TextureCache

::FindDepthBuffer(bool is_clear) { return ImageViewId{}; } const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode); - return FindRenderTargetView(info, gpu_addr, is_clear); + return FindRenderTargetView(info, gpu_addr); } template -ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear) { - const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; +ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr) { ImageId image_id{}; bool delete_state = has_deleted_images; do { has_deleted_images = false; - image_id = FindOrInsertImage(info, gpu_addr, options); + image_id = FindOrInsertImage(info, gpu_addr); delete_state |= has_deleted_images; } while (has_deleted_images); has_deleted_images = delete_state; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 3bfa92154..c347eccd6 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -166,9 +166,8 @@ public: void SynchronizeComputeDescriptors(); /// Updates the Render Targets if they can be rescaled - /// @param is_clear True when the render targets are being used for clears /// @retval True if the Render Targets have been rescaled. - bool RescaleRenderTargets(bool is_clear); + bool RescaleRenderTargets(); /// Update bound render targets and upload memory if necessary /// @param is_clear True when the render targets are being used for clears @@ -324,14 +323,13 @@ private: [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); /// Find or create an image view for the given color buffer index - [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); + [[nodiscard]] ImageViewId FindColorBuffer(size_t index); /// Find or create an image view for the depth buffer - [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); + [[nodiscard]] ImageViewId FindDepthBuffer(); /// Find or create a view for a render target with the given image parameters - [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear); + [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr); /// Iterates over all the images in a region calling func template -- cgit v1.2.3 From 5da70f719703084482933e103e561cc98163f370 Mon Sep 17 00:00:00 2001 From: Kelebek1 Date: Tue, 23 May 2023 14:45:54 +0100 Subject: Remove memory allocations in some hot paths --- src/video_core/texture_cache/image_base.h | 5 ++- src/video_core/texture_cache/texture_cache.h | 14 +++---- src/video_core/texture_cache/texture_cache_base.h | 4 +- src/video_core/texture_cache/util.cpp | 48 +++++++++++++---------- src/video_core/texture_cache/util.h | 31 ++++++++------- 5 files changed, 55 insertions(+), 47 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 1b8a17ee8..55d49d017 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "common/common_funcs.h" #include "common/common_types.h" @@ -108,8 +109,8 @@ struct ImageBase { std::vector image_view_infos; std::vector image_view_ids; - std::vector slice_offsets; - std::vector slice_subresources; + boost::container::small_vector slice_offsets; + boost::container::small_vector slice_subresources; std::vector aliased_images; std::vector overlapping_images; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d58bb69ff..d3f03a995 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -526,7 +526,7 @@ void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { template void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { - std::vector images; + boost::container::small_vector images; ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { if (!image.IsSafeDownload()) { return; @@ -579,7 +579,7 @@ std::optional TextureCache

::GetFlushArea(V template void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { - std::vector deleted_images; + boost::container::small_vector deleted_images; ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { Image& image = slot_images[id]; @@ -593,7 +593,7 @@ void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { template void TextureCache

::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { - std::vector deleted_images; + boost::container::small_vector deleted_images; ForEachImageInRegionGPU(as_id, gpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { @@ -1101,7 +1101,7 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, const bool native_bgr = runtime.HasNativeBgr(); const bool flexible_formats = True(options & RelaxedOptions::Format); ImageId image_id{}; - boost::container::small_vector image_ids; + boost::container::small_vector image_ids; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (True(existing_image.flags & ImageFlagBits::Remapped)) { return false; @@ -1622,7 +1622,7 @@ ImageId TextureCache

::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) } } ImageId image_id{}; - boost::container::small_vector image_ids; + boost::container::small_vector image_ids; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (True(existing_image.flags & ImageFlagBits::Remapped)) { return false; @@ -1942,7 +1942,7 @@ void TextureCache

::RegisterImage(ImageId image_id) { image.map_view_id = map_id; return; } - std::vector sparse_maps{}; + boost::container::small_vector sparse_maps; ForEachSparseSegment( image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); @@ -2217,7 +2217,7 @@ void TextureCache

::MarkModification(ImageBase& image) noexcept { template void TextureCache

::SynchronizeAliases(ImageId image_id) { - boost::container::small_vector aliased_images; + boost::container::small_vector aliased_images; Image& image = slot_images[image_id]; bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); bool any_modified = True(image.flags & ImageFlagBits::GpuModified); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 44232b961..e9ec91265 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -56,7 +56,7 @@ struct ImageViewInOut { struct AsyncDecodeContext { ImageId image_id; Common::ScratchBuffer decoded_data; - std::vector copies; + boost::container::small_vector copies; std::mutex mutex; std::atomic_bool complete; }; @@ -429,7 +429,7 @@ private: std::unordered_map, Common::IdentityHash> page_table; std::unordered_map, Common::IdentityHash> sparse_page_table; - std::unordered_map> sparse_views; + std::unordered_map> sparse_views; VAddr virtual_invalid_space{}; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 95a5b47d8..f781cb7a0 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -329,13 +329,13 @@ template [[nodiscard]] std::optional ResolveOverlapRightAddress3D( const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { - const std::vector slice_offsets = CalculateSliceOffsets(new_info); + const auto slice_offsets = CalculateSliceOffsets(new_info); const u32 diff = static_cast(overlap.gpu_addr - gpu_addr); const auto it = std::ranges::find(slice_offsets, diff); if (it == slice_offsets.end()) { return std::nullopt; } - const std::vector subresources = CalculateSliceSubresources(new_info); + const auto subresources = CalculateSliceSubresources(new_info); const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; const ImageInfo& info = overlap.info; if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { @@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { return sizes; } -std::vector CalculateSliceOffsets(const ImageInfo& info) { +boost::container::small_vector CalculateSliceOffsets(const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); - std::vector offsets; + boost::container::small_vector offsets; offsets.reserve(NumSlices(info)); const LevelInfo level_info = MakeLevelInfo(info); @@ -679,9 +679,10 @@ std::vector CalculateSliceOffsets(const ImageInfo& info) { return offsets; } -std::vector CalculateSliceSubresources(const ImageInfo& info) { +boost::container::small_vector CalculateSliceSubresources( + const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); - std::vector subresources; + boost::container::small_vector subresources; subresources.reserve(NumSlices(info)); for (s32 level = 0; level < info.resources.levels; ++level) { const s32 depth = AdjustMipSize(info.size.depth, level); @@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { } } -std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, - SubresourceBase base, u32 up_scale, u32 down_shift) { +boost::container::small_vector MakeShrinkImageCopies(const ImageInfo& dst, + const ImageInfo& src, + SubresourceBase base, + u32 up_scale, u32 down_shift) { ASSERT(dst.resources.levels >= src.resources.levels); const bool is_dst_3d = dst.type == ImageType::e3D; @@ -733,7 +736,7 @@ std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn ASSERT(src.resources.levels == 1); } const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; - std::vector copies; + boost::container::small_vector copies; copies.reserve(src.resources.levels); for (s32 level = 0; level < src.resources.levels; ++level) { ImageCopy& copy = copies.emplace_back(); @@ -770,9 +773,10 @@ std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn return copies; } -std::vector MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, - u32 down_shift) { - std::vector copies; +boost::container::small_vector MakeReinterpretImageCopies(const ImageInfo& src, + u32 up_scale, + u32 down_shift) { + boost::container::small_vector copies; copies.reserve(src.resources.levels); const bool is_3d = src.type == ImageType::e3D; for (s32 level = 0; level < src.resources.levels; ++level) { @@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); } -std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, - const ImageInfo& info, std::span input, - std::span output) { +boost::container::small_vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, + GPUVAddr gpu_addr, + const ImageInfo& info, + std::span input, + std::span output) { const size_t guest_size_bytes = input.size_bytes(); const u32 bpp_log2 = BytesPerBlockLog2(info.format); const Extent3D size = info.size; @@ -861,7 +867,7 @@ std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP info.tile_width_spacing); size_t guest_offset = 0; u32 host_offset = 0; - std::vector copies(num_levels); + boost::container::small_vector copies(num_levels); for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); @@ -978,7 +984,7 @@ void ConvertImage(std::span input, const ImageInfo& info, std::span FullDownloadCopies(const ImageInfo& info) { +boost::container::small_vector FullDownloadCopies(const ImageInfo& info) { const Extent3D size = info.size; const u32 bytes_per_block = BytesPerBlock(info.format); if (info.type == ImageType::Linear) { @@ -1006,7 +1012,7 @@ std::vector FullDownloadCopies(const ImageInfo& info) { u32 host_offset = 0; - std::vector copies(num_levels); + boost::container::small_vector copies(num_levels); for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); @@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) { return AdjustMipBlockSize(num_tiles, level_info.block, level); } -std::vector FullUploadSwizzles(const ImageInfo& info) { +boost::container::small_vector FullUploadSwizzles(const ImageInfo& info) { const Extent2D tile_size = DefaultBlockSize(info.format); if (info.type == ImageType::Linear) { - return std::vector{SwizzleParameters{ + return {SwizzleParameters{ .num_tiles = AdjustTileSize(info.size, tile_size), .block = {}, .buffer_offset = 0, @@ -1057,7 +1063,7 @@ std::vector FullUploadSwizzles(const ImageInfo& info) { const s32 num_levels = info.resources.levels; u32 guest_offset = 0; - std::vector params(num_levels); + boost::container::small_vector params(num_levels); for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 84aa6880d..ab45a43c4 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -5,6 +5,7 @@ #include #include +#include #include "common/common_types.h" #include "common/scratch_buffer.h" @@ -40,9 +41,10 @@ struct OverlapResult { [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; -[[nodiscard]] std::vector CalculateSliceOffsets(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector CalculateSliceOffsets(const ImageInfo& info); -[[nodiscard]] std::vector CalculateSliceSubresources(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector CalculateSliceSubresources( + const ImageInfo& info); [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); @@ -51,21 +53,18 @@ struct OverlapResult { [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; -[[nodiscard]] std::vector MakeShrinkImageCopies(const ImageInfo& dst, - const ImageInfo& src, - SubresourceBase base, u32 up_scale = 1, - u32 down_shift = 0); +[[nodiscard]] boost::container::small_vector MakeShrinkImageCopies( + const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1, + u32 down_shift = 0); -[[nodiscard]] std::vector MakeReinterpretImageCopies(const ImageInfo& src, - u32 up_scale = 1, - u32 down_shift = 0); +[[nodiscard]] boost::container::small_vector MakeReinterpretImageCopies( + const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0); [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); -[[nodiscard]] std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, - GPUVAddr gpu_addr, const ImageInfo& info, - std::span input, - std::span output); +[[nodiscard]] boost::container::small_vector UnswizzleImage( + Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, + std::span input, std::span output); [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageBase& image, std::span output); @@ -73,13 +72,15 @@ struct OverlapResult { void ConvertImage(std::span input, const ImageInfo& info, std::span output, std::span copies); -[[nodiscard]] std::vector FullDownloadCopies(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector FullDownloadCopies( + const ImageInfo& info); [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); -[[nodiscard]] std::vector FullUploadSwizzles(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector FullUploadSwizzles( + const ImageInfo& info); void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, std::span copies, std::span memory, -- cgit v1.2.3 From eac46ad7ceca5e35b396a8b80bfc38dc6ef1a4fe Mon Sep 17 00:00:00 2001 From: GPUCode Date: Tue, 6 Jun 2023 23:10:06 +0300 Subject: video_core: Add BCn decoding support --- src/video_core/texture_cache/decode_bc.cpp | 129 ++++++++++++++++++++++++++++ src/video_core/texture_cache/decode_bc.h | 19 ++++ src/video_core/texture_cache/decode_bc4.cpp | 96 --------------------- src/video_core/texture_cache/decode_bc4.h | 15 ---- src/video_core/texture_cache/util.cpp | 24 ++++-- 5 files changed, 163 insertions(+), 120 deletions(-) create mode 100644 src/video_core/texture_cache/decode_bc.cpp create mode 100644 src/video_core/texture_cache/decode_bc.h delete mode 100644 src/video_core/texture_cache/decode_bc4.cpp delete mode 100644 src/video_core/texture_cache/decode_bc4.h (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp new file mode 100644 index 000000000..3e26474a3 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc.cpp @@ -0,0 +1,129 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include + +#include "common/common_types.h" +#include "video_core/texture_cache/decode_bc.h" + +namespace VideoCommon { + +namespace { +constexpr u32 BLOCK_SIZE = 4; + +using VideoCore::Surface::PixelFormat; + +constexpr bool IsSigned(PixelFormat pixel_format) { + switch (pixel_format) { + case PixelFormat::BC4_SNORM: + case PixelFormat::BC4_UNORM: + case PixelFormat::BC5_SNORM: + case PixelFormat::BC5_UNORM: + case PixelFormat::BC6H_SFLOAT: + case PixelFormat::BC6H_UFLOAT: + return true; + default: + return false; + } +} + +constexpr u32 BlockSize(PixelFormat pixel_format) { + switch (pixel_format) { + case PixelFormat::BC1_RGBA_SRGB: + case PixelFormat::BC1_RGBA_UNORM: + case PixelFormat::BC4_SNORM: + case PixelFormat::BC4_UNORM: + return 8; + default: + return 16; + } +} +} // Anonymous namespace + +u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) { + switch (pixel_format) { + case PixelFormat::BC4_SNORM: + case PixelFormat::BC4_UNORM: + return 1; + case PixelFormat::BC5_SNORM: + case PixelFormat::BC5_UNORM: + return 2; + case PixelFormat::BC6H_SFLOAT: + case PixelFormat::BC6H_UFLOAT: + return 8; + default: + return 4; + } +} + +template +void DecompressBlocks(std::span input, std::span output, Extent3D extent, + bool is_signed = false) { + const u32 out_bpp = ConvertedBytesPerBlock(pixel_format); + const u32 block_width = std::min(extent.width, BLOCK_SIZE); + const u32 block_height = std::min(extent.height, BLOCK_SIZE); + const u32 pitch = extent.width * out_bpp; + size_t input_offset = 0; + size_t output_offset = 0; + for (u32 slice = 0; slice < extent.depth; ++slice) { + for (u32 y = 0; y < extent.height; y += block_height) { + size_t row_offset = 0; + for (u32 x = 0; x < extent.width; + x += block_width, row_offset += block_width * out_bpp) { + const u8* src = input.data() + input_offset; + u8* const dst = output.data() + output_offset + row_offset; + if constexpr (IsSigned(pixel_format)) { + decompress(src, dst, x, y, extent.width, extent.height, is_signed); + } else { + decompress(src, dst, x, y, extent.width, extent.height); + } + input_offset += BlockSize(pixel_format); + } + output_offset += block_height * pitch; + } + } +} + +void DecompressBCn(std::span input, std::span output, Extent3D extent, + VideoCore::Surface::PixelFormat pixel_format) { + switch (pixel_format) { + case PixelFormat::BC1_RGBA_UNORM: + case PixelFormat::BC1_RGBA_SRGB: + DecompressBlocks(input, output, extent); + break; + case PixelFormat::BC2_UNORM: + case PixelFormat::BC2_SRGB: + DecompressBlocks(input, output, extent); + break; + case PixelFormat::BC3_UNORM: + case PixelFormat::BC3_SRGB: + DecompressBlocks(input, output, extent); + break; + case PixelFormat::BC4_SNORM: + case PixelFormat::BC4_UNORM: + DecompressBlocks( + input, output, extent, pixel_format == PixelFormat::BC4_SNORM); + break; + case PixelFormat::BC5_SNORM: + case PixelFormat::BC5_UNORM: + DecompressBlocks( + input, output, extent, pixel_format == PixelFormat::BC5_SNORM); + break; + case PixelFormat::BC6H_SFLOAT: + case PixelFormat::BC6H_UFLOAT: + DecompressBlocks( + input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT); + break; + case PixelFormat::BC7_SRGB: + case PixelFormat::BC7_UNORM: + DecompressBlocks(input, output, extent); + break; + default: + LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format); + } +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/decode_bc.h b/src/video_core/texture_cache/decode_bc.h new file mode 100644 index 000000000..41d1ec0a3 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc.h @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "common/common_types.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/types.h" + +namespace VideoCommon { + +[[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format); + +void DecompressBCn(std::span input, std::span output, Extent3D extent, + VideoCore::Surface::PixelFormat pixel_format); + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp deleted file mode 100644 index ef98afdca..000000000 --- a/src/video_core/texture_cache/decode_bc4.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/texture_cache/decode_bc4.h" -#include "video_core/texture_cache/types.h" - -namespace VideoCommon { - -// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt -[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) { - const u32 code_offset = 16 + 3 * (4 * y + x); - const u32 code = (bits >> code_offset) & 7; - const u32 red0 = (bits >> 0) & 0xff; - const u32 red1 = (bits >> 8) & 0xff; - if (red0 > red1) { - switch (code) { - case 0: - return red0; - case 1: - return red1; - case 2: - return (6 * red0 + 1 * red1) / 7; - case 3: - return (5 * red0 + 2 * red1) / 7; - case 4: - return (4 * red0 + 3 * red1) / 7; - case 5: - return (3 * red0 + 4 * red1) / 7; - case 6: - return (2 * red0 + 5 * red1) / 7; - case 7: - return (1 * red0 + 6 * red1) / 7; - } - } else { - switch (code) { - case 0: - return red0; - case 1: - return red1; - case 2: - return (4 * red0 + 1 * red1) / 5; - case 3: - return (3 * red0 + 2 * red1) / 5; - case 4: - return (2 * red0 + 3 * red1) / 5; - case 5: - return (1 * red0 + 4 * red1) / 5; - case 6: - return 0; - case 7: - return 0xff; - } - } - return 0; -} - -void DecompressBC4(std::span input, Extent3D extent, std::span output) { - UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width); - UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height); - static constexpr u32 BLOCK_SIZE = 4; - size_t input_offset = 0; - for (u32 slice = 0; slice < extent.depth; ++slice) { - for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) { - for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) { - u64 bits; - std::memcpy(&bits, &input[input_offset], sizeof(bits)); - input_offset += sizeof(bits); - - for (u32 y = 0; y < BLOCK_SIZE; ++y) { - for (u32 x = 0; x < BLOCK_SIZE; ++x) { - const u32 linear_z = slice; - const u32 linear_y = block_y * BLOCK_SIZE + y; - const u32 linear_x = block_x * BLOCK_SIZE + x; - const u32 offset_z = linear_z * extent.width * extent.height; - const u32 offset_y = linear_y * extent.width; - const u32 offset_x = linear_x; - const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL; - const u32 color = DecompressBlock(bits, x, y); - output[output_offset + 0] = static_cast(color); - output[output_offset + 1] = 0; - output[output_offset + 2] = 0; - output[output_offset + 3] = 0xff; - } - } - } - } - } -} - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h deleted file mode 100644 index ab2f735be..000000000 --- a/src/video_core/texture_cache/decode_bc4.h +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include - -#include "common/common_types.h" -#include "video_core/texture_cache/types.h" - -namespace VideoCommon { - -void DecompressBC4(std::span data, Extent3D extent, std::span output); - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index f781cb7a0..9a618a57a 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -24,7 +24,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/surface.h" -#include "video_core/texture_cache/decode_bc4.h" +#include "video_core/texture_cache/decode_bc.h" #include "video_core/texture_cache/format_lookup_table.h" #include "video_core/texture_cache/formatter.h" #include "video_core/texture_cache/samples_helper.h" @@ -61,8 +61,6 @@ using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using VideoCore::Surface::SurfaceType; -constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); - struct LevelInfo { Extent3D size; Extent3D block; @@ -612,7 +610,8 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept { } return output_size; } - return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; + return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * + ConvertedBytesPerBlock(info.format); } u32 CalculateLayerStride(const ImageInfo& info) noexcept { @@ -945,7 +944,8 @@ void ConvertImage(std::span input, const ImageInfo& info, std::span input, const ImageInfo& info, std::span input, const ImageInfo& info, std::span(copy.buffer_size); } else { - DecompressBC4(input_offset, copy.image_extent, output.subspan(output_offset)); - + const Extent3D image_extent{ + .width = copy.image_extent.width, + .height = copy.image_extent.height * copy.image_subresource.num_layers, + .depth = copy.image_extent.depth, + }; + DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format); output_offset += copy.image_extent.width * copy.image_extent.height * - copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; + copy.image_subresource.num_layers * + ConvertedBytesPerBlock(info.format); } } } -- cgit v1.2.3 From b62121fd605663dc9aaaae72fe8e444312f9c5d5 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 29 Jun 2023 11:58:45 +0200 Subject: Texture cache: Fix YFC regression due to code testing --- src/video_core/texture_cache/texture_cache.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d3f03a995..485f6b6f3 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -598,14 +598,6 @@ void TextureCache

::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::CpuModified)) { - return; - } - image.flags |= ImageFlagBits::CpuModified; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } - /* if (True(image.flags & ImageFlagBits::Remapped)) { continue; } @@ -613,7 +605,6 @@ void TextureCache

::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz if (True(image.flags & ImageFlagBits::Tracked)) { UntrackImage(image, id); } - */ } } -- cgit v1.2.3 From 596a6132b974dd73935854d8f51842424e058be8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 29 Jun 2023 17:23:29 +0200 Subject: AccelerateDMA: Don't accelerate 3D texture DMA operations --- src/video_core/texture_cache/texture_cache.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d3f03a995..0330415b7 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -879,6 +879,10 @@ ImageId TextureCache

::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo return NULL_IMAGE_ID; } auto& image = slot_images[image_id]; + if (image.info.type == ImageType::e3D) { + // Don't accelerate 3D images. + return NULL_IMAGE_ID; + } if (!is_upload && !image.info.dma_downloaded) { // Force a full sync. image.info.dma_downloaded = true; -- cgit v1.2.3