diff options
Diffstat (limited to 'src/video_core')
36 files changed, 455 insertions, 255 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9bafd8cc0..58a45ab67 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -207,7 +207,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am if (has_new_downloads) { memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); } - tmp_buffer.resize(amount); + tmp_buffer.resize_destructive(amount); cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); return true; @@ -719,9 +719,15 @@ void BufferCache<P>::BindHostVertexBuffers() { bool any_valid{false}; auto& flags = maxwell3d->dirty.flags; for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { + const Binding& binding = channel_state->vertex_buffers[index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + TouchBuffer(buffer, binding.buffer_id); + SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { continue; } + flags[Dirty::VertexBuffer0 + index] = false; + host_bindings.min_index = std::min(host_bindings.min_index, index); host_bindings.max_index = std::max(host_bindings.max_index, index); any_valid = true; @@ -735,9 +741,6 @@ void BufferCache<P>::BindHostVertexBuffers() { const Binding& binding = channel_state->vertex_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer, binding.buffer_id); - SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); - const u32 stride = maxwell3d->regs.vertex_streams[index].stride; const u32 offset = buffer.Offset(binding.cpu_addr); @@ -1276,7 +1279,7 @@ template <class P> typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, u32 wanted_size) { static constexpr int STREAM_LEAP_THRESHOLD = 16; - std::vector<BufferId> overlap_ids; + boost::container::small_vector<BufferId, 16> overlap_ids; VAddr begin = cpu_addr; VAddr end = cpu_addr + wanted_size; int stream_score = 0; diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 63a120f7a..fe6068cfe 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; struct OverlapResult { - std::vector<BufferId> ids; + boost::container::small_vector<BufferId, 16> ids; VAddr begin; VAddr end; bool has_stream_leap = false; @@ -582,7 +582,7 @@ private: BufferId inline_buffer_id; std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; - std::vector<u8> tmp_buffer; + Common::ScratchBuffer<u8> tmp_buffer; }; } // namespace VideoCommon diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 83112dfce..7d660af47 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h @@ -63,7 +63,6 @@ struct ChCommand { }; using ChCommandHeaderList = std::vector<ChCommandHeader>; -using ChCommandList = std::vector<ChCommand>; struct ThiRegisters { u32_le increment_syncpt{}; diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 1cdb690ed..8a2784cdc 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -6,6 +6,7 @@ #include <array> #include <span> #include <vector> +#include <boost/container/small_vector.hpp> #include <queue> #include "common/bit_field.h" @@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub struct CommandList final { CommandList() = default; explicit CommandList(std::size_t size) : command_lists(size) {} - explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_) + explicit CommandList( + boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_) : prefetch_command_list{std::move(prefetch_command_list_)} {} - std::vector<CommandListHeader> command_lists; - std::vector<CommandHeader> prefetch_command_list; + boost::container::small_vector<CommandListHeader, 512> command_lists; + boost::container::small_vector<CommandHeader, 512> prefetch_command_list; }; /** diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 0e94c521a..f34090791 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/settings.h" #include "video_core/dirty_flags.h" #include "video_core/engines/draw_manager.h" #include "video_core/rasterizer_interface.h" @@ -195,8 +196,12 @@ void DrawManager::DrawTexture() { if (lower_left) { draw_texture_state.dst_y0 -= dst_height; } - draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width; - draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height; + draw_texture_state.dst_x1 = + draw_texture_state.dst_x0 + + static_cast<f32>(Settings::values.resolution_info.ScaleUp(static_cast<u32>(dst_width))); + draw_texture_state.dst_y1 = + draw_texture_state.dst_y0 + + static_cast<f32>(Settings::values.resolution_info.ScaleUp(static_cast<u32>(dst_height))); draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f; draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f; draw_texture_state.src_x1 = @@ -207,7 +212,6 @@ void DrawManager::DrawTexture() { draw_texture_state.src_y0; draw_texture_state.src_sampler = regs.draw_texture.src_sampler; draw_texture_state.src_texture = regs.draw_texture.src_texture; - maxwell3d->rasterizer->DrawTexture(); } diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index ebe5536de..bc1eb41e7 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -108,9 +108,11 @@ void MaxwellDMA::Launch() { if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { ASSERT(regs.remap_const.component_size_minus_one == 3); accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); - std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); + read_buffer.resize_destructive(regs.line_length_in * sizeof(u32)); + std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in); + std::ranges::fill(span, regs.remap_consta_value); memory_manager.WriteBlockUnsafe(regs.offset_out, - reinterpret_cast<u8*>(tmp_buffer.data()), + reinterpret_cast<u8*>(read_buffer.data()), regs.line_length_in * sizeof(u32)); } else { memory_manager.FlushCaching(); @@ -126,32 +128,32 @@ void MaxwellDMA::Launch() { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); - std::vector<u8> tmp_buffer(16); + read_buffer.resize_destructive(16); for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { memory_manager.ReadBlockUnsafe( convert_linear_2_blocklinear_addr(regs.offset_in + offset), - tmp_buffer.data(), tmp_buffer.size()); - memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), - tmp_buffer.size()); + read_buffer.data(), read_buffer.size()); + memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(), + read_buffer.size()); } } else if (is_src_pitch && !is_dst_pitch) { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); - std::vector<u8> tmp_buffer(16); + read_buffer.resize_destructive(16); for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { - memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), - tmp_buffer.size()); + memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), + read_buffer.size()); memory_manager.WriteBlockCached( convert_linear_2_blocklinear_addr(regs.offset_out + offset), - tmp_buffer.data(), tmp_buffer.size()); + read_buffer.data(), read_buffer.size()); } } else { if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { - std::vector<u8> tmp_buffer(regs.line_length_in); - memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), + read_buffer.resize_destructive(regs.line_length_in); + memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), regs.line_length_in); - memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), + memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(), regs.line_length_in); } } @@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { src_operand.address = regs.offset_in; DMA::BufferOperand dst_operand; - dst_operand.pitch = regs.pitch_out; + u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out)); + dst_operand.pitch = abs_pitch_out; dst_operand.width = regs.line_length_in; dst_operand.height = regs.line_count; dst_operand.address = regs.offset_out; @@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { const size_t src_size = CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); - const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; + const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count; read_buffer.resize_destructive(src_size); write_buffer.resize_destructive(dst_size); @@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, - regs.pitch_out); + abs_pitch_out); memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 456f733cf..db385076d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -193,18 +193,13 @@ struct GPU::Impl { } [[nodiscard]] u64 GetTicks() const { - // This values were reversed engineered by fincs from NVN - // The gpu clock is reported in units of 385/625 nanoseconds - constexpr u64 gpu_ticks_num = 384; - constexpr u64 gpu_ticks_den = 625; + u64 gpu_tick = system.CoreTiming().GetGPUTicks(); - u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count(); if (Settings::values.use_fast_gpu_time.GetValue()) { - nanoseconds /= 256; + gpu_tick /= 256; } - const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; - const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; - return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; + + return gpu_tick; } [[nodiscard]] bool IsAsync() const { diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index 6ce179167..ce827eb6c 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp @@ -4,6 +4,7 @@ #include <array> #include <bit> +#include "common/scratch_buffer.h" #include "common/settings.h" #include "video_core/host1x/codecs/h264.h" #include "video_core/host1x/host1x.h" @@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) { } void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { - std::vector<u8> scan(count); + static Common::ScratchBuffer<u8> scan{}; + scan.resize_destructive(count); if (count == 16) { std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); } else { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 7b2cde7a7..45141e488 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -111,7 +111,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr); SetEntry<false>(current_gpu_addr, entry_type); if (current_entry_type != entry_type) { - rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size); + rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); } if constexpr (entry_type == EntryType::Mapped) { const VAddr current_cpu_addr = cpu_addr + offset; @@ -134,7 +134,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr); SetEntry<true>(current_gpu_addr, entry_type); if (current_entry_type != entry_type) { - rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size); + rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); } if constexpr (entry_type == EntryType::Mapped) { const VAddr current_cpu_addr = cpu_addr + offset; @@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, VideoCommon::CacheType which) { - std::vector<u8> tmp_buffer(size); + tmp_buffer.resize_destructive(size); ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); // The output block must be flushed in case it has data modified from the GPU. @@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons return result; } -std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( - GPUVAddr gpu_addr, std::size_t size) const { - std::vector<std::pair<GPUVAddr, std::size_t>> result{}; +boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> +MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const { + boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> result{}; GetSubmappedRangeImpl<true>(gpu_addr, size, result); return result; } @@ -680,8 +680,9 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( template <bool is_gpu_address> void MemoryManager::GetSubmappedRangeImpl( GPUVAddr gpu_addr, std::size_t size, - std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& - result) const { + boost::container::small_vector< + std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result) + const { std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> last_segment{}; std::optional<VAddr> old_page_addr{}; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 794535122..4202c26ff 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -8,10 +8,12 @@ #include <mutex> #include <optional> #include <vector> +#include <boost/container/small_vector.hpp> #include "common/common_types.h" #include "common/multi_level_page_table.h" #include "common/range_map.h" +#include "common/scratch_buffer.h" #include "common/virtual_buffer.h" #include "video_core/cache_types.h" #include "video_core/pte_kind.h" @@ -107,8 +109,8 @@ public: * if the region is continuous, a single pair will be returned. If it's unmapped, an empty * vector will be returned; */ - std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, - std::size_t size) const; + boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( + GPUVAddr gpu_addr, std::size_t size) const; GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); @@ -165,7 +167,8 @@ private: template <bool is_gpu_address> void GetSubmappedRangeImpl( GPUVAddr gpu_addr, std::size_t size, - std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& + boost::container::small_vector< + std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result) const; Core::System& system; @@ -215,8 +218,8 @@ private: Common::VirtualBuffer<u32> big_page_table_cpu; std::vector<u64> big_page_continuous; - std::vector<std::pair<VAddr, std::size_t>> page_stash{}; - std::vector<std::pair<VAddr, std::size_t>> page_stash2{}; + boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{}; + boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{}; mutable std::mutex guard; @@ -226,6 +229,8 @@ private: std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; static std::atomic<size_t> unique_identifier_generator; + + Common::ScratchBuffer<u8> tmp_buffer; }; } // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 1a0cea9b7..3151c0db8 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -87,7 +87,8 @@ void ComputePipeline::Configure() { texture_cache.SynchronizeComputeDescriptors(); boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; - std::array<GLuint, MAX_TEXTURES> samplers; + boost::container::static_vector<VideoCommon::SamplerId, MAX_TEXTURES> samplers; + std::array<GLuint, MAX_TEXTURES> gl_samplers; std::array<GLuint, MAX_TEXTURES> textures; std::array<GLuint, MAX_IMAGES> images; GLsizei sampler_binding{}; @@ -131,7 +132,6 @@ void ComputePipeline::Configure() { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - samplers[sampler_binding++] = 0; } } for (const auto& desc : info.image_buffer_descriptors) { @@ -142,8 +142,8 @@ void ComputePipeline::Configure() { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); - samplers[sampler_binding++] = sampler->Handle(); + VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); + samplers.push_back(sampler); } } for (const auto& desc : info.image_descriptors) { @@ -186,10 +186,17 @@ void ComputePipeline::Configure() { const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers + num_image_buffers}; + const VideoCommon::SamplerId* samplers_it{samplers.data()}; texture_binding += num_texture_buffers; image_binding += num_image_buffers; u32 texture_scaling_mask{}; + + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + gl_samplers[sampler_binding++] = 0; + } + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; @@ -198,6 +205,12 @@ void ComputePipeline::Configure() { texture_scaling_mask |= 1u << texture_binding; } ++texture_binding; + + const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))}; + const bool use_fallback_sampler{sampler.HasAddedAnisotropy() && + !image_view.SupportsAnisotropy()}; + gl_samplers[sampler_binding++] = + use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle(); } } u32 image_scaling_mask{}; @@ -228,7 +241,7 @@ void ComputePipeline::Configure() { if (texture_binding != 0) { ASSERT(texture_binding == sampler_binding); glBindTextures(0, texture_binding, textures.data()); - glBindSamplers(0, sampler_binding, samplers.data()); + glBindSamplers(0, sampler_binding, gl_samplers.data()); } if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 89000d6e0..c58f760b8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -275,9 +275,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c template <typename Spec> void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; - std::array<GLuint, MAX_TEXTURES> samplers; + std::array<VideoCommon::SamplerId, MAX_TEXTURES> samplers; size_t views_index{}; - GLsizei sampler_binding{}; + size_t samplers_index{}; texture_cache.SynchronizeGraphicsDescriptors(); @@ -337,7 +337,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; views[views_index++] = {handle.first}; - samplers[sampler_binding++] = 0; } } } @@ -351,8 +350,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto handle{read_handle(desc, index)}; views[views_index++] = {handle.first}; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; - samplers[sampler_binding++] = sampler->Handle(); + VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; + samplers[samplers_index++] = sampler; } } if constexpr (Spec::has_images) { @@ -445,10 +444,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { program_manager.BindSourcePrograms(source_programs); } const VideoCommon::ImageViewInOut* views_it{views.data()}; + const VideoCommon::SamplerId* samplers_it{samplers.data()}; GLsizei texture_binding = 0; GLsizei image_binding = 0; + GLsizei sampler_binding{}; std::array<GLuint, MAX_TEXTURES> textures; std::array<GLuint, MAX_IMAGES> images; + std::array<GLuint, MAX_TEXTURES> gl_samplers; const auto prepare_stage{[&](size_t stage) { buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); buffer_cache.BindHostStageBuffers(stage); @@ -465,6 +467,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { u32 stage_image_binding{}; const auto& info{stage_infos[stage]}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + gl_samplers[sampler_binding++] = 0; + } + } + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; @@ -474,6 +483,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } ++texture_binding; ++stage_texture_binding; + + const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))}; + const bool use_fallback_sampler{sampler.HasAddedAnisotropy() && + !image_view.SupportsAnisotropy()}; + gl_samplers[sampler_binding++] = + use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle(); } } for (const auto& desc : info.image_descriptors) { @@ -534,7 +549,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if (texture_binding != 0) { ASSERT(texture_binding == sampler_binding); glBindTextures(0, texture_binding, textures.data()); - glBindSamplers(0, sampler_binding, samplers.data()); + glBindSamplers(0, sampler_binding, gl_samplers.data()); } if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3f077311e..0329ed820 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, case Shader::Stage::VertexB: case Shader::Stage::Geometry: if (!use_assembly_shaders && key.xfb_enabled != 0) { - info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); + auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); + info.xfb_varyings = varyings; + info.xfb_count = count; } break; case Shader::Stage::TessellationEval: diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 1c5dbcdd8..3b446be07 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1268,36 +1268,48 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1); - sampler.Create(); - const GLuint handle = sampler.handle; - glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); - glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); - glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); - glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); - glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); - glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); - glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); - glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); - glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); - glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); - glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); - - if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { - const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); - glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy); - } else { - LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); - } - if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { - glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); - } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { - LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required"); - } - if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { - glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); - } else if (seamless == GL_FALSE) { - // We default to false because it's more common - LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); + const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); + + const auto create_sampler = [&](const f32 anisotropy) { + OGLSampler new_sampler; + new_sampler.Create(); + const GLuint handle = new_sampler.handle; + glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); + glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); + glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); + glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); + glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); + glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); + glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); + + if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { + glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, anisotropy); + } else { + LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); + } + if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { + glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); + } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { + LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required"); + } + if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { + glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); + } else if (seamless == GL_FALSE) { + // We default to false because it's more common + LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); + } + return new_sampler; + }; + + sampler = create_sampler(max_anisotropy); + + const f32 max_anisotropy_default = static_cast<f32>(1U << config.max_anisotropy); + if (max_anisotropy > max_anisotropy_default) { + sampler_default_anisotropy = create_sampler(max_anisotropy_default); } } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1148b73d7..3676eaaa9 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -309,12 +309,21 @@ class Sampler { public: explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); - GLuint Handle() const noexcept { + [[nodiscard]] GLuint Handle() const noexcept { return sampler.handle; } + [[nodiscard]] GLuint HandleWithDefaultAnisotropy() const noexcept { + return sampler_default_anisotropy.handle; + } + + [[nodiscard]] bool HasAddedAnisotropy() const noexcept { + return static_cast<bool>(sampler_default_anisotropy.handle); + } + private: OGLSampler sampler; + OGLSampler sampler_default_anisotropy; }; class Framebuffer { diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 983e1c2e1..71c783709 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -178,7 +178,7 @@ public: inline void PushImageDescriptors(TextureCache& texture_cache, GuestDescriptorQueue& guest_descriptor_queue, const Shader::Info& info, RescalingPushConstant& rescaling, - const VkSampler*& samplers, + const VideoCommon::SamplerId*& samplers, const VideoCommon::ImageViewInOut*& views) { const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); @@ -187,10 +187,15 @@ inline void PushImageDescriptors(TextureCache& texture_cache, for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const VideoCommon::ImageViewId image_view_id{(views++)->id}; - const VkSampler sampler{*(samplers++)}; + const VideoCommon::SamplerId sampler_id{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(image_view_id)}; const VkImageView vk_image_view{image_view.Handle(desc.type)}; - guest_descriptor_queue.AddSampledImage(vk_image_view, sampler); + const Sampler& sampler{texture_cache.GetSampler(sampler_id)}; + const bool use_fallback_sampler{sampler.HasAddedAnisotropy() && + !image_view.SupportsAnisotropy()}; + const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() + : sampler.Handle()}; + guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler); rescaling.PushTexture(texture_cache.IsRescaling(image_view)); } } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 8c33722d3..f47301ad5 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -361,7 +361,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; // Measuring a popular game, this number never exceeds the specified size once data is warmed up - boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); + boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size()); std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { @@ -516,15 +516,15 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi buffer_handles.push_back(handle); } if (device.IsExtExtendedDynamicStateSupported()) { - scheduler.Record([bindings = bindings, - buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + scheduler.Record([bindings = std::move(bindings), + buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { cmdbuf.BindVertexBuffers2EXT( bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), bindings.offsets.data(), bindings.sizes.data(), bindings.strides.data()); }); } else { - scheduler.Record([bindings = bindings, - buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + scheduler.Record([bindings = std::move(bindings), + buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { cmdbuf.BindVertexBuffers(bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), bindings.offsets.data()); }); @@ -561,12 +561,12 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings< for (u32 index = 0; index < bindings.buffers.size(); ++index) { buffer_handles.push_back(bindings.buffers[index]->Handle()); } - scheduler.Record( - [bindings = bindings, buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { - cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()), - buffer_handles.data(), bindings.offsets.data(), - bindings.sizes.data()); - }); + scheduler.Record([bindings = std::move(bindings), + buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { + cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()), + buffer_handles.data(), bindings.offsets.data(), + bindings.sizes.data()); + }); } void BufferCacheRuntime::ReserveNullBuffer() { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 733e70d9d..73e585c2b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -115,7 +115,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, static constexpr size_t max_elements = 64; boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views; - boost::container::static_vector<VkSampler, max_elements> samplers; + boost::container::static_vector<VideoCommon::SamplerId, max_elements> samplers; const auto& qmd{kepler_compute.launch_description}; const auto& cbufs{qmd.const_buffer_config}; @@ -160,8 +160,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); - samplers.push_back(sampler->Handle()); + VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); + samplers.push_back(sampler); } } for (const auto& desc : info.image_descriptors) { @@ -192,7 +192,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, buffer_cache.BindHostComputeBuffers(); RescalingPushConstant rescaling; - const VkSampler* samplers_it{samplers.data()}; + const VideoCommon::SamplerId* samplers_it{samplers.data()}; const VideoCommon::ImageViewInOut* views_it{views.data()}; PushImageDescriptors(texture_cache, guest_descriptor_queue, info, rescaling, samplers_it, views_it); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 506b78f08..c1595642e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -298,7 +298,7 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { template <typename Spec> void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views; - std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers; + std::array<VideoCommon::SamplerId, MAX_IMAGE_ELEMENTS> samplers; size_t sampler_index{}; size_t view_index{}; @@ -367,8 +367,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto handle{read_handle(desc, index)}; views[view_index++] = {handle.first}; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; - samplers[sampler_index++] = sampler->Handle(); + VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; + samplers[sampler_index++] = sampler; } } if constexpr (Spec::has_images) { @@ -453,7 +453,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { RescalingPushConstant rescaling; RenderAreaPushConstant render_area; - const VkSampler* samplers_it{samplers.data()}; + const VideoCommon::SamplerId* samplers_it{samplers.data()}; const VideoCommon::ImageViewInOut* views_it{views.data()}; const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { buffer_cache.BindHostStageBuffers(stage); diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index b128c4f6e..5eeda08d2 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -3,6 +3,7 @@ #include <thread> +#include "common/polyfill_ranges.h" #include "common/settings.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/vulkan_common/vulkan_device.h" diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 18e040a1b..9f316113c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled) { - info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + auto [varyings, count] = + VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + info.xfb_varyings = varyings; + info.xfb_count = count; } info.convert_depth_mode = gl_ndc; } @@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled != 0) { - info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + auto [varyings, count] = + VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + info.xfb_varyings = varyings; + info.xfb_count = count; } info.convert_depth_mode = gl_ndc; break; @@ -705,10 +711,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, PipelineStatistics* statistics, bool build_in_parallel) try { - // TODO: Remove this when Intel fixes their shader compiler. - // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159 - if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS && - !Settings::values.enable_compute_pipelines.GetValue()) { + if (device.HasBrokenCompute()) { LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash()); return nullptr; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8711e2a87..f3cef09dd 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { }; } -[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( - std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { - std::vector<VkBufferCopy> result(copies.size()); +[[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16> +TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { + boost::container::small_vector<VkBufferCopy, 16> result(copies.size()); std::ranges::transform( copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { return VkBufferCopy{ @@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { return result; } -[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( +[[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies( std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { struct Maker { VkBufferImageCopy operator()(const BufferImageCopy& copy) const { @@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { VkImageAspectFlags aspect_mask; }; if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - std::vector<VkBufferImageCopy> result(copies.size() * 2); + boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2); std::ranges::transform(copies, result.begin(), Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); std::ranges::transform(copies, result.begin() + copies.size(), Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); return result; } else { - std::vector<VkBufferImageCopy> result(copies.size()); + boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size()); std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); return result; } @@ -867,8 +867,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() { void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies) { - std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); - std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); + boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size()); + boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size()); const VkImageAspectFlags src_aspect_mask = src.AspectMask(); const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); @@ -1157,7 +1157,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im void TextureCacheRuntime::CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies) { - std::vector<VkImageCopy> vk_copies(copies.size()); + boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size()); const VkImageAspectFlags aspect_mask = dst.AspectMask(); ASSERT(aspect_mask == src.AspectMask()); @@ -1332,7 +1332,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, ScaleDown(true); } scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); + auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); const VkBuffer src_buffer = buffer; const VkImage vk_image = *original_image; const VkImageAspectFlags vk_aspect_mask = aspect_mask; @@ -1367,8 +1367,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS if (is_rescaled) { ScaleDown(); } - boost::container::small_vector<VkBuffer, 1> buffers_vector{}; - boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies; + boost::container::small_vector<VkBuffer, 8> buffers_vector{}; + boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8> + vk_copies; for (size_t index = 0; index < buffers_span.size(); index++) { buffers_vector.emplace_back(buffers_span[index]); vk_copies.emplace_back( @@ -1802,27 +1803,36 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t // Some games have samplers with garbage. Sanitize them here. const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); - sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .pNext = pnext, - .flags = 0, - .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), - .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), - .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), - .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), - .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), - .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), - .mipLodBias = tsc.LodBias(), - .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE), - .maxAnisotropy = max_anisotropy, - .compareEnable = tsc.depth_compare_enabled, - .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), - .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), - .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), - .borderColor = - arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color), - .unnormalizedCoordinates = VK_FALSE, - }); + const auto create_sampler = [&](const f32 anisotropy) { + return device.GetLogical().CreateSampler(VkSamplerCreateInfo{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = pnext, + .flags = 0, + .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), + .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), + .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), + .mipLodBias = tsc.LodBias(), + .anisotropyEnable = static_cast<VkBool32>(anisotropy > 1.0f ? VK_TRUE : VK_FALSE), + .maxAnisotropy = anisotropy, + .compareEnable = tsc.depth_compare_enabled, + .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), + .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), + .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), + .borderColor = + arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color), + .unnormalizedCoordinates = VK_FALSE, + }); + }; + + sampler = create_sampler(max_anisotropy); + + const f32 max_anisotropy_default = static_cast<f32>(1U << tsc.max_anisotropy); + if (max_anisotropy > max_anisotropy_default) { + sampler_default_anisotropy = create_sampler(max_anisotropy_default); + } } Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, @@ -1849,7 +1859,7 @@ Framebuffer::~Framebuffer() = default; void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer, bool is_rescaled) { - std::vector<VkImageView> attachments; + boost::container::small_vector<VkImageView, NUM_RT + 1> attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 0f7a5ffd4..f14525dcb 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -279,8 +279,17 @@ public: return *sampler; } + [[nodiscard]] VkSampler HandleWithDefaultAnisotropy() const noexcept { + return *sampler_default_anisotropy; + } + + [[nodiscard]] bool HasAddedAnisotropy() const noexcept { + return static_cast<bool>(sampler_default_anisotropy); + } + private: vk::Sampler sampler; + vk::Sampler sampler_default_anisotropy; }; class Framebuffer { diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index c5213875b..4db948b6d 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() { marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), marked_for_removal.end()); - std::vector<ShaderInfo*> removed_shaders; - removed_shaders.reserve(marked_for_removal.size()); + boost::container::small_vector<ShaderInfo*, 16> removed_shaders; std::scoped_lock lock{lookup_mutex}; - for (Entry* const entry : marked_for_removal) { removed_shaders.push_back(entry->data); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 1b8a17ee8..55d49d017 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -6,6 +6,7 @@ #include <array> #include <optional> #include <vector> +#include <boost/container/small_vector.hpp> #include "common/common_funcs.h" #include "common/common_types.h" @@ -108,8 +109,8 @@ struct ImageBase { std::vector<ImageViewInfo> image_view_infos; std::vector<ImageViewId> image_view_ids; - std::vector<u32> slice_offsets; - std::vector<SubresourceBase> slice_subresources; + boost::container::small_vector<u32, 16> slice_offsets; + boost::container::small_vector<SubresourceBase, 16> slice_subresources; std::vector<AliasedImage> aliased_images; std::vector<ImageId> overlapping_images; diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index d134b6738..0c5f4450d 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -45,4 +45,56 @@ ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_in ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {} +bool ImageViewBase::SupportsAnisotropy() const noexcept { + const bool has_mips = range.extent.levels > 1; + const bool is_2d = type == ImageViewType::e2D || type == ImageViewType::e2DArray; + if (!has_mips || !is_2d) { + return false; + } + + switch (format) { + case PixelFormat::R8_UNORM: + case PixelFormat::R8_SNORM: + case PixelFormat::R8_SINT: + case PixelFormat::R8_UINT: + case PixelFormat::BC4_UNORM: + case PixelFormat::BC4_SNORM: + case PixelFormat::BC5_UNORM: + case PixelFormat::BC5_SNORM: + case PixelFormat::R32G32_FLOAT: + case PixelFormat::R32G32_SINT: + case PixelFormat::R32_FLOAT: + case PixelFormat::R16_FLOAT: + case PixelFormat::R16_UNORM: + case PixelFormat::R16_SNORM: + case PixelFormat::R16_UINT: + case PixelFormat::R16_SINT: + case PixelFormat::R16G16_UNORM: + case PixelFormat::R16G16_FLOAT: + case PixelFormat::R16G16_UINT: + case PixelFormat::R16G16_SINT: + case PixelFormat::R16G16_SNORM: + case PixelFormat::R8G8_UNORM: + case PixelFormat::R8G8_SNORM: + case PixelFormat::R8G8_SINT: + case PixelFormat::R8G8_UINT: + case PixelFormat::R32G32_UINT: + case PixelFormat::R32_UINT: + case PixelFormat::R32_SINT: + case PixelFormat::G4R4_UNORM: + // Depth formats + case PixelFormat::D32_FLOAT: + case PixelFormat::D16_UNORM: + // Stencil formats + case PixelFormat::S8_UINT: + // DepthStencil formats + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::S8_UINT_D24_UNORM: + case PixelFormat::D32_FLOAT_S8_UINT: + return false; + default: + return true; + } +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index a25ae1d4a..87549ffff 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h @@ -33,6 +33,8 @@ struct ImageViewBase { return type == ImageViewType::Buffer; } + [[nodiscard]] bool SupportsAnisotropy() const noexcept; + ImageId image_id{}; GPUVAddr gpu_addr = 0; PixelFormat format{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c7f7448e9..d3f03a995 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -186,6 +186,10 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { template <class P> void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) { + if (!Settings::values.barrier_feedback_loops.GetValue()) { + return; + } + const bool requires_barrier = [&] { for (const auto& view : views) { if (!view.id) { @@ -222,30 +226,50 @@ void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) { template <class P> typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { + return &slot_samplers[GetGraphicsSamplerId(index)]; +} + +template <class P> +typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { + return &slot_samplers[GetComputeSamplerId(index)]; +} + +template <class P> +SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) { if (index > channel_state->graphics_sampler_table.Limit()) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; + return NULL_SAMPLER_ID; } const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index); SamplerId& id = channel_state->graphics_sampler_ids[index]; if (is_new) { id = FindSampler(descriptor); } - return &slot_samplers[id]; + return id; } template <class P> -typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { +SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) { if (index > channel_state->compute_sampler_table.Limit()) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; + return NULL_SAMPLER_ID; } const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index); SamplerId& id = channel_state->compute_sampler_ids[index]; if (is_new) { id = FindSampler(descriptor); } - return &slot_samplers[id]; + return id; +} + +template <class P> +const typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) const noexcept { + return slot_samplers[id]; +} + +template <class P> +typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) noexcept { + return slot_samplers[id]; } template <class P> @@ -280,7 +304,7 @@ void TextureCache<P>::SynchronizeComputeDescriptors() { } template <class P> -bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { +bool TextureCache<P>::RescaleRenderTargets() { auto& flags = maxwell3d->dirty.flags; u32 scale_rating = 0; bool rescaled = false; @@ -318,13 +342,13 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; if (flags[Dirty::ColorBuffer0 + index] || force) { flags[Dirty::ColorBuffer0 + index] = false; - BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + BindRenderTarget(&color_buffer_id, FindColorBuffer(index)); } check_rescale(color_buffer_id, tmp_color_images[index]); } if (flags[Dirty::ZetaBuffer] || force) { flags[Dirty::ZetaBuffer] = false; - BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer()); } check_rescale(render_targets.depth_buffer_id, tmp_depth_image); @@ -389,7 +413,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) { return; } - const bool rescaled = RescaleRenderTargets(is_clear); + const bool rescaled = RescaleRenderTargets(); if (is_rescaling != rescaled) { flags[Dirty::RescaleViewports] = true; flags[Dirty::RescaleScissors] = true; @@ -502,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { template <class P> void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { - std::vector<ImageId> images; + boost::container::small_vector<ImageId, 16> images; ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { if (!image.IsSafeDownload()) { return; @@ -555,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V template <class P> void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { - std::vector<ImageId> deleted_images; + boost::container::small_vector<ImageId, 16> deleted_images; ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { Image& image = slot_images[id]; @@ -569,7 +593,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { template <class P> void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { - std::vector<ImageId> deleted_images; + boost::container::small_vector<ImageId, 16> deleted_images; ForEachImageInRegionGPU(as_id, gpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { @@ -1077,7 +1101,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, const bool native_bgr = runtime.HasNativeBgr(); const bool flexible_formats = True(options & RelaxedOptions::Format); ImageId image_id{}; - boost::container::small_vector<ImageId, 1> image_ids; + boost::container::small_vector<ImageId, 8> image_ids; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (True(existing_image.flags & ImageFlagBits::Remapped)) { return false; @@ -1598,7 +1622,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) } } ImageId image_id{}; - boost::container::small_vector<ImageId, 1> image_ids; + boost::container::small_vector<ImageId, 8> image_ids; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (True(existing_image.flags & ImageFlagBits::Remapped)) { return false; @@ -1658,7 +1682,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { } template <class P> -ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { +ImageViewId TextureCache<P>::FindColorBuffer(size_t index) { const auto& regs = maxwell3d->regs; if (index >= regs.rt_control.count) { return ImageViewId{}; @@ -1672,11 +1696,11 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { return ImageViewId{}; } const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode); - return FindRenderTargetView(info, gpu_addr, is_clear); + return FindRenderTargetView(info, gpu_addr); } template <class P> -ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { +ImageViewId TextureCache<P>::FindDepthBuffer() { const auto& regs = maxwell3d->regs; if (!regs.zeta_enable) { return ImageViewId{}; @@ -1686,18 +1710,16 @@ ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { return ImageViewId{}; } const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode); - return FindRenderTargetView(info, gpu_addr, is_clear); + return FindRenderTargetView(info, gpu_addr); } template <class P> -ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear) { - const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; +ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr) { ImageId image_id{}; bool delete_state = has_deleted_images; do { has_deleted_images = false; - image_id = FindOrInsertImage(info, gpu_addr, options); + image_id = FindOrInsertImage(info, gpu_addr); delete_state |= has_deleted_images; } while (has_deleted_images); has_deleted_images = delete_state; @@ -1920,7 +1942,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { image.map_view_id = map_id; return; } - std::vector<ImageViewId> sparse_maps{}; + boost::container::small_vector<ImageViewId, 16> sparse_maps; ForEachSparseSegment( image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); @@ -2195,7 +2217,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept { template <class P> void TextureCache<P>::SynchronizeAliases(ImageId image_id) { - boost::container::small_vector<const AliasedImage*, 1> aliased_images; + boost::container::small_vector<const AliasedImage*, 8> aliased_images; Image& image = slot_images[image_id]; bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); bool any_modified = True(image.flags & ImageFlagBits::GpuModified); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 3bfa92154..e9ec91265 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -56,7 +56,7 @@ struct ImageViewInOut { struct AsyncDecodeContext { ImageId image_id; Common::ScratchBuffer<u8> decoded_data; - std::vector<BufferImageCopy> copies; + boost::container::small_vector<BufferImageCopy, 16> copies; std::mutex mutex; std::atomic_bool complete; }; @@ -159,6 +159,18 @@ public: /// Get the sampler from the compute descriptor table in the specified index Sampler* GetComputeSampler(u32 index); + /// Get the sampler id from the graphics descriptor table in the specified index + SamplerId GetGraphicsSamplerId(u32 index); + + /// Get the sampler id from the compute descriptor table in the specified index + SamplerId GetComputeSamplerId(u32 index); + + /// Return a constant reference to the given sampler id + [[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept; + + /// Return a reference to the given sampler id + [[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept; + /// Refresh the state for graphics image view and sampler descriptors void SynchronizeGraphicsDescriptors(); @@ -166,9 +178,8 @@ public: void SynchronizeComputeDescriptors(); /// Updates the Render Targets if they can be rescaled - /// @param is_clear True when the render targets are being used for clears /// @retval True if the Render Targets have been rescaled. - bool RescaleRenderTargets(bool is_clear); + bool RescaleRenderTargets(); /// Update bound render targets and upload memory if necessary /// @param is_clear True when the render targets are being used for clears @@ -324,14 +335,13 @@ private: [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); /// Find or create an image view for the given color buffer index - [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); + [[nodiscard]] ImageViewId FindColorBuffer(size_t index); /// Find or create an image view for the depth buffer - [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); + [[nodiscard]] ImageViewId FindDepthBuffer(); /// Find or create a view for a render target with the given image parameters - [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear); + [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr); /// Iterates over all the images in a region calling func template <typename Func> @@ -419,7 +429,7 @@ private: std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; - std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; + std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; VAddr virtual_invalid_space{}; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 95a5b47d8..f781cb7a0 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -329,13 +329,13 @@ template <u32 GOB_EXTENT> [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { - const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); + const auto slice_offsets = CalculateSliceOffsets(new_info); const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); const auto it = std::ranges::find(slice_offsets, diff); if (it == slice_offsets.end()) { return std::nullopt; } - const std::vector subresources = CalculateSliceSubresources(new_info); + const auto subresources = CalculateSliceSubresources(new_info); const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; const ImageInfo& info = overlap.info; if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { @@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { return sizes; } -std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { +boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); - std::vector<u32> offsets; + boost::container::small_vector<u32, 16> offsets; offsets.reserve(NumSlices(info)); const LevelInfo level_info = MakeLevelInfo(info); @@ -679,9 +679,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { return offsets; } -std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { +boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources( + const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); - std::vector<SubresourceBase> subresources; + boost::container::small_vector<SubresourceBase, 16> subresources; subresources.reserve(NumSlices(info)); for (s32 level = 0; level < info.resources.levels; ++level) { const s32 depth = AdjustMipSize(info.size.depth, level); @@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { } } -std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, - SubresourceBase base, u32 up_scale, u32 down_shift) { +boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst, + const ImageInfo& src, + SubresourceBase base, + u32 up_scale, u32 down_shift) { ASSERT(dst.resources.levels >= src.resources.levels); const bool is_dst_3d = dst.type == ImageType::e3D; @@ -733,7 +736,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn ASSERT(src.resources.levels == 1); } const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; - std::vector<ImageCopy> copies; + boost::container::small_vector<ImageCopy, 16> copies; copies.reserve(src.resources.levels); for (s32 level = 0; level < src.resources.levels; ++level) { ImageCopy& copy = copies.emplace_back(); @@ -770,9 +773,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn return copies; } -std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, - u32 down_shift) { - std::vector<ImageCopy> copies; +boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src, + u32 up_scale, + u32 down_shift) { + boost::container::small_vector<ImageCopy, 16> copies; copies.reserve(src.resources.levels); const bool is_3d = src.type == ImageType::e3D; for (s32 level = 0; level < src.resources.levels; ++level) { @@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); } -std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, - const ImageInfo& info, std::span<const u8> input, - std::span<u8> output) { +boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory, + GPUVAddr gpu_addr, + const ImageInfo& info, + std::span<const u8> input, + std::span<u8> output) { const size_t guest_size_bytes = input.size_bytes(); const u32 bpp_log2 = BytesPerBlockLog2(info.format); const Extent3D size = info.size; @@ -861,7 +867,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP info.tile_width_spacing); size_t guest_offset = 0; u32 host_offset = 0; - std::vector<BufferImageCopy> copies(num_levels); + boost::container::small_vector<BufferImageCopy, 16> copies(num_levels); for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); @@ -978,7 +984,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 } } -std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { +boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) { const Extent3D size = info.size; const u32 bytes_per_block = BytesPerBlock(info.format); if (info.type == ImageType::Linear) { @@ -1006,7 +1012,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { u32 host_offset = 0; - std::vector<BufferImageCopy> copies(num_levels); + boost::container::small_vector<BufferImageCopy, 16> copies(num_levels); for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); @@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) { return AdjustMipBlockSize(num_tiles, level_info.block, level); } -std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { +boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) { const Extent2D tile_size = DefaultBlockSize(info.format); if (info.type == ImageType::Linear) { - return std::vector{SwizzleParameters{ + return {SwizzleParameters{ .num_tiles = AdjustTileSize(info.size, tile_size), .block = {}, .buffer_offset = 0, @@ -1057,7 +1063,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { const s32 num_levels = info.resources.levels; u32 guest_offset = 0; - std::vector<SwizzleParameters> params(num_levels); + boost::container::small_vector<SwizzleParameters, 16> params(num_levels); for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 84aa6880d..ab45a43c4 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -5,6 +5,7 @@ #include <optional> #include <span> +#include <boost/container/small_vector.hpp> #include "common/common_types.h" #include "common/scratch_buffer.h" @@ -40,9 +41,10 @@ struct OverlapResult { [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; -[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info); -[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources( + const ImageInfo& info); [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); @@ -51,21 +53,18 @@ struct OverlapResult { [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; -[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, - const ImageInfo& src, - SubresourceBase base, u32 up_scale = 1, - u32 down_shift = 0); +[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies( + const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1, + u32 down_shift = 0); -[[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, - u32 up_scale = 1, - u32 down_shift = 0); +[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies( + const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0); [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); -[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, - GPUVAddr gpu_addr, const ImageInfo& info, - std::span<const u8> input, - std::span<u8> output); +[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage( + Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, + std::span<const u8> input, std::span<u8> output); [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageBase& image, std::span<u8> output); @@ -73,13 +72,15 @@ struct OverlapResult { void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, std::span<BufferImageCopy> copies); -[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies( + const ImageInfo& info); [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); -[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles( + const ImageInfo& info); void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, std::span<const BufferImageCopy> copies, std::span<const u8> memory, diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index 4a80a59f9..d8b88d9bc 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -62,7 +62,12 @@ std::array<float, 4> TSCEntry::BorderColor() const noexcept { } float TSCEntry::MaxAnisotropy() const noexcept { - if (max_anisotropy == 0 && mipmap_filter != TextureMipmapFilter::Linear) { + const bool is_suitable_mipmap_filter = mipmap_filter != TextureMipmapFilter::None; + const bool has_regular_lods = min_lod_clamp == 0 && max_lod_clamp >= 256; + const bool is_bilinear_filter = min_filter == TextureFilter::Linear && + reduction_filter == SamplerReduction::WeightedAverage; + if (max_anisotropy == 0 && (!is_suitable_mipmap_filter || !has_regular_lods || + !is_bilinear_filter || depth_compare_enabled)) { return 1.0f; } const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue(); diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp index 155599316..1f353d2df 100644 --- a/src/video_core/transform_feedback.cpp +++ b/src/video_core/transform_feedback.cpp @@ -13,7 +13,7 @@ namespace VideoCommon { -std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( +std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings( const TransformFeedbackState& state) { static constexpr std::array VECTORS{ 28U, // gl_Position @@ -62,7 +62,8 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( 216U, // gl_TexCoord[6] 220U, // gl_TexCoord[7] }; - std::vector<Shader::TransformFeedbackVarying> xfb(256); + std::array<Shader::TransformFeedbackVarying, 256> xfb{}; + u32 count{0}; for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { const auto& locations = state.varyings[buffer]; const auto& layout = state.layouts[buffer]; @@ -103,11 +104,12 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( } } xfb[attribute] = varying; + count = std::max(count, attribute); highest = std::max(highest, (base_offset + varying.components) * 4); } UNIMPLEMENTED_IF(highest != layout.stride); } - return xfb; + return {xfb, count + 1}; } } // namespace VideoCommon diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h index d13eb16c3..401b1352a 100644 --- a/src/video_core/transform_feedback.h +++ b/src/video_core/transform_feedback.h @@ -24,7 +24,7 @@ struct TransformFeedbackState { varyings; }; -std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( +std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings( const TransformFeedbackState& state); } // namespace VideoCommon diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 3d2e9a16a..b11abe311 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -316,6 +316,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, std::vector<const char*> ExtensionListForVulkan( const std::set<std::string, std::less<>>& extensions) { std::vector<const char*> output; + output.reserve(extensions.size()); for (const auto& extension : extensions) { output.push_back(extension.c_str()); } @@ -562,6 +563,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); cant_blit_msaa = true; } + has_broken_compute = + CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) && + !Settings::values.enable_compute_pipelines.GetValue(); if (is_intel_anv || (is_qualcomm && !is_s8gen2)) { LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format"); must_emulate_bgr565 = true; @@ -783,9 +787,6 @@ bool Device::GetSuitability(bool requires_swapchain) { FOR_EACH_VK_FEATURE_EXT(FEATURE_EXTENSION); FOR_EACH_VK_EXTENSION(EXTENSION); -#ifdef _WIN32 - FOR_EACH_VK_EXTENSION_WIN32(EXTENSION); -#endif #undef FEATURE_EXTENSION #undef EXTENSION @@ -804,11 +805,6 @@ bool Device::GetSuitability(bool requires_swapchain) { FOR_EACH_VK_RECOMMENDED_EXTENSION(LOG_EXTENSION); FOR_EACH_VK_MANDATORY_EXTENSION(CHECK_EXTENSION); -#ifdef _WIN32 - FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(CHECK_EXTENSION); -#else - FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(CHECK_EXTENSION); -#endif if (requires_swapchain) { CHECK_EXTENSION(VK_KHR_SWAPCHAIN_EXTENSION_NAME); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index f314d0ffe..0b634a876 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -10,6 +10,7 @@ #include <vector> #include "common/common_types.h" +#include "common/logging/log.h" #include "common/settings.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -68,7 +69,6 @@ EXTENSION(EXT, VERTEX_ATTRIBUTE_DIVISOR, vertex_attribute_divisor) \ EXTENSION(KHR, DRAW_INDIRECT_COUNT, draw_indirect_count) \ EXTENSION(KHR, DRIVER_PROPERTIES, driver_properties) \ - EXTENSION(KHR, EXTERNAL_MEMORY_FD, external_memory_fd) \ EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor) \ EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge) \ EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \ @@ -80,9 +80,6 @@ EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \ EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle) -#define FOR_EACH_VK_EXTENSION_WIN32(EXTENSION) \ - EXTENSION(KHR, EXTERNAL_MEMORY_WIN32, external_memory_win32) - // Define extensions which must be supported. #define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \ @@ -90,12 +87,6 @@ EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \ EXTENSION_NAME(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME) -#define FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(EXTENSION_NAME) \ - EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME) - -#define FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(EXTENSION_NAME) \ - EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME) - // Define extensions where the absence of the extension may result in a degraded experience. #define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \ @@ -528,6 +519,11 @@ public: return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue(); } + /// @returns True if compute pipelines can cause crashing. + bool HasBrokenCompute() const { + return has_broken_compute; + } + /// Returns true when the device does not properly support cube compatibility. bool HasBrokenCubeImageCompability() const { return has_broken_cube_compatibility; @@ -589,6 +585,22 @@ public: return supports_conditional_barriers; } + [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id, + u32 driver_version) { + if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + const u32 major = VK_API_VERSION_MAJOR(driver_version); + const u32 minor = VK_API_VERSION_MINOR(driver_version); + const u32 patch = VK_API_VERSION_PATCH(driver_version); + if (major == 0 && minor == 405 && patch < 286) { + LOG_WARNING( + Render_Vulkan, + "Intel proprietary drivers 0.405.0 until 0.405.286 have broken compute"); + return true; + } + } + return false; + } + private: /// Checks if the physical device is suitable and configures the object state /// with all necessary info about its properties. @@ -636,7 +648,6 @@ private: FOR_EACH_VK_FEATURE_1_3(FEATURE); FOR_EACH_VK_FEATURE_EXT(FEATURE); FOR_EACH_VK_EXTENSION(EXTENSION); - FOR_EACH_VK_EXTENSION_WIN32(EXTENSION); #undef EXTENSION #undef FEATURE @@ -683,6 +694,7 @@ private: bool is_integrated{}; ///< Is GPU an iGPU. bool is_virtual{}; ///< Is GPU a virtual GPU. bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. + bool has_broken_compute{}; ///< Compute shaders can cause crashes bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached |