summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h13
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h4
-rw-r--r--src/video_core/cdma_pusher.h1
-rw-r--r--src/video_core/dma_pusher.h8
-rw-r--r--src/video_core/engines/draw_manager.cpp10
-rw-r--r--src/video_core/engines/maxwell_dma.cpp35
-rw-r--r--src/video_core/gpu.cpp13
-rw-r--r--src/video_core/host1x/codecs/h264.cpp4
-rw-r--r--src/video_core/memory_manager.cpp17
-rw-r--r--src/video_core/memory_manager.h15
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp27
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp72
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h11
-rw-r--r--src/video_core/renderer_vulkan/pipeline_helper.h11
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp15
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp78
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h9
-rw-r--r--src/video_core/shader_cache.cpp4
-rw-r--r--src/video_core/texture_cache/image_base.h5
-rw-r--r--src/video_core/texture_cache/image_view_base.cpp52
-rw-r--r--src/video_core/texture_cache/image_view_base.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h70
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h26
-rw-r--r--src/video_core/texture_cache/util.cpp48
-rw-r--r--src/video_core/texture_cache/util.h31
-rw-r--r--src/video_core/textures/texture.cpp7
-rw-r--r--src/video_core/transform_feedback.cpp8
-rw-r--r--src/video_core/transform_feedback.h2
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp12
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h34
36 files changed, 455 insertions, 255 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 9bafd8cc0..58a45ab67 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -207,7 +207,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
if (has_new_downloads) {
memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
}
- tmp_buffer.resize(amount);
+ tmp_buffer.resize_destructive(amount);
cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
return true;
@@ -719,9 +719,15 @@ void BufferCache<P>::BindHostVertexBuffers() {
bool any_valid{false};
auto& flags = maxwell3d->dirty.flags;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+ const Binding& binding = channel_state->vertex_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ TouchBuffer(buffer, binding.buffer_id);
+ SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
}
+ flags[Dirty::VertexBuffer0 + index] = false;
+
host_bindings.min_index = std::min(host_bindings.min_index, index);
host_bindings.max_index = std::max(host_bindings.max_index, index);
any_valid = true;
@@ -735,9 +741,6 @@ void BufferCache<P>::BindHostVertexBuffers() {
const Binding& binding = channel_state->vertex_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
- TouchBuffer(buffer, binding.buffer_id);
- SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
-
const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
const u32 offset = buffer.Offset(binding.cpu_addr);
@@ -1276,7 +1279,7 @@ template <class P>
typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
u32 wanted_size) {
static constexpr int STREAM_LEAP_THRESHOLD = 16;
- std::vector<BufferId> overlap_ids;
+ boost::container::small_vector<BufferId, 16> overlap_ids;
VAddr begin = cpu_addr;
VAddr end = cpu_addr + wanted_size;
int stream_score = 0;
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 63a120f7a..fe6068cfe 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
struct OverlapResult {
- std::vector<BufferId> ids;
+ boost::container::small_vector<BufferId, 16> ids;
VAddr begin;
VAddr end;
bool has_stream_leap = false;
@@ -582,7 +582,7 @@ private:
BufferId inline_buffer_id;
std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
- std::vector<u8> tmp_buffer;
+ Common::ScratchBuffer<u8> tmp_buffer;
};
} // namespace VideoCommon
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 83112dfce..7d660af47 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -63,7 +63,6 @@ struct ChCommand {
};
using ChCommandHeaderList = std::vector<ChCommandHeader>;
-using ChCommandList = std::vector<ChCommand>;
struct ThiRegisters {
u32_le increment_syncpt{};
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 1cdb690ed..8a2784cdc 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -6,6 +6,7 @@
#include <array>
#include <span>
#include <vector>
+#include <boost/container/small_vector.hpp>
#include <queue>
#include "common/bit_field.h"
@@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub
struct CommandList final {
CommandList() = default;
explicit CommandList(std::size_t size) : command_lists(size) {}
- explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_)
+ explicit CommandList(
+ boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_)
: prefetch_command_list{std::move(prefetch_command_list_)} {}
- std::vector<CommandListHeader> command_lists;
- std::vector<CommandHeader> prefetch_command_list;
+ boost::container::small_vector<CommandListHeader, 512> command_lists;
+ boost::container::small_vector<CommandHeader, 512> prefetch_command_list;
};
/**
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index 0e94c521a..f34090791 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+#include "common/settings.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/draw_manager.h"
#include "video_core/rasterizer_interface.h"
@@ -195,8 +196,12 @@ void DrawManager::DrawTexture() {
if (lower_left) {
draw_texture_state.dst_y0 -= dst_height;
}
- draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width;
- draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height;
+ draw_texture_state.dst_x1 =
+ draw_texture_state.dst_x0 +
+ static_cast<f32>(Settings::values.resolution_info.ScaleUp(static_cast<u32>(dst_width)));
+ draw_texture_state.dst_y1 =
+ draw_texture_state.dst_y0 +
+ static_cast<f32>(Settings::values.resolution_info.ScaleUp(static_cast<u32>(dst_height)));
draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f;
draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f;
draw_texture_state.src_x1 =
@@ -207,7 +212,6 @@ void DrawManager::DrawTexture() {
draw_texture_state.src_y0;
draw_texture_state.src_sampler = regs.draw_texture.src_sampler;
draw_texture_state.src_texture = regs.draw_texture.src_texture;
-
maxwell3d->rasterizer->DrawTexture();
}
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ebe5536de..bc1eb41e7 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -108,9 +108,11 @@ void MaxwellDMA::Launch() {
if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
ASSERT(regs.remap_const.component_size_minus_one == 3);
accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
- std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
+ read_buffer.resize_destructive(regs.line_length_in * sizeof(u32));
+ std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in);
+ std::ranges::fill(span, regs.remap_consta_value);
memory_manager.WriteBlockUnsafe(regs.offset_out,
- reinterpret_cast<u8*>(tmp_buffer.data()),
+ reinterpret_cast<u8*>(read_buffer.data()),
regs.line_length_in * sizeof(u32));
} else {
memory_manager.FlushCaching();
@@ -126,32 +128,32 @@ void MaxwellDMA::Launch() {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
- std::vector<u8> tmp_buffer(16);
+ read_buffer.resize_destructive(16);
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
memory_manager.ReadBlockUnsafe(
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
- tmp_buffer.data(), tmp_buffer.size());
- memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
- tmp_buffer.size());
+ read_buffer.data(), read_buffer.size());
+ memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(),
+ read_buffer.size());
}
} else if (is_src_pitch && !is_dst_pitch) {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
- std::vector<u8> tmp_buffer(16);
+ read_buffer.resize_destructive(16);
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
- memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
- tmp_buffer.size());
+ memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(),
+ read_buffer.size());
memory_manager.WriteBlockCached(
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
- tmp_buffer.data(), tmp_buffer.size());
+ read_buffer.data(), read_buffer.size());
}
} else {
if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
- std::vector<u8> tmp_buffer(regs.line_length_in);
- memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+ read_buffer.resize_destructive(regs.line_length_in);
+ memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(),
regs.line_length_in);
- memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
+ memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(),
regs.line_length_in);
}
}
@@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
src_operand.address = regs.offset_in;
DMA::BufferOperand dst_operand;
- dst_operand.pitch = regs.pitch_out;
+ u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out));
+ dst_operand.pitch = abs_pitch_out;
dst_operand.width = regs.line_length_in;
dst_operand.height = regs.line_count;
dst_operand.address = regs.offset_out;
@@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
const size_t src_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
- const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
+ const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count;
read_buffer.resize_destructive(src_size);
write_buffer.resize_destructive(dst_size);
@@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
- regs.pitch_out);
+ abs_pitch_out);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 456f733cf..db385076d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -193,18 +193,13 @@ struct GPU::Impl {
}
[[nodiscard]] u64 GetTicks() const {
- // This values were reversed engineered by fincs from NVN
- // The gpu clock is reported in units of 385/625 nanoseconds
- constexpr u64 gpu_ticks_num = 384;
- constexpr u64 gpu_ticks_den = 625;
+ u64 gpu_tick = system.CoreTiming().GetGPUTicks();
- u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count();
if (Settings::values.use_fast_gpu_time.GetValue()) {
- nanoseconds /= 256;
+ gpu_tick /= 256;
}
- const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
- const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
- return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
+
+ return gpu_tick;
}
[[nodiscard]] bool IsAsync() const {
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index 6ce179167..ce827eb6c 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -4,6 +4,7 @@
#include <array>
#include <bit>
+#include "common/scratch_buffer.h"
#include "common/settings.h"
#include "video_core/host1x/codecs/h264.h"
#include "video_core/host1x/host1x.h"
@@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) {
}
void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
- std::vector<u8> scan(count);
+ static Common::ScratchBuffer<u8> scan{};
+ scan.resize_destructive(count);
if (count == 16) {
std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
} else {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7b2cde7a7..45141e488 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -111,7 +111,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
[[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
SetEntry<false>(current_gpu_addr, entry_type);
if (current_entry_type != entry_type) {
- rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
+ rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size);
}
if constexpr (entry_type == EntryType::Mapped) {
const VAddr current_cpu_addr = cpu_addr + offset;
@@ -134,7 +134,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
[[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
SetEntry<true>(current_gpu_addr, entry_type);
if (current_entry_type != entry_type) {
- rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
+ rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size);
}
if constexpr (entry_type == EntryType::Mapped) {
const VAddr current_cpu_addr = cpu_addr + offset;
@@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
VideoCommon::CacheType which) {
- std::vector<u8> tmp_buffer(size);
+ tmp_buffer.resize_destructive(size);
ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which);
// The output block must be flushed in case it has data modified from the GPU.
@@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
return result;
}
-std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
- GPUVAddr gpu_addr, std::size_t size) const {
- std::vector<std::pair<GPUVAddr, std::size_t>> result{};
+boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32>
+MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const {
+ boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> result{};
GetSubmappedRangeImpl<true>(gpu_addr, size, result);
return result;
}
@@ -680,8 +680,9 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
template <bool is_gpu_address>
void MemoryManager::GetSubmappedRangeImpl(
GPUVAddr gpu_addr, std::size_t size,
- std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
- result) const {
+ boost::container::small_vector<
+ std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result)
+ const {
std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
last_segment{};
std::optional<VAddr> old_page_addr{};
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 794535122..4202c26ff 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -8,10 +8,12 @@
#include <mutex>
#include <optional>
#include <vector>
+#include <boost/container/small_vector.hpp>
#include "common/common_types.h"
#include "common/multi_level_page_table.h"
#include "common/range_map.h"
+#include "common/scratch_buffer.h"
#include "common/virtual_buffer.h"
#include "video_core/cache_types.h"
#include "video_core/pte_kind.h"
@@ -107,8 +109,8 @@ public:
* if the region is continuous, a single pair will be returned. If it's unmapped, an empty
* vector will be returned;
*/
- std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
- std::size_t size) const;
+ boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange(
+ GPUVAddr gpu_addr, std::size_t size) const;
GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
@@ -165,7 +167,8 @@ private:
template <bool is_gpu_address>
void GetSubmappedRangeImpl(
GPUVAddr gpu_addr, std::size_t size,
- std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
+ boost::container::small_vector<
+ std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>&
result) const;
Core::System& system;
@@ -215,8 +218,8 @@ private:
Common::VirtualBuffer<u32> big_page_table_cpu;
std::vector<u64> big_page_continuous;
- std::vector<std::pair<VAddr, std::size_t>> page_stash{};
- std::vector<std::pair<VAddr, std::size_t>> page_stash2{};
+ boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{};
+ boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{};
mutable std::mutex guard;
@@ -226,6 +229,8 @@ private:
std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
static std::atomic<size_t> unique_identifier_generator;
+
+ Common::ScratchBuffer<u8> tmp_buffer;
};
} // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 1a0cea9b7..3151c0db8 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -87,7 +87,8 @@ void ComputePipeline::Configure() {
texture_cache.SynchronizeComputeDescriptors();
boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
- std::array<GLuint, MAX_TEXTURES> samplers;
+ boost::container::static_vector<VideoCommon::SamplerId, MAX_TEXTURES> samplers;
+ std::array<GLuint, MAX_TEXTURES> gl_samplers;
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
GLsizei sampler_binding{};
@@ -131,7 +132,6 @@ void ComputePipeline::Configure() {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
- samplers[sampler_binding++] = 0;
}
}
for (const auto& desc : info.image_buffer_descriptors) {
@@ -142,8 +142,8 @@ void ComputePipeline::Configure() {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
- Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
- samplers[sampler_binding++] = sampler->Handle();
+ VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second);
+ samplers.push_back(sampler);
}
}
for (const auto& desc : info.image_descriptors) {
@@ -186,10 +186,17 @@ void ComputePipeline::Configure() {
const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers +
num_image_buffers};
+ const VideoCommon::SamplerId* samplers_it{samplers.data()};
texture_binding += num_texture_buffers;
image_binding += num_image_buffers;
u32 texture_scaling_mask{};
+
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ gl_samplers[sampler_binding++] = 0;
+ }
+ }
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
@@ -198,6 +205,12 @@ void ComputePipeline::Configure() {
texture_scaling_mask |= 1u << texture_binding;
}
++texture_binding;
+
+ const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))};
+ const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
+ !image_view.SupportsAnisotropy()};
+ gl_samplers[sampler_binding++] =
+ use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle();
}
}
u32 image_scaling_mask{};
@@ -228,7 +241,7 @@ void ComputePipeline::Configure() {
if (texture_binding != 0) {
ASSERT(texture_binding == sampler_binding);
glBindTextures(0, texture_binding, textures.data());
- glBindSamplers(0, sampler_binding, samplers.data());
+ glBindSamplers(0, sampler_binding, gl_samplers.data());
}
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index 89000d6e0..c58f760b8 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -275,9 +275,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
template <typename Spec>
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
- std::array<GLuint, MAX_TEXTURES> samplers;
+ std::array<VideoCommon::SamplerId, MAX_TEXTURES> samplers;
size_t views_index{};
- GLsizei sampler_binding{};
+ size_t samplers_index{};
texture_cache.SynchronizeGraphicsDescriptors();
@@ -337,7 +337,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
views[views_index++] = {handle.first};
- samplers[sampler_binding++] = 0;
}
}
}
@@ -351,8 +350,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto handle{read_handle(desc, index)};
views[views_index++] = {handle.first};
- Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
- samplers[sampler_binding++] = sampler->Handle();
+ VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
+ samplers[samplers_index++] = sampler;
}
}
if constexpr (Spec::has_images) {
@@ -445,10 +444,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
program_manager.BindSourcePrograms(source_programs);
}
const VideoCommon::ImageViewInOut* views_it{views.data()};
+ const VideoCommon::SamplerId* samplers_it{samplers.data()};
GLsizei texture_binding = 0;
GLsizei image_binding = 0;
+ GLsizei sampler_binding{};
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
+ std::array<GLuint, MAX_TEXTURES> gl_samplers;
const auto prepare_stage{[&](size_t stage) {
buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
buffer_cache.BindHostStageBuffers(stage);
@@ -465,6 +467,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
u32 stage_image_binding{};
const auto& info{stage_infos[stage]};
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ gl_samplers[sampler_binding++] = 0;
+ }
+ }
+ }
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView((views_it++)->id)};
@@ -474,6 +483,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
++texture_binding;
++stage_texture_binding;
+
+ const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))};
+ const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
+ !image_view.SupportsAnisotropy()};
+ gl_samplers[sampler_binding++] =
+ use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle();
}
}
for (const auto& desc : info.image_descriptors) {
@@ -534,7 +549,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if (texture_binding != 0) {
ASSERT(texture_binding == sampler_binding);
glBindTextures(0, texture_binding, textures.data());
- glBindSamplers(0, sampler_binding, samplers.data());
+ glBindSamplers(0, sampler_binding, gl_samplers.data());
}
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3f077311e..0329ed820 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
case Shader::Stage::VertexB:
case Shader::Stage::Geometry:
if (!use_assembly_shaders && key.xfb_enabled != 0) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
break;
case Shader::Stage::TessellationEval:
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 1c5dbcdd8..3b446be07 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -1268,36 +1268,48 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1);
- sampler.Create();
- const GLuint handle = sampler.handle;
- glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
- glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
- glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
- glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode);
- glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func);
- glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag);
- glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min);
- glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias());
- glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod());
- glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod());
- glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
-
- if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
- const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f);
- glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy);
- } else {
- LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
- }
- if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) {
- glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter);
- } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) {
- LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
- }
- if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) {
- glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless);
- } else if (seamless == GL_FALSE) {
- // We default to false because it's more common
- LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
+ const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f);
+
+ const auto create_sampler = [&](const f32 anisotropy) {
+ OGLSampler new_sampler;
+ new_sampler.Create();
+ const GLuint handle = new_sampler.handle;
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
+ glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
+ glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode);
+ glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func);
+ glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag);
+ glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min);
+ glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias());
+ glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod());
+ glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod());
+ glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
+
+ if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
+ glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, anisotropy);
+ } else {
+ LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
+ }
+ if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) {
+ glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter);
+ } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) {
+ LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
+ }
+ if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) {
+ glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless);
+ } else if (seamless == GL_FALSE) {
+ // We default to false because it's more common
+ LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
+ }
+ return new_sampler;
+ };
+
+ sampler = create_sampler(max_anisotropy);
+
+ const f32 max_anisotropy_default = static_cast<f32>(1U << config.max_anisotropy);
+ if (max_anisotropy > max_anisotropy_default) {
+ sampler_default_anisotropy = create_sampler(max_anisotropy_default);
}
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 1148b73d7..3676eaaa9 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -309,12 +309,21 @@ class Sampler {
public:
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
- GLuint Handle() const noexcept {
+ [[nodiscard]] GLuint Handle() const noexcept {
return sampler.handle;
}
+ [[nodiscard]] GLuint HandleWithDefaultAnisotropy() const noexcept {
+ return sampler_default_anisotropy.handle;
+ }
+
+ [[nodiscard]] bool HasAddedAnisotropy() const noexcept {
+ return static_cast<bool>(sampler_default_anisotropy.handle);
+ }
+
private:
OGLSampler sampler;
+ OGLSampler sampler_default_anisotropy;
};
class Framebuffer {
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index 983e1c2e1..71c783709 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -178,7 +178,7 @@ public:
inline void PushImageDescriptors(TextureCache& texture_cache,
GuestDescriptorQueue& guest_descriptor_queue,
const Shader::Info& info, RescalingPushConstant& rescaling,
- const VkSampler*& samplers,
+ const VideoCommon::SamplerId*& samplers,
const VideoCommon::ImageViewInOut*& views) {
const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
@@ -187,10 +187,15 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const VideoCommon::ImageViewId image_view_id{(views++)->id};
- const VkSampler sampler{*(samplers++)};
+ const VideoCommon::SamplerId sampler_id{*(samplers++)};
ImageView& image_view{texture_cache.GetImageView(image_view_id)};
const VkImageView vk_image_view{image_view.Handle(desc.type)};
- guest_descriptor_queue.AddSampledImage(vk_image_view, sampler);
+ const Sampler& sampler{texture_cache.GetSampler(sampler_id)};
+ const bool use_fallback_sampler{sampler.HasAddedAnisotropy() &&
+ !image_view.SupportsAnisotropy()};
+ const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy()
+ : sampler.Handle()};
+ guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler);
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
}
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 8c33722d3..f47301ad5 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -361,7 +361,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
// Measuring a popular game, this number never exceeds the specified size once data is warmed up
- boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
+ boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
@@ -516,15 +516,15 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi
buffer_handles.push_back(handle);
}
if (device.IsExtExtendedDynamicStateSupported()) {
- scheduler.Record([bindings = bindings,
- buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([bindings = std::move(bindings),
+ buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
cmdbuf.BindVertexBuffers2EXT(
bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(),
bindings.offsets.data(), bindings.sizes.data(), bindings.strides.data());
});
} else {
- scheduler.Record([bindings = bindings,
- buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([bindings = std::move(bindings),
+ buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
cmdbuf.BindVertexBuffers(bindings.min_index, bindings.max_index - bindings.min_index,
buffer_handles.data(), bindings.offsets.data());
});
@@ -561,12 +561,12 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<
for (u32 index = 0; index < bindings.buffers.size(); ++index) {
buffer_handles.push_back(bindings.buffers[index]->Handle());
}
- scheduler.Record(
- [bindings = bindings, buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()),
- buffer_handles.data(), bindings.offsets.data(),
- bindings.sizes.data());
- });
+ scheduler.Record([bindings = std::move(bindings),
+ buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()),
+ buffer_handles.data(), bindings.offsets.data(),
+ bindings.sizes.data());
+ });
}
void BufferCacheRuntime::ReserveNullBuffer() {
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 733e70d9d..73e585c2b 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -115,7 +115,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
static constexpr size_t max_elements = 64;
boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
- boost::container::static_vector<VkSampler, max_elements> samplers;
+ boost::container::static_vector<VideoCommon::SamplerId, max_elements> samplers;
const auto& qmd{kepler_compute.launch_description};
const auto& cbufs{qmd.const_buffer_config};
@@ -160,8 +160,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
- Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
- samplers.push_back(sampler->Handle());
+ VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second);
+ samplers.push_back(sampler);
}
}
for (const auto& desc : info.image_descriptors) {
@@ -192,7 +192,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
buffer_cache.BindHostComputeBuffers();
RescalingPushConstant rescaling;
- const VkSampler* samplers_it{samplers.data()};
+ const VideoCommon::SamplerId* samplers_it{samplers.data()};
const VideoCommon::ImageViewInOut* views_it{views.data()};
PushImageDescriptors(texture_cache, guest_descriptor_queue, info, rescaling, samplers_it,
views_it);
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 506b78f08..c1595642e 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -298,7 +298,7 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
template <typename Spec>
void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views;
- std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
+ std::array<VideoCommon::SamplerId, MAX_IMAGE_ELEMENTS> samplers;
size_t sampler_index{};
size_t view_index{};
@@ -367,8 +367,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto handle{read_handle(desc, index)};
views[view_index++] = {handle.first};
- Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
- samplers[sampler_index++] = sampler->Handle();
+ VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
+ samplers[sampler_index++] = sampler;
}
}
if constexpr (Spec::has_images) {
@@ -453,7 +453,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
RescalingPushConstant rescaling;
RenderAreaPushConstant render_area;
- const VkSampler* samplers_it{samplers.data()};
+ const VideoCommon::SamplerId* samplers_it{samplers.data()};
const VideoCommon::ImageViewInOut* views_it{views.data()};
const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
buffer_cache.BindHostStageBuffers(stage);
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index b128c4f6e..5eeda08d2 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -3,6 +3,7 @@
#include <thread>
+#include "common/polyfill_ranges.h"
#include "common/settings.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_device.h"
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 18e040a1b..9f316113c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ auto [varyings, count] =
+ VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
info.convert_depth_mode = gl_ndc;
}
@@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
info.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled != 0) {
- info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ auto [varyings, count] =
+ VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ info.xfb_varyings = varyings;
+ info.xfb_count = count;
}
info.convert_depth_mode = gl_ndc;
break;
@@ -705,10 +711,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
PipelineStatistics* statistics, bool build_in_parallel) try {
- // TODO: Remove this when Intel fixes their shader compiler.
- // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159
- if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS &&
- !Settings::values.enable_compute_pipelines.GetValue()) {
+ if (device.HasBrokenCompute()) {
LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash());
return nullptr;
}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 8711e2a87..f3cef09dd 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
-[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
- std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
- std::vector<VkBufferCopy> result(copies.size());
+[[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16>
+TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
+ boost::container::small_vector<VkBufferCopy, 16> result(copies.size());
std::ranges::transform(
copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
return VkBufferCopy{
@@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return result;
}
-[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies(
+[[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies(
std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
struct Maker {
VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
@@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
VkImageAspectFlags aspect_mask;
};
if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
- std::vector<VkBufferImageCopy> result(copies.size() * 2);
+ boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2);
std::ranges::transform(copies, result.begin(),
Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
std::ranges::transform(copies, result.begin() + copies.size(),
Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
return result;
} else {
- std::vector<VkBufferImageCopy> result(copies.size());
+ boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size());
std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
return result;
}
@@ -867,8 +867,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() {
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
- std::vector<VkBufferImageCopy> vk_in_copies(copies.size());
- std::vector<VkBufferImageCopy> vk_out_copies(copies.size());
+ boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size());
+ boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size());
const VkImageAspectFlags src_aspect_mask = src.AspectMask();
const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
@@ -1157,7 +1157,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
- std::vector<VkImageCopy> vk_copies(copies.size());
+ boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size());
const VkImageAspectFlags aspect_mask = dst.AspectMask();
ASSERT(aspect_mask == src.AspectMask());
@@ -1332,7 +1332,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
ScaleDown(true);
}
scheduler->RequestOutsideRenderPassOperationContext();
- std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
+ auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
const VkBuffer src_buffer = buffer;
const VkImage vk_image = *original_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
@@ -1367,8 +1367,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS
if (is_rescaled) {
ScaleDown();
}
- boost::container::small_vector<VkBuffer, 1> buffers_vector{};
- boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies;
+ boost::container::small_vector<VkBuffer, 8> buffers_vector{};
+ boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
+ vk_copies;
for (size_t index = 0; index < buffers_span.size(); index++) {
buffers_vector.emplace_back(buffers_span[index]);
vk_copies.emplace_back(
@@ -1802,27 +1803,36 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
// Some games have samplers with garbage. Sanitize them here.
const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
- sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{
- .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
- .pNext = pnext,
- .flags = 0,
- .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
- .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
- .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
- .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
- .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
- .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
- .mipLodBias = tsc.LodBias(),
- .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE),
- .maxAnisotropy = max_anisotropy,
- .compareEnable = tsc.depth_compare_enabled,
- .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
- .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
- .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
- .borderColor =
- arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color),
- .unnormalizedCoordinates = VK_FALSE,
- });
+ const auto create_sampler = [&](const f32 anisotropy) {
+ return device.GetLogical().CreateSampler(VkSamplerCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .pNext = pnext,
+ .flags = 0,
+ .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
+ .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
+ .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
+ .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
+ .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
+ .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
+ .mipLodBias = tsc.LodBias(),
+ .anisotropyEnable = static_cast<VkBool32>(anisotropy > 1.0f ? VK_TRUE : VK_FALSE),
+ .maxAnisotropy = anisotropy,
+ .compareEnable = tsc.depth_compare_enabled,
+ .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
+ .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
+ .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
+ .borderColor =
+ arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color),
+ .unnormalizedCoordinates = VK_FALSE,
+ });
+ };
+
+ sampler = create_sampler(max_anisotropy);
+
+ const f32 max_anisotropy_default = static_cast<f32>(1U << tsc.max_anisotropy);
+ if (max_anisotropy > max_anisotropy_default) {
+ sampler_default_anisotropy = create_sampler(max_anisotropy_default);
+ }
}
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
@@ -1849,7 +1859,7 @@ Framebuffer::~Framebuffer() = default;
void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, bool is_rescaled) {
- std::vector<VkImageView> attachments;
+ boost::container::small_vector<VkImageView, NUM_RT + 1> attachments;
RenderPassKey renderpass_key{};
s32 num_layers = 1;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 0f7a5ffd4..f14525dcb 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -279,8 +279,17 @@ public:
return *sampler;
}
+ [[nodiscard]] VkSampler HandleWithDefaultAnisotropy() const noexcept { // Handle of the fallback sampler the constructor builds with max_anisotropy_default (1 << tsc.max_anisotropy)
+ return *sampler_default_anisotropy; // NOTE(review): presumably a null handle when the fallback was never created - check HasAddedAnisotropy() first
+ }
+
+ [[nodiscard]] bool HasAddedAnisotropy() const noexcept { // True when the constructor created the fallback sampler, which it only does if max_anisotropy > max_anisotropy_default
+ return static_cast<bool>(sampler_default_anisotropy); // relies on vk::Sampler's conversion to bool
+ }
+
private:
vk::Sampler sampler;
+ vk::Sampler sampler_default_anisotropy;
};
class Framebuffer {
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index c5213875b..4db948b6d 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() {
marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
marked_for_removal.end());
- std::vector<ShaderInfo*> removed_shaders;
- removed_shaders.reserve(marked_for_removal.size());
+ boost::container::small_vector<ShaderInfo*, 16> removed_shaders;
std::scoped_lock lock{lookup_mutex};
-
for (Entry* const entry : marked_for_removal) {
removed_shaders.push_back(entry->data);
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1b8a17ee8..55d49d017 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -6,6 +6,7 @@
#include <array>
#include <optional>
#include <vector>
+#include <boost/container/small_vector.hpp>
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -108,8 +109,8 @@ struct ImageBase {
std::vector<ImageViewInfo> image_view_infos;
std::vector<ImageViewId> image_view_ids;
- std::vector<u32> slice_offsets;
- std::vector<SubresourceBase> slice_subresources;
+ boost::container::small_vector<u32, 16> slice_offsets;
+ boost::container::small_vector<SubresourceBase, 16> slice_subresources;
std::vector<AliasedImage> aliased_images;
std::vector<ImageId> overlapping_images;
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index d134b6738..0c5f4450d 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -45,4 +45,56 @@ ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_in
ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {}
+bool ImageViewBase::SupportsAnisotropy() const noexcept { // Returns false for views without mips, non-2D views, or any format listed below; true otherwise
+ const bool has_mips = range.extent.levels > 1; // the view range spans more than one mip level
+ const bool is_2d = type == ImageViewType::e2D || type == ImageViewType::e2DArray; // only 2D and 2D-array views qualify
+ if (!has_mips || !is_2d) {
+ return false; // rejected before the format is even inspected
+ }
+
+ switch (format) { // the listed cases all fall through to the shared `return false`
+ case PixelFormat::R8_UNORM: // start of the color formats for which this returns false
+ case PixelFormat::R8_SNORM:
+ case PixelFormat::R8_SINT:
+ case PixelFormat::R8_UINT:
+ case PixelFormat::BC4_UNORM:
+ case PixelFormat::BC4_SNORM:
+ case PixelFormat::BC5_UNORM:
+ case PixelFormat::BC5_SNORM:
+ case PixelFormat::R32G32_FLOAT:
+ case PixelFormat::R32G32_SINT:
+ case PixelFormat::R32_FLOAT:
+ case PixelFormat::R16_FLOAT:
+ case PixelFormat::R16_UNORM:
+ case PixelFormat::R16_SNORM:
+ case PixelFormat::R16_UINT:
+ case PixelFormat::R16_SINT:
+ case PixelFormat::R16G16_UNORM:
+ case PixelFormat::R16G16_FLOAT:
+ case PixelFormat::R16G16_UINT:
+ case PixelFormat::R16G16_SINT:
+ case PixelFormat::R16G16_SNORM:
+ case PixelFormat::R8G8_UNORM:
+ case PixelFormat::R8G8_SNORM:
+ case PixelFormat::R8G8_SINT:
+ case PixelFormat::R8G8_UINT:
+ case PixelFormat::R32G32_UINT:
+ case PixelFormat::R32_UINT:
+ case PixelFormat::R32_SINT:
+ case PixelFormat::G4R4_UNORM:
+ // Depth formats
+ case PixelFormat::D32_FLOAT:
+ case PixelFormat::D16_UNORM:
+ // Stencil formats
+ case PixelFormat::S8_UINT:
+ // DepthStencil formats
+ case PixelFormat::D24_UNORM_S8_UINT:
+ case PixelFormat::S8_UINT_D24_UNORM:
+ case PixelFormat::D32_FLOAT_S8_UINT:
+ return false; // shared result for every format listed above
+ default:
+ return true; // any format not listed is treated as supporting anisotropy
+ }
+}
+
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
index a25ae1d4a..87549ffff 100644
--- a/src/video_core/texture_cache/image_view_base.h
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -33,6 +33,8 @@ struct ImageViewBase {
return type == ImageViewType::Buffer;
}
+ [[nodiscard]] bool SupportsAnisotropy() const noexcept;
+
ImageId image_id{};
GPUVAddr gpu_addr = 0;
PixelFormat format{};
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c7f7448e9..d3f03a995 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -186,6 +186,10 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
template <class P>
void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
+ if (!Settings::values.barrier_feedback_loops.GetValue()) {
+ return;
+ }
+
const bool requires_barrier = [&] {
for (const auto& view : views) {
if (!view.id) {
@@ -222,30 +226,50 @@ void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
template <class P>
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
+ return &slot_samplers[GetGraphicsSamplerId(index)]; // resolve the index to a SamplerId, then return the address of that slot
+}
+
+template <class P>
+typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { // pointer-returning wrapper around GetComputeSamplerId
+ return &slot_samplers[GetComputeSamplerId(index)]; // resolve the index to a SamplerId, then return the address of that slot
+}
+
+template <class P>
+SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) { // Maps a graphics sampler table index to the SamplerId stored for it
if (index > channel_state->graphics_sampler_table.Limit()) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
- return &slot_samplers[NULL_SAMPLER_ID];
+ return NULL_SAMPLER_ID; // index beyond the table limit: hand back the null sampler id
}
const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index);
SamplerId& id = channel_state->graphics_sampler_ids[index];
if (is_new) {
id = FindSampler(descriptor);
}
- return &slot_samplers[id];
+ return id; // per-index id, replaced via FindSampler above when Read() reported is_new
}
template <class P>
-typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
+SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) { // Maps a compute sampler table index to the SamplerId stored for it
if (index > channel_state->compute_sampler_table.Limit()) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
- return &slot_samplers[NULL_SAMPLER_ID];
+ return NULL_SAMPLER_ID; // index beyond the table limit: hand back the null sampler id
}
const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index);
SamplerId& id = channel_state->compute_sampler_ids[index];
if (is_new) {
id = FindSampler(descriptor);
}
- return &slot_samplers[id];
+ return id; // per-index id, replaced via FindSampler above when Read() reported is_new
+}
+
+template <class P>
+const typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) const noexcept { // Read-only access to the sampler stored under `id`
+ return slot_samplers[id]; // NOTE(review): no validity check on `id` is visible here - confirm callers only pass ids obtained from this cache
+}
+
+template <class P>
+typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) noexcept { // Mutable counterpart of the const overload
+ return slot_samplers[id]; // NOTE(review): no validity check on `id` is visible here - confirm callers only pass ids obtained from this cache
}
template <class P>
@@ -280,7 +304,7 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
}
template <class P>
-bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
+bool TextureCache<P>::RescaleRenderTargets() {
auto& flags = maxwell3d->dirty.flags;
u32 scale_rating = 0;
bool rescaled = false;
@@ -318,13 +342,13 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
if (flags[Dirty::ColorBuffer0 + index] || force) {
flags[Dirty::ColorBuffer0 + index] = false;
- BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
+ BindRenderTarget(&color_buffer_id, FindColorBuffer(index));
}
check_rescale(color_buffer_id, tmp_color_images[index]);
}
if (flags[Dirty::ZetaBuffer] || force) {
flags[Dirty::ZetaBuffer] = false;
- BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
+ BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer());
}
check_rescale(render_targets.depth_buffer_id, tmp_depth_image);
@@ -389,7 +413,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
return;
}
- const bool rescaled = RescaleRenderTargets(is_clear);
+ const bool rescaled = RescaleRenderTargets();
if (is_rescaling != rescaled) {
flags[Dirty::RescaleViewports] = true;
flags[Dirty::RescaleScissors] = true;
@@ -502,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
template <class P>
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
- std::vector<ImageId> images;
+ boost::container::small_vector<ImageId, 16> images;
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
if (!image.IsSafeDownload()) {
return;
@@ -555,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V
template <class P>
void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
- std::vector<ImageId> deleted_images;
+ boost::container::small_vector<ImageId, 16> deleted_images;
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
Image& image = slot_images[id];
@@ -569,7 +593,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
template <class P>
void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
- std::vector<ImageId> deleted_images;
+ boost::container::small_vector<ImageId, 16> deleted_images;
ForEachImageInRegionGPU(as_id, gpu_addr, size,
[&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) {
@@ -1077,7 +1101,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
const bool native_bgr = runtime.HasNativeBgr();
const bool flexible_formats = True(options & RelaxedOptions::Format);
ImageId image_id{};
- boost::container::small_vector<ImageId, 1> image_ids;
+ boost::container::small_vector<ImageId, 8> image_ids;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
return false;
@@ -1598,7 +1622,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr)
}
}
ImageId image_id{};
- boost::container::small_vector<ImageId, 1> image_ids;
+ boost::container::small_vector<ImageId, 8> image_ids;
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
return false;
@@ -1658,7 +1682,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
}
template <class P>
-ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
+ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
const auto& regs = maxwell3d->regs;
if (index >= regs.rt_control.count) {
return ImageViewId{};
@@ -1672,11 +1696,11 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
return ImageViewId{};
}
const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode);
- return FindRenderTargetView(info, gpu_addr, is_clear);
+ return FindRenderTargetView(info, gpu_addr);
}
template <class P>
-ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
+ImageViewId TextureCache<P>::FindDepthBuffer() {
const auto& regs = maxwell3d->regs;
if (!regs.zeta_enable) {
return ImageViewId{};
@@ -1686,18 +1710,16 @@ ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
return ImageViewId{};
}
const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode);
- return FindRenderTargetView(info, gpu_addr, is_clear);
+ return FindRenderTargetView(info, gpu_addr);
}
template <class P>
-ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
- bool is_clear) {
- const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
+ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr) {
ImageId image_id{};
bool delete_state = has_deleted_images;
do {
has_deleted_images = false;
- image_id = FindOrInsertImage(info, gpu_addr, options);
+ image_id = FindOrInsertImage(info, gpu_addr);
delete_state |= has_deleted_images;
} while (has_deleted_images);
has_deleted_images = delete_state;
@@ -1920,7 +1942,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
image.map_view_id = map_id;
return;
}
- std::vector<ImageViewId> sparse_maps{};
+ boost::container::small_vector<ImageViewId, 16> sparse_maps;
ForEachSparseSegment(
image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
@@ -2195,7 +2217,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
template <class P>
void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
- boost::container::small_vector<const AliasedImage*, 1> aliased_images;
+ boost::container::small_vector<const AliasedImage*, 8> aliased_images;
Image& image = slot_images[image_id];
bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
bool any_modified = True(image.flags & ImageFlagBits::GpuModified);
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 3bfa92154..e9ec91265 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -56,7 +56,7 @@ struct ImageViewInOut {
struct AsyncDecodeContext {
ImageId image_id;
Common::ScratchBuffer<u8> decoded_data;
- std::vector<BufferImageCopy> copies;
+ boost::container::small_vector<BufferImageCopy, 16> copies; // small_vector with inline storage for 16 elements, replacing std::vector
std::mutex mutex;
std::atomic_bool complete;
};
@@ -159,6 +159,18 @@ public:
/// Get the sampler from the compute descriptor table in the specified index
Sampler* GetComputeSampler(u32 index);
+ /// Get the sampler id from the graphics descriptor table in the specified index
+ SamplerId GetGraphicsSamplerId(u32 index);
+
+ /// Get the sampler id from the compute descriptor table in the specified index
+ SamplerId GetComputeSamplerId(u32 index);
+
+ /// Return a constant reference to the given sampler id
+ [[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept;
+
+ /// Return a reference to the given sampler id
+ [[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept;
+
/// Refresh the state for graphics image view and sampler descriptors
void SynchronizeGraphicsDescriptors();
@@ -166,9 +178,8 @@ public:
void SynchronizeComputeDescriptors();
/// Updates the Render Targets if they can be rescaled
- /// @param is_clear True when the render targets are being used for clears
/// @retval True if the Render Targets have been rescaled.
- bool RescaleRenderTargets(bool is_clear);
+ bool RescaleRenderTargets();
/// Update bound render targets and upload memory if necessary
/// @param is_clear True when the render targets are being used for clears
@@ -324,14 +335,13 @@ private:
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
/// Find or create an image view for the given color buffer index
- [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
+ [[nodiscard]] ImageViewId FindColorBuffer(size_t index);
/// Find or create an image view for the depth buffer
- [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
+ [[nodiscard]] ImageViewId FindDepthBuffer();
/// Find or create a view for a render target with the given image parameters
- [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
- bool is_clear);
+ [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr);
/// Iterates over all the images in a region calling func
template <typename Func>
@@ -419,7 +429,7 @@ private:
std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
- std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
+ std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
VAddr virtual_invalid_space{};
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 95a5b47d8..f781cb7a0 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -329,13 +329,13 @@ template <u32 GOB_EXTENT>
[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
- const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info);
+ const auto slice_offsets = CalculateSliceOffsets(new_info);
const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
const auto it = std::ranges::find(slice_offsets, diff);
if (it == slice_offsets.end()) {
return std::nullopt;
}
- const std::vector subresources = CalculateSliceSubresources(new_info);
+ const auto subresources = CalculateSliceSubresources(new_info);
const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
const ImageInfo& info = overlap.info;
if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
@@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
return sizes;
}
-std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
+boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D);
- std::vector<u32> offsets;
+ boost::container::small_vector<u32, 16> offsets;
offsets.reserve(NumSlices(info));
const LevelInfo level_info = MakeLevelInfo(info);
@@ -679,9 +679,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
return offsets;
}
-std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) {
+boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
+ const ImageInfo& info) {
ASSERT(info.type == ImageType::e3D);
- std::vector<SubresourceBase> subresources;
+ boost::container::small_vector<SubresourceBase, 16> subresources;
subresources.reserve(NumSlices(info));
for (s32 level = 0; level < info.resources.levels; ++level) {
const s32 depth = AdjustMipSize(info.size.depth, level);
@@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
}
}
-std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
- SubresourceBase base, u32 up_scale, u32 down_shift) {
+boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst,
+ const ImageInfo& src,
+ SubresourceBase base,
+ u32 up_scale, u32 down_shift) {
ASSERT(dst.resources.levels >= src.resources.levels);
const bool is_dst_3d = dst.type == ImageType::e3D;
@@ -733,7 +736,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
ASSERT(src.resources.levels == 1);
}
const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D};
- std::vector<ImageCopy> copies;
+ boost::container::small_vector<ImageCopy, 16> copies;
copies.reserve(src.resources.levels);
for (s32 level = 0; level < src.resources.levels; ++level) {
ImageCopy& copy = copies.emplace_back();
@@ -770,9 +773,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
return copies;
}
-std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale,
- u32 down_shift) {
- std::vector<ImageCopy> copies;
+boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src,
+ u32 up_scale,
+ u32 down_shift) {
+ boost::container::small_vector<ImageCopy, 16> copies;
copies.reserve(src.resources.levels);
const bool is_3d = src.type == ImageType::e3D;
for (s32 level = 0; level < src.resources.levels; ++level) {
@@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
}
-std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
- const ImageInfo& info, std::span<const u8> input,
- std::span<u8> output) {
+boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
+ GPUVAddr gpu_addr,
+ const ImageInfo& info,
+ std::span<const u8> input,
+ std::span<u8> output) {
const size_t guest_size_bytes = input.size_bytes();
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
const Extent3D size = info.size;
@@ -861,7 +867,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
info.tile_width_spacing);
size_t guest_offset = 0;
u32 host_offset = 0;
- std::vector<BufferImageCopy> copies(num_levels);
+ boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
@@ -978,7 +984,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
}
}
-std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
+boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
const Extent3D size = info.size;
const u32 bytes_per_block = BytesPerBlock(info.format);
if (info.type == ImageType::Linear) {
@@ -1006,7 +1012,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
u32 host_offset = 0;
- std::vector<BufferImageCopy> copies(num_levels);
+ boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
@@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
return AdjustMipBlockSize(num_tiles, level_info.block, level);
}
-std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
+boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) {
const Extent2D tile_size = DefaultBlockSize(info.format);
if (info.type == ImageType::Linear) {
- return std::vector{SwizzleParameters{
+ return {SwizzleParameters{
.num_tiles = AdjustTileSize(info.size, tile_size),
.block = {},
.buffer_offset = 0,
@@ -1057,7 +1063,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
const s32 num_levels = info.resources.levels;
u32 guest_offset = 0;
- std::vector<SwizzleParameters> params(num_levels);
+ boost::container::small_vector<SwizzleParameters, 16> params(num_levels);
for (s32 level = 0; level < num_levels; ++level) {
const Extent3D level_size = AdjustMipSize(size, level);
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 84aa6880d..ab45a43c4 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -5,6 +5,7 @@
#include <optional>
#include <span>
+#include <boost/container/small_vector.hpp>
#include "common/common_types.h"
#include "common/scratch_buffer.h"
@@ -40,9 +41,10 @@ struct OverlapResult {
[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
-[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info);
-[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
+ const ImageInfo& info);
[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
@@ -51,21 +53,18 @@ struct OverlapResult {
[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
-[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
- const ImageInfo& src,
- SubresourceBase base, u32 up_scale = 1,
- u32 down_shift = 0);
+[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(
+ const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1,
+ u32 down_shift = 0);
-[[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src,
- u32 up_scale = 1,
- u32 down_shift = 0);
+[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(
+ const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0);
[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
-[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
- GPUVAddr gpu_addr, const ImageInfo& info,
- std::span<const u8> input,
- std::span<u8> output);
+[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(
+ Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
+ std::span<const u8> input, std::span<u8> output);
[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
const ImageBase& image, std::span<u8> output);
@@ -73,13 +72,15 @@ struct OverlapResult {
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
std::span<BufferImageCopy> copies);
-[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(
+ const ImageInfo& info);
[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
-[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
+[[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(
+ const ImageInfo& info);
void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
std::span<const BufferImageCopy> copies, std::span<const u8> memory,
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 4a80a59f9..d8b88d9bc 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -62,7 +62,12 @@ std::array<float, 4> TSCEntry::BorderColor() const noexcept {
}
float TSCEntry::MaxAnisotropy() const noexcept {
- if (max_anisotropy == 0 && mipmap_filter != TextureMipmapFilter::Linear) {
+ const bool is_suitable_mipmap_filter = mipmap_filter != TextureMipmapFilter::None;
+ const bool has_regular_lods = min_lod_clamp == 0 && max_lod_clamp >= 256;
+ const bool is_bilinear_filter = min_filter == TextureFilter::Linear &&
+ reduction_filter == SamplerReduction::WeightedAverage;
+ if (max_anisotropy == 0 && (!is_suitable_mipmap_filter || !has_regular_lods ||
+ !is_bilinear_filter || depth_compare_enabled)) {
return 1.0f;
}
const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue();
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
index 155599316..1f353d2df 100644
--- a/src/video_core/transform_feedback.cpp
+++ b/src/video_core/transform_feedback.cpp
@@ -13,7 +13,7 @@
namespace VideoCommon {
-std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
const TransformFeedbackState& state) {
static constexpr std::array VECTORS{
28U, // gl_Position
@@ -62,7 +62,8 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
216U, // gl_TexCoord[6]
220U, // gl_TexCoord[7]
};
- std::vector<Shader::TransformFeedbackVarying> xfb(256);
+ std::array<Shader::TransformFeedbackVarying, 256> xfb{};
+ u32 count{0};
for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
const auto& locations = state.varyings[buffer];
const auto& layout = state.layouts[buffer];
@@ -103,11 +104,12 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
}
}
xfb[attribute] = varying;
+ count = std::max(count, attribute);
highest = std::max(highest, (base_offset + varying.components) * 4);
}
UNIMPLEMENTED_IF(highest != layout.stride);
}
- return xfb;
+ return {xfb, count + 1};
}
} // namespace VideoCommon
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
index d13eb16c3..401b1352a 100644
--- a/src/video_core/transform_feedback.h
+++ b/src/video_core/transform_feedback.h
@@ -24,7 +24,7 @@ struct TransformFeedbackState {
varyings;
};
-std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
const TransformFeedbackState& state);
} // namespace VideoCommon
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 3d2e9a16a..b11abe311 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -316,6 +316,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
std::vector<const char*> ExtensionListForVulkan(
const std::set<std::string, std::less<>>& extensions) {
std::vector<const char*> output;
+ output.reserve(extensions.size());
for (const auto& extension : extensions) {
output.push_back(extension.c_str());
}
@@ -562,6 +563,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
cant_blit_msaa = true;
}
+ has_broken_compute =
+ CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) &&
+ !Settings::values.enable_compute_pipelines.GetValue();
if (is_intel_anv || (is_qualcomm && !is_s8gen2)) {
LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format");
must_emulate_bgr565 = true;
@@ -783,9 +787,6 @@ bool Device::GetSuitability(bool requires_swapchain) {
FOR_EACH_VK_FEATURE_EXT(FEATURE_EXTENSION);
FOR_EACH_VK_EXTENSION(EXTENSION);
-#ifdef _WIN32
- FOR_EACH_VK_EXTENSION_WIN32(EXTENSION);
-#endif
#undef FEATURE_EXTENSION
#undef EXTENSION
@@ -804,11 +805,6 @@ bool Device::GetSuitability(bool requires_swapchain) {
FOR_EACH_VK_RECOMMENDED_EXTENSION(LOG_EXTENSION);
FOR_EACH_VK_MANDATORY_EXTENSION(CHECK_EXTENSION);
-#ifdef _WIN32
- FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(CHECK_EXTENSION);
-#else
- FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(CHECK_EXTENSION);
-#endif
if (requires_swapchain) {
CHECK_EXTENSION(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index f314d0ffe..0b634a876 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -10,6 +10,7 @@
#include <vector>
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -68,7 +69,6 @@
EXTENSION(EXT, VERTEX_ATTRIBUTE_DIVISOR, vertex_attribute_divisor) \
EXTENSION(KHR, DRAW_INDIRECT_COUNT, draw_indirect_count) \
EXTENSION(KHR, DRIVER_PROPERTIES, driver_properties) \
- EXTENSION(KHR, EXTERNAL_MEMORY_FD, external_memory_fd) \
EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor) \
EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge) \
EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \
@@ -80,9 +80,6 @@
EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \
EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle)
-#define FOR_EACH_VK_EXTENSION_WIN32(EXTENSION) \
- EXTENSION(KHR, EXTERNAL_MEMORY_WIN32, external_memory_win32)
-
// Define extensions which must be supported.
#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \
@@ -90,12 +87,6 @@
EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \
EXTENSION_NAME(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME)
-#define FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(EXTENSION_NAME) \
- EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME)
-
-#define FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(EXTENSION_NAME) \
- EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME)
-
// Define extensions where the absence of the extension may result in a degraded experience.
#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \
@@ -528,6 +519,11 @@ public:
return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue();
}
+ /// @returns The stored has_broken_compute flag: true if compute pipelines can cause crashing.
+ bool HasBrokenCompute() const {
+ return has_broken_compute; // plain accessor; nothing is queried or recomputed here
+ }
+
/// Returns true when the device does not properly support cube compatibility.
bool HasBrokenCubeImageCompability() const {
return has_broken_cube_compatibility;
@@ -589,6 +585,22 @@ public:
return supports_conditional_barriers;
}
+ [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id,
+ u32 driver_version) { // True only for the Intel proprietary Windows driver inside the version window checked below.
+ if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+ const u32 major = VK_API_VERSION_MAJOR(driver_version); // NOTE(review): assumes driverVersion uses the standard Vulkan version packing - confirm for Intel
+ const u32 minor = VK_API_VERSION_MINOR(driver_version);
+ const u32 patch = VK_API_VERSION_PATCH(driver_version);
+ if (major == 0 && minor == 405 && patch < 286) { // matches 0.405.0 up to, but not including, 0.405.286
+ LOG_WARNING(
+ Render_Vulkan,
+ "Intel proprietary drivers 0.405.0 until 0.405.286 have broken compute");
+ return true;
+ }
+ }
+ return false; // every other driver id or version is reported as not broken
+ }
+
private:
/// Checks if the physical device is suitable and configures the object state
/// with all necessary info about its properties.
@@ -636,7 +648,6 @@ private:
FOR_EACH_VK_FEATURE_1_3(FEATURE);
FOR_EACH_VK_FEATURE_EXT(FEATURE);
FOR_EACH_VK_EXTENSION(EXTENSION);
- FOR_EACH_VK_EXTENSION_WIN32(EXTENSION);
#undef EXTENSION
#undef FEATURE
@@ -683,6 +694,7 @@ private:
bool is_integrated{}; ///< Is GPU an iGPU.
bool is_virtual{}; ///< Is GPU a virtual GPU.
bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
+ bool has_broken_compute{}; ///< Compute shaders can cause crashes
bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached