path: root/src/video_core/renderer_vulkan
author     ReinUsesLisp <reinuseslisp@airmail.cc>  2021-01-17 00:48:58 +0100
committer  ReinUsesLisp <reinuseslisp@airmail.cc>  2021-02-13 06:17:22 +0100
commit     82c2601555b59a94d7160f2fd686cb63d32dd423 (patch)
tree       cd0ecd865945452fa589b572de614fc487f2f96a /src/video_core/renderer_vulkan
parent     vulkan_common: Expose interop and headless devices (diff)
download   yuzu-82c2601555b59a94d7160f2fd686cb63d32dd423.tar (also .tar.gz, .tar.bz2, .tar.lz, .tar.xz, .tar.zst, .zip)
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp            6
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.h              2
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp          2
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h            3
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp           9
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp        366
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h          107
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp         97
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h           24
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.cpp         4
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.h          11
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp          664
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h             64
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp            14
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h              26
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp     3
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.h      20
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.cpp         9
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp       131
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h          26
20 files changed, 585 insertions, 1003 deletions
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 85121d9fd..19aaf034f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
return {};
}
-VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) {
+VkIndexType IndexFormat(Maxwell::IndexFormat index_format) {
switch (index_format) {
case Maxwell::IndexFormat::UnsignedByte:
- if (!device.IsExtIndexTypeUint8Supported()) {
- UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
- return VK_INDEX_TYPE_UINT16;
- }
return VK_INDEX_TYPE_UINT8_EXT;
case Maxwell::IndexFormat::UnsignedShort:
return VK_INDEX_TYPE_UINT16;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7c34b47dc..e3e06ba38 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
-VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format);
+VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 6909576cb..1cc720ddd 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -107,7 +107,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window)),
device(CreateDevice(instance, dld, *surface)),
- memory_allocator(device),
+ memory_allocator(device, false),
state_tracker(gpu),
scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 1efaf3b77..72071316c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -58,12 +58,11 @@ private:
vk::InstanceDispatch dld;
vk::Instance instance;
-
+ vk::DebugUtilsMessenger debug_callback;
vk::SurfaceKHR surface;
VKScreenInfo screen_info;
- vk::DebugUtilsMessenger debug_callback;
Device device;
MemoryAllocator memory_allocator;
StateTracker state_tracker;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index df8992528..a1a32aabe 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -148,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
SetUniformData(data, framebuffer);
SetVertexData(data, framebuffer);
- const std::span<u8> map = buffer_commit.Map();
- std::memcpy(map.data(), &data, sizeof(data));
+ const std::span<u8> mapped_span = buffer_commit.Map();
+ std::memcpy(mapped_span.data(), &data, sizeof(data));
if (!use_accelerated) {
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
@@ -162,8 +162,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
constexpr u32 block_height_log2 = 4;
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
Tegra::Texture::UnswizzleTexture(
- map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel,
- framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
+ mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
+ bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
const VkBufferImageCopy copy{
.bufferOffset = image_offset,
@@ -263,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
-
return *semaphores[image_index];
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index d8ad40a0f..48fc5d966 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -3,188 +3,276 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <array>
#include <cstring>
-#include <memory>
+#include <span>
+#include <vector>
-#include "core/core.h"
#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-
namespace {
+VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
+ return VkBufferCopy{
+ .srcOffset = copy.src_offset,
+ .dstOffset = copy.dst_offset,
+ .size = copy.size,
+ };
+}
-constexpr VkBufferUsageFlags BUFFER_USAGE =
- VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-
-constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE =
- VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
- VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-
-constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
- VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
- VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
+VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
+ if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
+ return VK_INDEX_TYPE_UINT8_EXT;
+ }
+ if (num_elements <= 0xffff) {
+ return VK_INDEX_TYPE_UINT16;
+ }
+ return VK_INDEX_TYPE_UINT32;
+}
-constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
- VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
+size_t BytesPerIndex(VkIndexType index_type) {
+ switch (index_type) {
+ case VK_INDEX_TYPE_UINT8_EXT:
+ return 1;
+ case VK_INDEX_TYPE_UINT16:
+ return 2;
+ case VK_INDEX_TYPE_UINT32:
+ return 4;
+ default:
+ UNREACHABLE_MSG("Invalid index type={}", index_type);
+ return 1;
+ }
+}
+template <typename T>
+std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
+ std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
+ std::ranges::transform(indices, indices.begin(),
+ [quad, first](u32 index) { return first + index + quad * 4; });
+ return indices;
+}
} // Anonymous namespace
-Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_,
- StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
- : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
- staging_pool_} {
- buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+ VAddr cpu_addr_, u64 size_bytes_)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
+ buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .size = static_cast<VkDeviceSize>(size_),
- .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .size = SizeBytes(),
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+ VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
+ VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
- commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+ if (runtime.device.HasDebuggingToolAttached()) {
+ buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
+ }
+ commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
}
-Buffer::~Buffer() = default;
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
+ VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ VKDescriptorPool& descriptor_pool)
+ : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
+ staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
+ uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
+ quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
-void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
- const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload);
- std::memcpy(staging.mapped_span.data(), data, data_size);
+StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
+ return staging_pool.Request(size, MemoryUsage::Upload);
+}
- scheduler.RequestOutsideRenderPassOperationContext();
+StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return staging_pool.Request(size, MemoryUsage::Download);
+}
- const VkBuffer handle = Handle();
- scheduler.Record([staging = staging.buffer, handle, offset, data_size,
- &device = device](vk::CommandBuffer cmdbuf) {
- const VkBufferMemoryBarrier read_barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask =
- VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
- VK_ACCESS_HOST_WRITE_BIT |
- (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
- .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = handle,
- .offset = offset,
- .size = data_size,
- };
- const VkBufferMemoryBarrier write_barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = UPLOAD_ACCESS_BARRIERS,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = handle,
- .offset = offset,
- .size = data_size,
- };
+void BufferCacheRuntime::Finish() {
+ scheduler.Finish();
+}
+
+void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
+ std::span<const VideoCommon::BufferCopy> copies) {
+ static constexpr VkMemoryBarrier READ_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+ };
+ static constexpr VkMemoryBarrier WRITE_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+ };
+ // Measuring a popular game, this number never exceeds the specified size once data is warmed up
+ boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
+ std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
- 0, read_barrier);
- cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
- write_barrier);
+ 0, READ_BARRIER);
+ cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, WRITE_BARRIER);
});
}
-void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
- auto staging = staging_pool.Request(data_size, MemoryUsage::Download);
- scheduler.RequestOutsideRenderPassOperationContext();
+void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
+ u32 base_vertex, u32 num_indices, VkBuffer buffer,
+ u32 offset, [[maybe_unused]] u32 size) {
+ VkIndexType index_type = MaxwellToVK::IndexFormat(index_format);
+ if (topology == PrimitiveTopology::Quads) {
+ index_type = VK_INDEX_TYPE_UINT32;
+ std::tie(buffer, offset) =
+ quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
+ } else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
+ index_type = VK_INDEX_TYPE_UINT16;
+ std::tie(buffer, offset) = uint8_pass.Assemble(num_indices, buffer, offset);
+ }
+ scheduler.Record([buffer, offset, index_type](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindIndexBuffer(buffer, offset, index_type);
+ });
+}
- const VkBuffer handle = Handle();
- scheduler.Record(
- [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
- const VkBufferMemoryBarrier barrier{
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = handle,
- .offset = offset,
- .size = data_size,
- };
-
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
- VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
- VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
- cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
- });
- scheduler.Finish();
+void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
+ ReserveQuadArrayLUT(first + count, true);
- std::memcpy(data, staging.mapped_span.data(), data_size);
+ // The LUT has the indices 0, 1, 2, and 3 copied as an array
+ // To apply these 'first' offsets we can apply an offset based on the modulus.
+ const VkIndexType index_type = quad_array_lut_index_type;
+ const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
+ const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
+ scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindIndexBuffer(buffer, offset, index_type);
+ });
}
-void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t copy_size) {
- scheduler.RequestOutsideRenderPassOperationContext();
+void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
+ u32 stride) {
+ if (device.IsExtExtendedDynamicStateSupported()) {
+ scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
+ const VkDeviceSize vk_offset = offset;
+ const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
+ const VkDeviceSize vk_stride = stride;
+ cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
+ });
+ } else {
+ scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindVertexBuffer(index, buffer, offset);
+ });
+ }
+}
- const VkBuffer dst_buffer = Handle();
- scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
- copy_size](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size});
-
- std::array<VkBufferMemoryBarrier, 2> barriers;
- barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barriers[0].pNext = nullptr;
- barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
- barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
- barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barriers[0].buffer = src_buffer;
- barriers[0].offset = src_offset;
- barriers[0].size = copy_size;
- barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barriers[1].pNext = nullptr;
- barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
- barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barriers[1].buffer = dst_buffer;
- barriers[1].offset = dst_offset;
- barriers[1].size = copy_size;
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
- barriers, {});
+void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
+ u32 size) {
+ if (!device.IsExtTransformFeedbackSupported()) {
+ // Already logged in the rasterizer
+ return;
+ }
+ scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
+ const VkDeviceSize vk_offset = offset;
+ const VkDeviceSize vk_size = size;
+ cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
});
}
-VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- const Device& device_, MemoryAllocator& memory_allocator_,
- VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
- StagingBufferPool& staging_pool_)
- : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
- cpu_memory_, stream_buffer_},
- device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
- staging_pool{staging_pool_} {}
-
-VKBufferCache::~VKBufferCache() = default;
-
-std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
- return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr,
- size);
+void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
+ update_descriptor_queue.AddBuffer(buffer, offset, size);
}
-VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
- size = std::max(size, std::size_t(4));
- const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal);
+void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
+ if (num_indices <= current_num_indices) {
+ return;
+ }
+ if (wait_for_idle) {
+ scheduler.Finish();
+ }
+ current_num_indices = num_indices;
+ quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
+
+ const u32 num_quads = num_indices / 4;
+ const u32 num_triangle_indices = num_quads * 6;
+ const u32 num_first_offset_copies = 4;
+ const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
+ const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
+ quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = size_bytes,
+ .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ });
+ if (device.HasDebuggingToolAttached()) {
+ quad_array_lut.SetObjectNameEXT("Quad LUT");
+ }
+ quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
+
+ const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
+ u8* staging_data = staging.mapped_span.data();
+ const size_t quad_size = bytes_per_index * 6;
+ for (u32 first = 0; first < num_first_offset_copies; ++first) {
+ for (u32 quad = 0; quad < num_quads; ++quad) {
+ switch (quad_array_lut_index_type) {
+ case VK_INDEX_TYPE_UINT8_EXT:
+ std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
+ break;
+ case VK_INDEX_TYPE_UINT16:
+ std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
+ break;
+ case VK_INDEX_TYPE_UINT32:
+ std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
+ break;
+ default:
+ UNREACHABLE();
+ break;
+ }
+ staging_data += quad_size;
+ }
+ }
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) {
- cmdbuf.FillBuffer(buffer, 0, size, 0);
+ scheduler.Record([src_buffer = staging.buffer, dst_buffer = *quad_array_lut,
+ size_bytes](vk::CommandBuffer cmdbuf) {
+ const VkBufferCopy copy{
+ .srcOffset = 0,
+ .dstOffset = 0,
+ .size = size_bytes,
+ };
+ const VkBufferMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = dst_buffer,
+ .offset = 0,
+ .size = size_bytes,
+ };
+ cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+ 0, write_barrier);
});
- return {empty.buffer, 0, 0};
}
} // namespace Vulkan
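
A worked sketch of the offset arithmetic used by BindQuadArrayIndexBuffer above (values are illustrative, not taken from the patch): ReserveQuadArrayLUT stores num_first_offset_copies = 4 copies of the expanded 0,1,2,0,2,3 quad indices, one copy per value of first % 4, so binding only has to compute a byte offset into the LUT. Assuming VK_INDEX_TYPE_UINT16 and a LUT reserved for 1024 indices:

    // Illustrative values only.
    const u32 first = 5;
    const u32 current_num_indices = 1024;
    const size_t bytes_per_index = 2;                                           // BytesPerIndex(VK_INDEX_TYPE_UINT16)
    const size_t sub_first_offset = (first % 4) * (current_num_indices / 4);    // 1 * 256 quads, selects copy #1
    const size_t offset = (sub_first_offset + first / 4) * 6 * bytes_per_index; // (256 + 1) * 6 * 2 = 3084 bytes
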
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 41d577510..d232e1f2d 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -4,69 +4,112 @@
#pragma once
-#include <memory>
-
-#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
-#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
+class VKDescriptorPool;
class VKScheduler;
+class VKUpdateDescriptorQueue;
-class Buffer final : public VideoCommon::BufferBlock {
-public:
- explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler,
- StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
- ~Buffer();
-
- void Upload(std::size_t offset, std::size_t data_size, const u8* data);
-
- void Download(std::size_t offset, std::size_t data_size, u8* data);
+class BufferCacheRuntime;
- void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
- std::size_t copy_size);
+class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
+public:
+ explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
+ explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+ VAddr cpu_addr_, u64 size_bytes_);
- VkBuffer Handle() const {
+ [[nodiscard]] VkBuffer Handle() const noexcept {
return *buffer;
}
- u64 Address() const {
- return 0;
+ operator VkBuffer() const noexcept {
+ return *buffer;
}
private:
- const Device& device;
- VKScheduler& scheduler;
- StagingBufferPool& staging_pool;
-
vk::Buffer buffer;
MemoryCommit commit;
};
-class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
+class BufferCacheRuntime {
+ friend Buffer;
+
+ using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
+ using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
+
public:
- explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
- Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
- const Device& device, MemoryAllocator& memory_allocator,
- VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
- StagingBufferPool& staging_pool);
- ~VKBufferCache();
+ explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
+ VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ VKDescriptorPool& descriptor_pool);
+
+ void Finish();
+
+ [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
+
+ [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
- BufferInfo GetEmptyBuffer(std::size_t size) override;
+ void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
+ std::span<const VideoCommon::BufferCopy> copies);
-protected:
- std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
+ void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
+ u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
+
+ void BindQuadArrayIndexBuffer(u32 first, u32 count);
+
+ void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
+
+ void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
+
+ void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
+ BindBuffer(buffer, offset, size);
+ }
+
+ void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
+ [[maybe_unused]] bool is_written) {
+ BindBuffer(buffer, offset, size);
+ }
private:
+ void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
+
+ void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
+
const Device& device;
MemoryAllocator& memory_allocator;
VKScheduler& scheduler;
StagingBufferPool& staging_pool;
+ VKUpdateDescriptorQueue& update_descriptor_queue;
+
+ vk::Buffer quad_array_lut;
+ MemoryCommit quad_array_lut_commit;
+ VkIndexType quad_array_lut_index_type{};
+ u32 current_num_indices = 0;
+
+ Uint8Pass uint8_pass;
+ QuadIndexedPass quad_index_pass;
};
+struct BufferCacheParams {
+ using Runtime = Vulkan::BufferCacheRuntime;
+ using Buffer = Vulkan::Buffer;
+
+ static constexpr bool IS_OPENGL = false;
+ static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
+ static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
+ static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
+ static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
+ static constexpr bool USE_MEMORY_MAPS = true;
+};
+
+using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
+
} // namespace Vulkan
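
For context, a minimal sketch of how the types declared above are wired together; the argument lists mirror the RasterizerVulkan constructor changes later in this patch, and the local variable names are placeholders:

    // Sketch only, not part of the patch.
    BufferCacheRuntime runtime(device, memory_allocator, scheduler, staging_pool,
                               update_descriptor_queue, descriptor_pool);
    BufferCache buffer_cache(rasterizer, maxwell3d, kepler_compute, gpu_memory, cpu_memory, runtime);
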
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 5eb6a54be..a4fdcdf81 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,7 +10,6 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
-#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -22,30 +21,7 @@
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-
namespace {
-
-VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
- return {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- };
-}
-
-VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
- return {
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = 0,
- .stride = sizeof(DescriptorUpdateEntry),
- };
-}
-
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
return {
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
@@ -162,55 +138,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
return set;
}
-QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
- StagingBufferPool& staging_buffer_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_)
- : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
- BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
- BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
- scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
- update_descriptor_queue{update_descriptor_queue_} {}
-
-QuadArrayPass::~QuadArrayPass() = default;
-
-std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
- const u32 num_triangle_vertices = (num_vertices / 4) * 6;
- const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
- const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
-
- update_descriptor_queue.Acquire();
- update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
- const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
-
- scheduler.RequestOutsideRenderPassOperationContext();
-
- ASSERT(num_vertices % 4 == 0);
- const u32 num_quads = num_vertices / 4;
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer,
- num_quads, first, set](vk::CommandBuffer cmdbuf) {
- constexpr u32 dispatch_size = 1024;
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
- cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
- cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
-
- VkBufferMemoryBarrier barrier;
- barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
- barrier.pNext = nullptr;
- barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
- barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
- barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
- barrier.offset = 0;
- barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
- });
- return {staging_ref.buffer, 0};
-}
-
Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
@@ -221,18 +148,18 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
Uint8Pass::~Uint8Pass() = default;
-std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
- u64 src_offset) {
+std::pair<VkBuffer, u32> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
+ u32 src_offset) {
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
- const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
+ const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
- update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
+ update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
+ scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
num_vertices](vk::CommandBuffer cmdbuf) {
constexpr u32 dispatch_size = 1024;
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
@@ -252,7 +179,7 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
});
- return {staging_ref.buffer, 0};
+ return {staging.buffer, 0};
}
QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
@@ -267,9 +194,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
QuadIndexedPass::~QuadIndexedPass() = default;
-std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
+std::pair<VkBuffer, u32> QuadIndexedPass::Assemble(
Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
- VkBuffer src_buffer, u64 src_offset) {
+ VkBuffer src_buffer, u32 src_offset) {
const u32 index_shift = [index_format] {
switch (index_format) {
case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@@ -286,15 +213,15 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
const u32 num_tri_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
- const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
+ const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
- update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
+ update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
+ scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
static constexpr u32 dispatch_size = 1024;
const std::array push_constants = {base_vertex, index_shift};
@@ -317,7 +244,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
});
- return {staging_ref.buffer, 0};
+ return {staging.buffer, 0};
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index f5c6f5f17..4904019f5 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -41,22 +41,6 @@ private:
vk::ShaderModule module;
};
-class QuadArrayPass final : public VKComputePass {
-public:
- explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
- StagingBufferPool& staging_buffer_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_);
- ~QuadArrayPass();
-
- std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
-
-private:
- VKScheduler& scheduler;
- StagingBufferPool& staging_buffer_pool;
- VKUpdateDescriptorQueue& update_descriptor_queue;
-};
-
class Uint8Pass final : public VKComputePass {
public:
explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
@@ -64,7 +48,9 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue_);
~Uint8Pass();
- std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
+ /// Assemble uint8 indices into a uint16 index buffer
+ /// Returns a pair with the staging buffer, and the offset where the assembled data is
+ std::pair<VkBuffer, u32> Assemble(u32 num_vertices, VkBuffer src_buffer, u32 src_offset);
private:
VKScheduler& scheduler;
@@ -80,9 +66,9 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue_);
~QuadIndexedPass();
- std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
+ std::pair<VkBuffer, u32> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
- u64 src_offset);
+ u32 src_offset);
private:
VKScheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 6cd00884d..3bec48d14 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -45,8 +45,8 @@ void InnerFence::Wait() {
}
VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
- Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
- VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
+ TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ VKQueryCache& query_cache_, const Device& device_,
VKScheduler& scheduler_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
scheduler{scheduler_} {}
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 9c5e5aa8f..2f8322d29 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -22,7 +22,6 @@ class RasterizerInterface;
namespace Vulkan {
class Device;
-class VKBufferCache;
class VKQueryCache;
class VKScheduler;
@@ -45,14 +44,14 @@ private:
using Fence = std::shared_ptr<InnerFence>;
using GenericFenceManager =
- VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
+ VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
class VKFenceManager final : public GenericFenceManager {
public:
- explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
- Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
- VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
- VKScheduler& scheduler_);
+ explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ TextureCache& texture_cache, BufferCache& buffer_cache,
+ VKQueryCache& query_cache, const Device& device,
+ VKScheduler& scheduler);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f0a111829..684d4e3a6 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -8,8 +8,6 @@
#include <mutex>
#include <vector>
-#include <boost/container/static_vector.hpp>
-
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
@@ -24,7 +22,6 @@
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@@ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25
MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
-MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
namespace {
+struct DrawParams {
+ u32 base_instance;
+ u32 num_instances;
+ u32 base_vertex;
+ u32 num_vertices;
+ bool is_indexed;
+};
constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
@@ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
const float width = src.scale_x * 2.0f;
const float height = src.scale_y * 2.0f;
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
-
VkViewport viewport{
.x = src.translate_x - src.scale_x,
.y = src.translate_y - src.scale_y,
@@ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
.minDepth = src.translate_z - src.scale_z * reduce_z,
.maxDepth = src.translate_z + src.scale_z,
};
-
if (!device.IsExtDepthRangeUnrestrictedSupported()) {
viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
}
-
return viewport;
}
@@ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}
-template <size_t N>
-std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
- std::array<VkDeviceSize, N> expanded;
- std::copy(strides.begin(), strides.end(), expanded.begin());
- return expanded;
-}
-
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
if (entry.is_buffer) {
return ImageViewType::e2D;
@@ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca
}
}
-} // Anonymous namespace
-
-class BufferBindings final {
-public:
- void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) {
- vertex.buffers[vertex.num_buffers] = buffer;
- vertex.offsets[vertex.num_buffers] = offset;
- vertex.sizes[vertex.num_buffers] = size;
- vertex.strides[vertex.num_buffers] = static_cast<u16>(stride);
- ++vertex.num_buffers;
- }
-
- void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) {
- index.buffer = buffer;
- index.offset = offset;
- index.type = type;
- }
-
- void Bind(const Device& device, VKScheduler& scheduler) const {
- // Use this large switch case to avoid dispatching more memory in the record lambda than
- // what we need. It looks horrible, but it's the best we can do on standard C++.
- switch (vertex.num_buffers) {
- case 0:
- return BindStatic<0>(device, scheduler);
- case 1:
- return BindStatic<1>(device, scheduler);
- case 2:
- return BindStatic<2>(device, scheduler);
- case 3:
- return BindStatic<3>(device, scheduler);
- case 4:
- return BindStatic<4>(device, scheduler);
- case 5:
- return BindStatic<5>(device, scheduler);
- case 6:
- return BindStatic<6>(device, scheduler);
- case 7:
- return BindStatic<7>(device, scheduler);
- case 8:
- return BindStatic<8>(device, scheduler);
- case 9:
- return BindStatic<9>(device, scheduler);
- case 10:
- return BindStatic<10>(device, scheduler);
- case 11:
- return BindStatic<11>(device, scheduler);
- case 12:
- return BindStatic<12>(device, scheduler);
- case 13:
- return BindStatic<13>(device, scheduler);
- case 14:
- return BindStatic<14>(device, scheduler);
- case 15:
- return BindStatic<15>(device, scheduler);
- case 16:
- return BindStatic<16>(device, scheduler);
- case 17:
- return BindStatic<17>(device, scheduler);
- case 18:
- return BindStatic<18>(device, scheduler);
- case 19:
- return BindStatic<19>(device, scheduler);
- case 20:
- return BindStatic<20>(device, scheduler);
- case 21:
- return BindStatic<21>(device, scheduler);
- case 22:
- return BindStatic<22>(device, scheduler);
- case 23:
- return BindStatic<23>(device, scheduler);
- case 24:
- return BindStatic<24>(device, scheduler);
- case 25:
- return BindStatic<25>(device, scheduler);
- case 26:
- return BindStatic<26>(device, scheduler);
- case 27:
- return BindStatic<27>(device, scheduler);
- case 28:
- return BindStatic<28>(device, scheduler);
- case 29:
- return BindStatic<29>(device, scheduler);
- case 30:
- return BindStatic<30>(device, scheduler);
- case 31:
- return BindStatic<31>(device, scheduler);
- case 32:
- return BindStatic<32>(device, scheduler);
- }
- UNREACHABLE();
- }
-
-private:
- // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
- struct {
- size_t num_buffers = 0;
- std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
- std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
- std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
- std::array<u16, Maxwell::NumVertexArrays> strides;
- } vertex;
-
- struct {
- VkBuffer buffer = nullptr;
- VkDeviceSize offset;
- VkIndexType type;
- } index;
-
- template <size_t N>
- void BindStatic(const Device& device, VKScheduler& scheduler) const {
- if (device.IsExtExtendedDynamicStateSupported()) {
- if (index.buffer) {
- BindStatic<N, true, true>(scheduler);
- } else {
- BindStatic<N, false, true>(scheduler);
- }
- } else {
- if (index.buffer) {
- BindStatic<N, true, false>(scheduler);
- } else {
- BindStatic<N, false, false>(scheduler);
- }
- }
- }
-
- template <size_t N, bool is_indexed, bool has_extended_dynamic_state>
- void BindStatic(VKScheduler& scheduler) const {
- static_assert(N <= Maxwell::NumVertexArrays);
- if constexpr (N == 0) {
- return;
- }
-
- std::array<VkBuffer, N> buffers;
- std::array<VkDeviceSize, N> offsets;
- std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
- std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
-
- if constexpr (has_extended_dynamic_state) {
- // With extended dynamic states we can specify the length and stride of a vertex buffer
- std::array<VkDeviceSize, N> sizes;
- std::array<u16, N> strides;
- std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin());
- std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin());
-
- if constexpr (is_indexed) {
- scheduler.Record(
- [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
- cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
- offsets.data(), sizes.data(),
- ExpandStrides(strides).data());
- });
- } else {
- scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
- offsets.data(), sizes.data(),
- ExpandStrides(strides).data());
- });
- }
- return;
- }
-
- if constexpr (is_indexed) {
- // Indexed draw
- scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
- cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
- });
- } else {
- // Array draw
- scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
- });
- }
- }
-};
-
-void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
- if (is_indexed) {
- cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance);
- } else {
- cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance);
+DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
+ bool is_indexed) {
+ DrawParams params{
+ .base_instance = regs.vb_base_instance,
+ .num_instances = is_instanced ? num_instances : 1,
+ .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
+ .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
+ .is_indexed = is_indexed,
+ };
+ if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
+ // 6 triangle vertices per quad, base vertex is part of the index
+ // See BindQuadArrayIndexBuffer for more details
+ params.num_vertices = (params.num_vertices / 4) * 6;
+ params.base_vertex = 0;
+ params.is_indexed = true;
}
+ return params;
}
+} // Anonymous namespace
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::MemoryManager& gpu_memory_,
@@ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
: RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
- state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler),
+ state_tracker{state_tracker_}, scheduler{scheduler_},
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
update_descriptor_queue(device, scheduler),
blit_image(device, scheduler, state_tracker, descriptor_pool),
- quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image},
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+ buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
+ update_descriptor_queue, descriptor_pool),
+ buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
descriptor_pool, update_descriptor_queue),
- buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler,
- stream_buffer, staging_pool),
query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
- fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler),
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
scheduler.SetQueryCache(query_cache);
if (device.UseAsynchronousShaders()) {
@@ -449,22 +270,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
GraphicsPipelineCacheKey key;
key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
- buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
-
- BufferBindings buffer_bindings;
- const DrawParameters draw_params =
- SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- auto lock = texture_cache.AcquireLock();
texture_cache.SynchronizeGraphicsDescriptors();
-
texture_cache.UpdateRenderTargets(false);
const auto shaders = pipeline_cache.GetShaders();
key.shaders = GetShaderAddresses(shaders);
- SetupShaderDescriptors(shaders);
-
- buffer_cache.Unmap();
+ SetupShaderDescriptors(shaders, is_indexed);
const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
key.renderpass = framebuffer->RenderPass();
@@ -476,22 +289,29 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
return;
}
- buffer_bindings.Bind(device, scheduler);
-
BeginTransformFeedback();
scheduler.RequestRenderpass(framebuffer);
scheduler.BindGraphicsPipeline(pipeline->GetHandle());
UpdateDynamicStates();
- const auto pipeline_layout = pipeline->GetLayout();
- const auto descriptor_set = pipeline->CommitDescriptorSet();
+ const auto& regs = maxwell3d.regs;
+ const u32 num_instances = maxwell3d.mme_draw.instance_count;
+ const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
+ const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
+ const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
if (descriptor_set) {
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
- DESCRIPTOR_SET, descriptor_set, {});
+ DESCRIPTOR_SET, descriptor_set, nullptr);
+ }
+ if (draw_params.is_indexed) {
+ cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
+ draw_params.base_vertex, draw_params.base_instance);
+ } else {
+ cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
+ draw_params.base_vertex, draw_params.base_instance);
}
- draw_params.Draw(cmdbuf);
});
EndTransformFeedback();
@@ -515,7 +335,7 @@ void RasterizerVulkan::Clear() {
return;
}
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.UpdateRenderTargets(true);
const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
const VkExtent2D render_area = framebuffer->RenderArea();
@@ -559,7 +379,6 @@ void RasterizerVulkan::Clear() {
if (use_stencil) {
aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
-
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
VkClearAttachment attachment;
@@ -580,12 +399,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
auto& pipeline = pipeline_cache.GetComputePipeline({
.shader = code_addr,
.shared_memory_size = launch_desc.shared_alloc,
- .workgroup_size =
- {
- launch_desc.block_dim_x,
- launch_desc.block_dim_y,
- launch_desc.block_dim_z,
- },
+ .workgroup_size{
+ launch_desc.block_dim_x,
+ launch_desc.block_dim_y,
+ launch_desc.block_dim_z,
+ },
});
// Compute dispatches can't be executed inside a renderpass
@@ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
image_view_indices.clear();
sampler_handles.clear();
- auto lock = texture_cache.AcquireLock();
- texture_cache.SynchronizeComputeDescriptors();
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
const auto& entries = pipeline.GetEntries();
+ buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
+ buffer_cache.UnbindComputeStorageBuffers();
+ u32 ssbo_index = 0;
+ for (const auto& buffer : entries.global_buffers) {
+ buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
+ buffer.is_written);
+ ++ssbo_index;
+ }
+ buffer_cache.UpdateComputeBuffers();
+
+ texture_cache.SynchronizeComputeDescriptors();
+
SetupComputeUniformTexels(entries);
SetupComputeTextures(entries);
SetupComputeStorageTexels(entries);
@@ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
- buffer_cache.Map(CalculateComputeStreamBufferSize());
-
update_descriptor_queue.Acquire();
- SetupComputeConstBuffers(entries);
- SetupComputeGlobalBuffers(entries);
+ buffer_cache.BindHostComputeBuffers();
ImageViewId* image_view_id_ptr = image_view_ids.data();
VkSampler* sampler_ptr = sampler_handles.data();
PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
sampler_ptr);
- buffer_cache.Unmap();
-
const VkPipeline pipeline_handle = pipeline.GetHandle();
const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
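Condensed from the hunk above, the compute path now declares its bindings to the buffer cache first (enabled uniform buffers, then each storage buffer), lets the cache update guest data, and only afterwards binds the host buffers before descriptors are pushed. The sketch below restates that ordering against a hypothetical interface mirroring the calls visible here; it is not the buffer cache implementation.

    #include <cstdint>
    #include <vector>

    // Hypothetical interface mirroring the buffer-cache calls in this hunk.
    struct FakeComputeBufferCache {
        void SetEnabledComputeUniformBuffers(std::uint32_t) {}
        void UnbindComputeStorageBuffers() {}
        void BindComputeStorageBuffer(std::uint32_t, std::uint32_t, std::uint32_t, bool) {}
        void UpdateComputeBuffers() {}   // upload/refresh guest data
        void BindHostComputeBuffers() {} // record the actual Vulkan bindings
    };

    struct FakeGlobalBuffer {
        std::uint32_t cbuf_index;
        std::uint32_t cbuf_offset;
        bool is_written;
    };

    // Order matters: declare every binding, update, then bind the host buffers.
    void BindComputeBuffersSketch(FakeComputeBufferCache& cache, std::uint32_t enabled_ubo_mask,
                                  const std::vector<FakeGlobalBuffer>& global_buffers) {
        cache.SetEnabledComputeUniformBuffers(enabled_ubo_mask);
        cache.UnbindComputeStorageBuffers();
        std::uint32_t ssbo_index = 0;
        for (const FakeGlobalBuffer& buffer : global_buffers) {
            cache.BindComputeStorageBuffer(ssbo_index++, buffer.cbuf_index, buffer.cbuf_offset,
                                           buffer.is_written);
        }
        cache.UpdateComputeBuffers();
        // ... gather texture/image descriptors here, as the real code does ...
        cache.BindHostComputeBuffers();
    }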
@@ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
query_cache.Query(gpu_addr, type, timestamp);
}
+void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+ u32 size) {
+ buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
+}
+
void RasterizerVulkan::FlushAll() {}
void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
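BindGraphicsUniformBuffer is a new RasterizerInterface hook that simply forwards stage, slot, address and size to the buffer cache. A hedged sketch of a caller, presumably invoked when the 3D engine assigns a constant buffer to a shader stage; the stand-in types below are illustrative, only the method signature matches the override declared in vk_rasterizer.h further down.

    #include <cstddef>
    #include <cstdint>

    using GPUVAddrSketch = std::uint64_t; // stand-in for GPUVAddr

    // Minimal stand-in for the rasterizer-side hook added in this commit.
    struct FakeRasterizer {
        virtual ~FakeRasterizer() = default;
        virtual void BindGraphicsUniformBuffer(std::size_t stage, std::uint32_t index,
                                               GPUVAddrSketch gpu_addr, std::uint32_t size) = 0;
    };

    // Hypothetical caller: hand the bind straight to the rasterizer, which forwards it
    // to the buffer cache instead of uploading the data itself.
    void OnConstBufferBound(FakeRasterizer& rasterizer, std::size_t stage, std::uint32_t slot,
                            GPUVAddrSketch gpu_addr, std::uint32_t size) {
        rasterizer.BindGraphicsUniformBuffer(stage, slot, gpu_addr, size);
    }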
@@ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
return;
}
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.DownloadMemory(addr, size);
}
- buffer_cache.FlushRegion(addr, size);
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.DownloadMemory(addr, size);
+ }
query_cache.FlushRegion(addr, size);
}
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
+ std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
if (!Settings::IsGPULevelHigh()) {
- return buffer_cache.MustFlushRegion(addr, size);
+ return buffer_cache.IsRegionGpuModified(addr, size);
}
return texture_cache.IsRegionGpuModified(addr, size) ||
- buffer_cache.MustFlushRegion(addr, size);
+ buffer_cache.IsRegionGpuModified(addr, size);
}
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
@@ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
return;
}
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.WriteMemory(addr, size);
+ }
pipeline_cache.InvalidateRegion(addr, size);
- buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
}
@@ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
+ pipeline_cache.OnCPUWrite(addr, size);
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
- pipeline_cache.OnCPUWrite(addr, size);
- buffer_cache.OnCPUWrite(addr, size);
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.CachedWriteMemory(addr, size);
+ }
}
void RasterizerVulkan::SyncGuestHost() {
- buffer_cache.SyncGuestHost();
pipeline_cache.SyncGuestHost();
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.FlushCachedWrites();
+ }
}
void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
- buffer_cache.OnCPUWrite(addr, size);
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.WriteMemory(addr, size);
+ }
pipeline_cache.OnCPUWrite(addr, size);
}
@@ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() {
draw_counter = 0;
update_descriptor_queue.TickFrame();
fence_manager.TickFrame();
- buffer_cache.TickFrame();
staging_pool.TickFrame();
{
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.TickFrame();
}
+ {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.TickFrame();
+ }
}
bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
texture_cache.BlitImage(dst, src, copy_config);
return true;
}
@@ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
if (!framebuffer_addr) {
return false;
}
-
- auto lock = texture_cache.AcquireLock();
+ std::scoped_lock lock{texture_cache.mutex};
ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
if (!image_view) {
return false;
}
-
screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
screen_info.width = image_view->size.width;
screen_info.height = image_view->size.height;
@@ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() {
draw_counter = 0;
}
-RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
- BufferBindings& buffer_bindings,
- bool is_indexed,
- bool is_instanced) {
- MICROPROFILE_SCOPE(Vulkan_Geometry);
-
- const auto& regs = maxwell3d.regs;
-
- SetupVertexArrays(buffer_bindings);
-
- const u32 base_instance = regs.vb_base_instance;
- const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
- const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
- const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
-
- DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
- SetupIndexBuffer(buffer_bindings, params, is_indexed);
-
- return params;
-}
-
void RasterizerVulkan::SetupShaderDescriptors(
- const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
+ const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
image_view_indices.clear();
sampler_handles.clear();
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
@@ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors(
if (!shader) {
continue;
}
- const auto& entries = shader->GetEntries();
+ const ShaderEntries& entries = shader->GetEntries();
SetupGraphicsUniformTexels(entries, stage);
SetupGraphicsTextures(entries, stage);
SetupGraphicsStorageTexels(entries, stage);
SetupGraphicsImages(entries, stage);
+
+ buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
+ buffer_cache.UnbindGraphicsStorageBuffers(stage);
+ u32 ssbo_index = 0;
+ for (const auto& buffer : entries.global_buffers) {
+ buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
+ buffer.cbuf_offset, buffer.is_written);
+ ++ssbo_index;
+ }
}
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ buffer_cache.UpdateGraphicsBuffers(is_indexed);
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+ buffer_cache.BindHostGeometryBuffers(is_indexed);
+
update_descriptor_queue.Acquire();
ImageViewId* image_view_id_ptr = image_view_ids.data();
@@ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors(
if (!shader) {
continue;
}
- const auto& entries = shader->GetEntries();
- SetupGraphicsConstBuffers(entries, stage);
- SetupGraphicsGlobalBuffers(entries, stage);
- PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
- sampler_ptr);
+ buffer_cache.BindHostStageBuffers(stage);
+ PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
+ image_view_id_ptr, sampler_ptr);
}
}
@@ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() {
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
return;
}
-
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
-
- UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
- UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
- UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
-
- const auto& binding = regs.tfb_bindings[0];
- UNIMPLEMENTED_IF(binding.buffer_enable == 0);
- UNIMPLEMENTED_IF(binding.buffer_offset != 0);
-
- const GPUVAddr gpu_addr = binding.Address();
- const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
- const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
-
- scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
- cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
- });
+ scheduler.Record(
+ [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}
void RasterizerVulkan::EndTransformFeedback() {
@@ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() {
if (!device.IsExtTransformFeedbackSupported()) {
return;
}
-
scheduler.Record(
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}
-void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
- const auto& regs = maxwell3d.regs;
-
- for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
- const auto& vertex_array = regs.vertex_array[index];
- if (!vertex_array.IsEnabled()) {
- continue;
- }
- const GPUVAddr start{vertex_array.StartAddress()};
- const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
-
- ASSERT(end >= start);
- const size_t size = end - start;
- if (size == 0) {
- buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
- continue;
- }
- const auto info = buffer_cache.UploadMemory(start, size);
- buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride);
- }
-}
-
-void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
- bool is_indexed) {
- if (params.num_vertices == 0) {
- return;
- }
- const auto& regs = maxwell3d.regs;
- switch (regs.draw.topology) {
- case Maxwell::PrimitiveTopology::Quads: {
- if (!params.is_indexed) {
- const auto [buffer, offset] =
- quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
- buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
- params.base_vertex = 0;
- params.num_vertices = params.num_vertices * 6 / 4;
- params.is_indexed = true;
- break;
- }
- const GPUVAddr gpu_addr = regs.index_array.IndexStart();
- const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
- VkBuffer buffer = info.handle;
- u64 offset = info.offset;
- std::tie(buffer, offset) = quad_indexed_pass.Assemble(
- regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
-
- buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
- params.num_vertices = (params.num_vertices / 4) * 6;
- params.base_vertex = 0;
- break;
- }
- default: {
- if (!is_indexed) {
- break;
- }
- const GPUVAddr gpu_addr = regs.index_array.IndexStart();
- const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
- VkBuffer buffer = info.handle;
- u64 offset = info.offset;
-
- auto format = regs.index_array.format;
- const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
- if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
- std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset);
- format = Maxwell::IndexFormat::UnsignedShort;
- }
-
- buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
- break;
- }
- }
-}
-
-void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
- const auto& shader_stage = maxwell3d.state.shader_stages[stage];
- for (const auto& entry : entries.const_buffers) {
- SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
- }
-}
-
-void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
- const auto& cbufs{maxwell3d.state.shader_stages[stage]};
-
- for (const auto& entry : entries.global_buffers) {
- const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
- SetupGlobalBuffer(entry, addr);
- }
-}
-
void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& regs = maxwell3d.regs;
const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : entries.uniform_texels) {
@@ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries,
}
void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& regs = maxwell3d.regs;
const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : entries.samplers) {
@@ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_
}
void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& regs = maxwell3d.regs;
const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : entries.storage_texels) {
@@ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries,
}
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
- MICROPROFILE_SCOPE(Vulkan_Images);
const auto& regs = maxwell3d.regs;
const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
for (const auto& entry : entries.images) {
@@ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t
}
}
-void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
- const auto& launch_desc = kepler_compute.launch_description;
- for (const auto& entry : entries.const_buffers) {
- const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
- const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
- const Tegra::Engines::ConstBufferInfo info{
- .address = config.Address(),
- .size = config.size,
- .enabled = mask[entry.GetIndex()],
- };
- SetupConstBuffer(entry, info);
- }
-}
-
-void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
- const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
- for (const auto& entry : entries.global_buffers) {
- const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
- SetupGlobalBuffer(entry, addr);
- }
-}
-
void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : entries.uniform_texels) {
const TextureHandle handle =
@@ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
}
void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : entries.samplers) {
for (size_t index = 0; index < entry.size; ++index) {
@@ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
}
void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_Textures);
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : entries.storage_texels) {
const TextureHandle handle =
@@ -1149,7 +845,6 @@ void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
}
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
- MICROPROFILE_SCOPE(Vulkan_Images);
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
for (const auto& entry : entries.images) {
const TextureHandle handle =
@@ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
}
}
-void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
- const Tegra::Engines::ConstBufferInfo& buffer) {
- if (!buffer.enabled) {
- // Set values to zero to unbind buffers
- update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
- return;
- }
- // Align the size to avoid bad std140 interactions
- const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
- ASSERT(size <= MaxConstbufferSize);
-
- const u64 alignment = device.GetUniformBufferAlignment();
- const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment);
- update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
-}
-
-void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
- const u64 actual_addr = gpu_memory.Read<u64>(address);
- const u32 size = gpu_memory.Read<u32>(address + 8);
-
- if (size == 0) {
- // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
- // because Vulkan doesn't like empty buffers.
- // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
- // default buffer.
- static constexpr size_t dummy_size = 4;
- const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
- update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
- return;
- }
-
- const auto info = buffer_cache.UploadMemory(
- actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
- update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
-}
-
void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchViewports()) {
return;
@@ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
- GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)};
+ GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
+ };
scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
}
@@ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
if (!state_tracker.TouchScissors()) {
return;
}
- const std::array scissors = {
+ const std::array scissors{
GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
- GetScissorState(regs, 15)};
+ GetScissorState(regs, 15),
+ };
scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
}
@@ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
});
}
-size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
- size_t size = CalculateVertexArraysSize();
- if (is_indexed) {
- size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
- }
- size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
- return size;
-}
-
-size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
- return Tegra::Engines::KeplerCompute::NumConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
-}
-
-size_t RasterizerVulkan::CalculateVertexArraysSize() const {
- const auto& regs = maxwell3d.regs;
-
- size_t size = 0;
- for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- // This implementation assumes that all attributes are used in the shader.
- const GPUVAddr start{regs.vertex_array[index].StartAddress()};
- const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
- DEBUG_ASSERT(end >= start);
-
- size += (end - start) * regs.vertex_array[index].enable;
- }
- return size;
-}
-
-size_t RasterizerVulkan::CalculateIndexBufferSize() const {
- return static_cast<size_t>(maxwell3d.regs.index_array.count) *
- static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
-}
-
-size_t RasterizerVulkan::CalculateConstBufferSize(
- const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
- if (entry.IsIndirect()) {
- // Buffer is accessed indirectly, so upload the entire thing
- return buffer.size;
- } else {
- // Buffer is accessed directly, upload just what we use
- return entry.GetSize();
- }
-}
-
-VkBuffer RasterizerVulkan::DefaultBuffer() {
- if (default_buffer) {
- return *default_buffer;
- }
- default_buffer = device.GetLogical().CreateBuffer({
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .size = DEFAULT_BUFFER_SIZE,
- .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = 0,
- .pQueueFamilyIndices = nullptr,
- });
- default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal);
-
- scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {
- cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0);
- });
- return *default_buffer;
-}
-
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 8e261b9bd..7fc6741da 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -18,14 +18,12 @@
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
-#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
-#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader/async_shaders.h"
@@ -49,7 +47,6 @@ namespace Vulkan {
struct VKScreenInfo;
class StateTracker;
-class BufferBindings;
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
@@ -65,6 +62,7 @@ public:
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
+ void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -107,24 +105,11 @@ private:
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
- struct DrawParameters {
- void Draw(vk::CommandBuffer cmdbuf) const;
-
- u32 base_instance = 0;
- u32 num_instances = 0;
- u32 base_vertex = 0;
- u32 num_vertices = 0;
- bool is_indexed = 0;
- };
-
void FlushWork();
- /// Setups geometry buffers and state.
- DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
- bool is_indexed, bool is_instanced);
-
/// Setup descriptors in the graphics pipeline.
- void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
+ void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
+ bool is_indexed);
void UpdateDynamicStates();
@@ -132,16 +117,6 @@ private:
void EndTransformFeedback();
- void SetupVertexArrays(BufferBindings& buffer_bindings);
-
- void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
-
- /// Setup constant buffers in the graphics pipeline.
- void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
-
- /// Setup global buffers in the graphics pipeline.
- void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
-
/// Setup uniform texels in the graphics pipeline.
void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
@@ -154,12 +129,6 @@ private:
/// Setup images in the graphics pipeline.
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
- /// Setup constant buffers in the compute pipeline.
- void SetupComputeConstBuffers(const ShaderEntries& entries);
-
- /// Setup global buffers in the compute pipeline.
- void SetupComputeGlobalBuffers(const ShaderEntries& entries);
-
/// Setup texel buffers in the compute pipeline.
void SetupComputeUniformTexels(const ShaderEntries& entries);
@@ -172,11 +141,6 @@ private:
/// Setup images in the compute pipeline.
void SetupComputeImages(const ShaderEntries& entries);
- void SetupConstBuffer(const ConstBufferEntry& entry,
- const Tegra::Engines::ConstBufferInfo& buffer);
-
- void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
-
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -193,19 +157,6 @@ private:
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
- size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
-
- size_t CalculateComputeStreamBufferSize() const;
-
- size_t CalculateVertexArraysSize() const;
-
- size_t CalculateIndexBufferSize() const;
-
- size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
- const Tegra::Engines::ConstBufferInfo& buffer) const;
-
- VkBuffer DefaultBuffer();
-
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
@@ -217,24 +168,19 @@ private:
StateTracker& state_tracker;
VKScheduler& scheduler;
- VKStreamBuffer stream_buffer;
StagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
BlitImageHelper blit_image;
- QuadArrayPass quad_array_pass;
- QuadIndexedPass quad_indexed_pass;
- Uint8Pass uint8_pass;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
+ BufferCacheRuntime buffer_cache_runtime;
+ BufferCache buffer_cache;
VKPipelineCache pipeline_cache;
- VKBufferCache buffer_cache;
VKQueryCache query_cache;
VKFenceManager fence_manager;
- vk::Buffer default_buffer;
- MemoryCommit default_buffer_commit;
vk::Event wfi_event;
VideoCommon::Shader::AsyncShaders async_shaders;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 66004f9c0..f35c120b0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() {
worker_thread.join();
}
-u64 VKScheduler::CurrentTick() const noexcept {
- return master_semaphore->CurrentTick();
-}
-
-bool VKScheduler::IsFree(u64 tick) const noexcept {
- return master_semaphore->IsFree(tick);
-}
-
-void VKScheduler::Wait(u64 tick) {
- master_semaphore->Wait(tick);
-}
-
void VKScheduler::Flush(VkSemaphore semaphore) {
SubmitExecution(semaphore);
AllocateNewContext();
@@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
- VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr,
+ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
vk::Span(barriers.data(), num_images));
});
state.renderpass = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 15f2987eb..3ce48e9d2 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -14,6 +14,7 @@
#include "common/alignment.h"
#include "common/common_types.h"
#include "common/threadsafe_queue.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
@@ -21,7 +22,6 @@ namespace Vulkan {
class CommandPool;
class Device;
class Framebuffer;
-class MasterSemaphore;
class StateTracker;
class VKQueryCache;
@@ -32,15 +32,6 @@ public:
explicit VKScheduler(const Device& device, StateTracker& state_tracker);
~VKScheduler();
- /// Returns the current command buffer tick.
- [[nodiscard]] u64 CurrentTick() const noexcept;
-
- /// Returns true when a tick has been triggered by the GPU.
- [[nodiscard]] bool IsFree(u64 tick) const noexcept;
-
- /// Waits for the given tick to trigger on the GPU.
- void Wait(u64 tick);
-
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore semaphore = nullptr);
@@ -82,6 +73,21 @@ public:
(void)chunk->Record(command);
}
+ /// Returns the current command buffer tick.
+ [[nodiscard]] u64 CurrentTick() const noexcept {
+ return master_semaphore->CurrentTick();
+ }
+
+ /// Returns true when a tick has been triggered by the GPU.
+ [[nodiscard]] bool IsFree(u64 tick) const noexcept {
+ return master_semaphore->IsFree(tick);
+ }
+
+ /// Waits for the given tick to trigger on the GPU.
+ void Wait(u64 tick) {
+ master_semaphore->Wait(tick);
+ }
+
/// Returns the master timeline semaphore.
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
return *master_semaphore;
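CurrentTick, IsFree and Wait move into the header, where they can be inlined now that vk_master_semaphore.h is included directly. A usage sketch of the tick pattern these calls enable, deferring resource reuse until the GPU has caught up, using a hypothetical scheduler type with the same three-call interface.

    #include <cstdint>

    // Hypothetical scheduler exposing the same tick interface as VKScheduler.
    struct FakeScheduler {
        std::uint64_t CurrentTick() const noexcept { return cpu_tick; }
        bool IsFree(std::uint64_t tick) const noexcept { return tick <= gpu_tick; }
        void Wait(std::uint64_t) { /* block until the GPU reaches the tick */ }
        std::uint64_t cpu_tick = 10;
        std::uint64_t gpu_tick = 7;
    };

    struct PooledResource {
        std::uint64_t last_use_tick = 0;
    };

    // Stamp a resource with the tick it was last used on; recycle it only once the
    // GPU has signalled that tick, optionally blocking until it does.
    bool TryRecycle(FakeScheduler& scheduler, PooledResource& resource, bool allow_wait) {
        if (scheduler.IsFree(resource.last_use_tick)) {
            return true;
        }
        if (allow_wait) {
            scheduler.Wait(resource.last_use_tick);
            return true;
        }
        return false;
    }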
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 61d52b961..e165a6987 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -3127,6 +3127,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
entries.attributes.insert(GetGenericAttributeLocation(attribute));
}
}
+ for (const auto& buffer : entries.const_buffers) {
+ entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
+ }
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
entries.uses_warps = ir.UsesWarps();
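The decompiler now also records a bitmask of the const-buffer slots a shader uses; this is the value the rasterizer feeds into SetEnabledUniformBuffers and SetEnabledComputeUniformBuffers above. A small sketch of building and walking such a mask; the helper names are illustrative only.

    #include <bit>
    #include <cstdint>
    #include <vector>

    // One bit per used const-buffer slot, as the hunk above does with 1U << GetIndex().
    std::uint32_t BuildUniformBufferMask(const std::vector<std::uint32_t>& used_indices) {
        std::uint32_t mask = 0;
        for (const std::uint32_t index : used_indices) {
            mask |= 1U << index;
        }
        return mask;
    }

    // Visit only the enabled slots by peeling off the lowest set bit each iteration.
    template <typename Fn>
    void ForEachEnabledUniformBuffer(std::uint32_t mask, Fn&& fn) {
        while (mask != 0) {
            fn(static_cast<std::uint32_t>(std::countr_zero(mask)));
            mask &= mask - 1; // clear the lowest set bit
        }
    }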
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 26381e444..5d94132a5 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -39,24 +39,7 @@ private:
u32 index{};
};
-class GlobalBufferEntry {
-public:
- constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
- : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
-
- constexpr u32 GetCbufIndex() const {
- return cbuf_index;
- }
-
- constexpr u32 GetCbufOffset() const {
- return cbuf_offset;
- }
-
- constexpr bool IsWritten() const {
- return is_written;
- }
-
-private:
+struct GlobalBufferEntry {
u32 cbuf_index{};
u32 cbuf_offset{};
bool is_written{};
@@ -78,6 +61,7 @@ struct ShaderEntries {
std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
+ u32 enabled_uniform_buffers{};
bool uses_warps{};
};
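GlobalBufferEntry loses its constructor and getters and becomes a plain aggregate, which is why the rasterizer can read buffer.cbuf_index, buffer.cbuf_offset and buffer.is_written directly when registering storage buffers. A trivial sketch of the aggregate with designated initializers; the concrete values are made up, but the meaning of the fields follows the deleted SetupGlobalBuffer, which read an (address, size) pair from that const-buffer offset.

    #include <cstdint>

    struct GlobalBufferEntrySketch {
        std::uint32_t cbuf_index{};
        std::uint32_t cbuf_offset{};
        bool is_written{};
    };

    // Aggregate initialization replaces the old constructor + getters.
    constexpr GlobalBufferEntrySketch example{
        .cbuf_index = 0,     // const buffer that holds the descriptor
        .cbuf_offset = 0x20, // offset of the (address, size) pair inside it (made-up value)
        .is_written = true,  // the shader writes to this storage buffer
    };
    static_assert(example.cbuf_offset == 0x20);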
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 1779a2e30..e81fad007 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -30,15 +30,18 @@ using Table = Maxwell3D::DirtyState::Table;
using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
- static constexpr std::array INVALIDATION_FLAGS{
+ static constexpr int INVALIDATION_FLAGS[]{
Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
- DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
+ DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
};
Flags flags{};
for (const int flag : INVALIDATION_FLAGS) {
flags[flag] = true;
}
+ for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
+ flags[index] = true;
+ }
return flags;
}
@@ -130,7 +133,7 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
StateTracker::StateTracker(Tegra::GPU& gpu)
: flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
auto& tables = gpu.Maxwell3D().dirty.tables;
- SetupDirtyRenderTargets(tables);
+ SetupDirtyFlags(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
SetupDirtyDepthBias(tables);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index aa7c5d7c6..1eeb45ca9 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -426,46 +426,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
VkImageAspectFlags aspect_mask, bool is_initialized,
std::span<const VkBufferImageCopy> copies) {
- static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
+ VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
- .srcAccessMask = ACCESS_FLAGS,
+ .srcAccessMask = WRITE_ACCESS_FLAGS,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
- .subresourceRange =
- {
- .aspectMask = aspect_mask,
- .baseMipLevel = 0,
- .levelCount = VK_REMAINING_MIP_LEVELS,
- .baseArrayLayer = 0,
- .layerCount = VK_REMAINING_ARRAY_LAYERS,
- },
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
};
const VkImageMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = ACCESS_FLAGS,
+ .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
- .subresourceRange =
- {
- .aspectMask = aspect_mask,
- .baseMipLevel = 0,
- .levelCount = VK_REMAINING_MIP_LEVELS,
- .baseArrayLayer = 0,
- .layerCount = VK_REMAINING_ARRAY_LAYERS,
- },
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
read_barrier);
@@ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() {
scheduler.Finish();
}
-ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
- const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload);
- return {
- .handle = staging_ref.buffer,
- .span = staging_ref.mapped_span,
- };
+StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
+ return staging_buffer_pool.Request(size, MemoryUsage::Upload);
}
-ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
- const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download);
- return {
- .handle = staging_ref.buffer,
- .span = staging_ref.mapped_span,
- };
+StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
+ return staging_buffer_pool.Request(size, MemoryUsage::Download);
}
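ImageBufferMap is dropped in favour of handing callers the pool's StagingBufferRef directly. Judging by the members referenced in this patch (map.buffer here, mapped_span in the deleted wrapper), callers write guest data into the mapped span and pass the VkBuffer to the recorded copy; the sketch below uses stand-in types, the real StagingBufferRef lives in vk_staging_buffer_pool.h.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <span>

    using FakeVkBuffer = void*; // stand-in for VkBuffer

    // Stand-in mirroring the two StagingBufferRef members this patch uses.
    struct FakeStagingBufferRef {
        FakeVkBuffer buffer;
        std::span<std::uint8_t> mapped_span;
    };

    // Typical upload flow: copy guest data into the mapped span, then record a
    // buffer-to-image copy that reads from ref.buffer.
    void FillStagingBuffer(const FakeStagingBufferRef& ref, const void* guest_data,
                           std::size_t size) {
        std::memcpy(ref.mapped_span.data(), guest_data, size);
        // ... record CopyBufferToImage(ref.buffer, ...) on the command buffer ...
    }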
void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
@@ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
- .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@@ -828,12 +818,12 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
}
}
-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const BufferImageCopy> copies) {
// TODO: Move this to another API
scheduler->RequestOutsideRenderPassOperationContext();
std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
- const VkBuffer src_buffer = map.handle;
+ const VkBuffer src_buffer = map.buffer;
const VkImage vk_image = *image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
const bool is_initialized = std::exchange(initialized, true);
@@ -843,12 +833,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
});
}
-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const VideoCommon::BufferCopy> copies) {
// TODO: Move this to another API
scheduler->RequestOutsideRenderPassOperationContext();
std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
- const VkBuffer src_buffer = map.handle;
+ const VkBuffer src_buffer = map.buffer;
const VkBuffer dst_buffer = *buffer;
scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
@@ -856,13 +846,58 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
});
}
-void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const BufferImageCopy> copies) {
std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
- scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
+ scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
vk_copies](vk::CommandBuffer cmdbuf) {
- // TODO: Barriers
- cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies);
+ const VkImageMemoryBarrier read_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ };
+ const VkImageMemoryBarrier image_write_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
+ .aspectMask = aspect_mask,
+ .baseMipLevel = 0,
+ .levelCount = VK_REMAINING_MIP_LEVELS,
+ .baseArrayLayer = 0,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
+ },
+ };
+ const VkMemoryBarrier memory_write_barrier{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+ };
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, read_barrier);
+ cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, memory_write_barrier, nullptr, image_write_barrier);
});
}
@@ -1127,7 +1162,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
.pAttachments = attachments.data(),
.width = key.size.width,
.height = key.size.height,
- .layers = static_cast<u32>(num_layers),
+ .layers = static_cast<u32>(std::max(num_layers, 1)),
});
if (runtime.device.HasDebuggingToolAttached()) {
framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 8d29361a1..4558c3297 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,6 +7,7 @@
#include <compare>
#include <span>
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> {
namespace Vulkan {
-struct ImageBufferMap {
- [[nodiscard]] VkBuffer Handle() const noexcept {
- return handle;
- }
-
- [[nodiscard]] std::span<u8> Span() const noexcept {
- return span;
- }
-
- VkBuffer handle;
- std::span<u8> span;
-};
-
struct TextureCacheRuntime {
const Device& device;
VKScheduler& scheduler;
@@ -76,9 +64,9 @@ struct TextureCacheRuntime {
void Finish();
- [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);
+ [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
- [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size);
+ [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const std::array<Offset2D, 2>& dst_region,
@@ -94,7 +82,7 @@ struct TextureCacheRuntime {
return false;
}
- void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t,
+ void AccelerateImageUpload(Image&, const StagingBufferRef&, size_t,
std::span<const VideoCommon::SwizzleParameters>) {
UNREACHABLE();
}
@@ -112,13 +100,13 @@ public:
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
VAddr cpu_addr);
- void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
- void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const VideoCommon::BufferCopy> copies);
- void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
+ void DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);
[[nodiscard]] VkImage Handle() const noexcept {