From 3da87d3f12d39b9a52625fa9e5e0c5defc0ac440 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 18 Jan 2021 19:00:00 -0300 Subject: gl_buffer_cache: Drop interop based parameter buffer workarounds Sacrifice runtime performance to avoid generating kernel exceptions on Windows due to our abusive aliasing of interop buffer objects. --- src/video_core/buffer_cache/buffer_cache.h | 4 +- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 86 +++++++++------------- src/video_core/renderer_opengl/gl_buffer_cache.h | 20 ++--- 3 files changed, 45 insertions(+), 65 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 0fff42826..a296036f4 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -91,7 +91,7 @@ class BufferCache { }; public: - static constexpr size_t SKIP_CACHE_SIZE = 4096; + static constexpr u32 SKIP_CACHE_SIZE = 4096; explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -671,7 +671,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const VAddr cpu_addr = binding.cpu_addr; const u32 size = binding.size; Buffer& buffer = slot_buffers[binding.buffer_id]; - if (size <= runtime.SkipCacheSize() && !buffer.IsRegionGpuModified(cpu_addr, size)) { + if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 889ad6c56..1e555098d 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -36,13 +36,8 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast buffer.Create(); const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); glObjectLabel(GL_BUFFER, buffer.handle, static_cast(name.size()), name.data()); - if (runtime.device.UseAssemblyShaders()) { - CreateMemoryObjects(runtime); - glNamedBufferStorageMemEXT(buffer.handle, SizeBytes(), memory_commit.ExportOpenGLHandle(), - memory_commit.Offset()); - } else { - glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); - } + glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); + if (runtime.has_unified_vertex_buffers) { glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); } @@ -71,61 +66,33 @@ void Buffer::MakeResident(GLenum access) noexcept { glMakeNamedBufferResidentNV(buffer.handle, access); } -GLuint Buffer::SubBuffer(u32 offset) { - if (offset == 0) { - return buffer.handle; - } - for (const auto& [sub_buffer, sub_offset] : subs) { - if (sub_offset == offset) { - return sub_buffer.handle; - } - } - OGLBuffer sub_buffer; - sub_buffer.Create(); - glNamedBufferStorageMemEXT(sub_buffer.handle, SizeBytes() - offset, - memory_commit.ExportOpenGLHandle(), memory_commit.Offset() + offset); - return 
subs.emplace_back(std::move(sub_buffer), offset).first.handle; -} - -void Buffer::CreateMemoryObjects(BufferCacheRuntime& runtime) { - auto& allocator = runtime.vulkan_memory_allocator; - auto& device = runtime.vulkan_device->GetLogical(); - auto vulkan_buffer = device.CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = SizeBytes(), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); - const VkMemoryRequirements requirements = device.GetBufferMemoryRequirements(*vulkan_buffer); - memory_commit = allocator->Commit(requirements, Vulkan::MemoryUsage::DeviceLocal); -} - BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_, Vulkan::MemoryAllocator* vulkan_memory_allocator_) : device{device_}, vulkan_device{vulkan_device_}, vulkan_memory_allocator{vulkan_memory_allocator_}, - stream_buffer{device.HasFastBufferSubData() ? std::nullopt - : std::make_optional()} { + has_fast_buffer_sub_data{device.HasFastBufferSubData()}, + use_assembly_shaders{device.UseAssemblyShaders()}, + has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, + stream_buffer{has_fast_buffer_sub_data ? 
std::nullopt : std::make_optional()} { GLint gl_max_attributes; glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); max_attributes = static_cast(gl_max_attributes); - use_assembly_shaders = device.UseAssemblyShaders(); - has_unified_vertex_buffers = device.HasVertexBufferUnifiedMemory(); - for (auto& stage_uniforms : fast_uniforms) { for (OGLBuffer& buffer : stage_uniforms) { buffer.Create(); glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); } } + for (auto& stage_uniforms : copy_uniforms) { + for (OGLBuffer& buffer : stage_uniforms) { + buffer.Create(); + glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); + } + } + for (OGLBuffer& buffer : copy_compute_uniforms) { + buffer.Create(); + glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); + } } void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, @@ -167,8 +134,14 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size) { if (use_assembly_shaders) { - const GLuint sub_buffer = buffer.SubBuffer(offset); - glBindBufferRangeNV(PABO_LUT[stage], binding_index, sub_buffer, 0, + GLuint handle; + if (offset != 0) { + handle = copy_uniforms[stage][binding_index].handle; + glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size); + } else { + handle = buffer.Handle(); + } + glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, static_cast(size)); } else { const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; @@ -181,8 +154,15 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size) { if (use_assembly_shaders) { - glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, - buffer.SubBuffer(offset), 0, 
static_cast(size)); + GLuint handle; + if (offset != 0) { + handle = copy_compute_uniforms[binding_index].handle; + glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size); + } else { + handle = buffer.Handle(); + } + glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0, + static_cast(size)); } else { glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(), static_cast(offset), static_cast(size)); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f4d8871a9..35c9deb51 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -39,8 +39,6 @@ public: void MakeResident(GLenum access) noexcept; - [[nodiscard]] GLuint SubBuffer(u32 offset); - [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { return address; } @@ -50,13 +48,9 @@ public: } private: - void CreateMemoryObjects(BufferCacheRuntime& runtime); - GLuint64EXT address = 0; - Vulkan::MemoryCommit memory_commit; OGLBuffer buffer; GLenum current_residency_access = GL_NONE; - std::vector> subs; }; class BufferCacheRuntime { @@ -127,7 +121,7 @@ public: } [[nodiscard]] bool HasFastBufferSubData() const noexcept { - return device.HasFastBufferSubData(); + return has_fast_buffer_sub_data; } private: @@ -140,16 +134,22 @@ private: const Device& device; const Vulkan::Device* vulkan_device; Vulkan::MemoryAllocator* vulkan_memory_allocator; - std::optional stream_buffer; - - u32 max_attributes = 0; + bool has_fast_buffer_sub_data = false; bool use_assembly_shaders = false; bool has_unified_vertex_buffers = false; + u32 max_attributes = 0; + + std::optional stream_buffer; + std::array, VideoCommon::NUM_STAGES> fast_uniforms; + std::array, + VideoCommon::NUM_STAGES> + copy_uniforms; + std::array copy_compute_uniforms; u32 index_buffer_offset = 0; }; -- cgit v1.2.3