From 50a806ea671114c92b7905182a0a9140148415b2 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Wed, 29 Aug 2018 00:27:03 +0200 Subject: renderer_opengl: Implement a buffer cache. The idea of this cache is to avoid redundant uploads. So we are going to cache the uploaded buffers within the stream_buffer and just reuse the old pointers. The next step is to implement a VBO cache on GPU memory, but for now, I want to check the overhead of the cache management. Fetching the buffer over PCI-E should be quite fast. --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 90 ++++++++++++++++++ src/video_core/renderer_opengl/gl_buffer_cache.h | 57 ++++++++++++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 101 ++++++--------------- src/video_core/renderer_opengl/gl_rasterizer.h | 19 ++-- 4 files changed, 181 insertions(+), 86 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_buffer_cache.cpp create mode 100644 src/video_core/renderer_opengl/gl_buffer_cache.h (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp new file mode 100644 index 000000000..c85fbd306 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -0,0 +1,90 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/alignment.h" +#include "common/assert.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" + +namespace OpenGL { + +OGLBufferCache::OGLBufferCache(size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {} + +GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment, + bool cache) { + auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + const boost::optional cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; + + // Cache management is a big overhead, so only cache entries with a given size. + // TODO: Figure out which size is the best for given games. + cache &= size >= 2048; + + if (cache) { + auto entry = TryGet(*cpu_addr); + if (entry) { + if (entry->size >= size && entry->alignment == alignment) { + return entry->offset; + } + Unregister(entry); + } + } + + AlignBuffer(alignment); + GLintptr uploaded_offset = buffer_offset; + + Memory::ReadBlock(*cpu_addr, buffer_ptr, size); + + buffer_ptr += size; + buffer_offset += size; + + if (cache) { + auto entry = std::make_shared(); + entry->offset = uploaded_offset; + entry->size = size; + entry->alignment = alignment; + entry->addr = *cpu_addr; + Register(entry); + } + + return uploaded_offset; +} + +GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment) { + AlignBuffer(alignment); + std::memcpy(buffer_ptr, raw_pointer, size); + GLintptr uploaded_offset = buffer_offset; + + buffer_ptr += size; + buffer_offset += size; + return uploaded_offset; +} + +void OGLBufferCache::Map(size_t max_size) { + bool invalidate; + std::tie(buffer_ptr, buffer_offset_base, invalidate) = + stream_buffer.Map(static_cast(max_size), 4); + buffer_offset = buffer_offset_base; + + if (invalidate) { + InvalidateAll(); + } +} +void OGLBufferCache::Unmap() { + stream_buffer.Unmap(buffer_offset - buffer_offset_base); +} + +GLuint OGLBufferCache::GetHandle() { + return stream_buffer.GetHandle(); +} + +void OGLBufferCache::AlignBuffer(size_t alignment) { + // Align the offset, not the mapped pointer + GLintptr offset_aligned = + static_cast(Common::AlignUp(static_cast(buffer_offset), alignment)); + buffer_ptr += offset_aligned - buffer_offset; + buffer_offset = offset_aligned; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h new file mode 100644 index 000000000..9c7ad27e6 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -0,0 +1,57 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/common_types.h" +#include "video_core/rasterizer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_stream_buffer.h" + +namespace OpenGL { + +struct CachedBufferEntry final { + VAddr GetAddr() const { + return addr; + } + + size_t GetSizeInBytes() const { + return size; + } + + VAddr addr; + size_t size; + GLintptr offset; + size_t alignment; +}; + +class OGLBufferCache final : public RasterizerCache> { +public: + OGLBufferCache(size_t size); + + GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment = 4, + bool cache = true); + + GLintptr UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment = 4); + + void Map(size_t max_size); + void Unmap(); + + GLuint GetHandle(); + +protected: + void AlignBuffer(size_t alignment); + +private: + OGLStreamBuffer stream_buffer; + + u8* buffer_ptr; + GLintptr buffer_offset; + GLintptr buffer_offset_base; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7ee3f2ae7..c66a18155 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -43,7 +43,7 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) - : emu_window{window}, screen_info{info}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) { + : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) { // Create sampler objects for (size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -83,14 +83,14 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo hw_vao.Create(); - state.draw.vertex_buffer = stream_buffer.GetHandle(); + state.draw.vertex_buffer = buffer_cache.GetHandle(); shader_program_manager = std::make_unique(); state.draw.shader_program = 0; state.draw.vertex_array = hw_vao.handle; state.Apply(); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle()); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer_cache.GetHandle()); glEnable(GL_BLEND); @@ -101,14 +101,13 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo RasterizerOpenGL::~RasterizerOpenGL() {} -std::pair RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, - GLintptr buffer_offset) { +void RasterizerOpenGL::SetupVertexArrays() { MICROPROFILE_SCOPE(OpenGL_VAO); const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& regs = gpu.regs; state.draw.vertex_array = hw_vao.handle; - state.draw.vertex_buffer = stream_buffer.GetHandle(); + state.draw.vertex_buffer = buffer_cache.GetHandle(); state.Apply(); // Upload all guest vertex arrays sequentially to our buffer @@ -127,12 +126,10 @@ std::pair RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, ASSERT(end > start); u64 size = end - start + 1; - GLintptr vertex_buffer_offset; - std::tie(array_ptr, buffer_offset, vertex_buffer_offset) = - UploadMemory(array_ptr, buffer_offset, start, size); + GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size); // Bind the vertex array to the buffer at the current offset. - glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset, + glBindVertexBuffer(index, buffer_cache.GetHandle(), vertex_buffer_offset, vertex_array.stride); if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { @@ -177,11 +174,9 @@ std::pair RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, } glVertexAttribBinding(index, attrib.buffer); } - - return {array_ptr, buffer_offset}; } -std::pair RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { +void RasterizerOpenGL::SetupShaders() { MICROPROFILE_SCOPE(OpenGL_Shader); auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); @@ -199,21 +194,15 @@ std::pair RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr continue; } - std::tie(buffer_ptr, buffer_offset) = - AlignBuffer(buffer_ptr, buffer_offset, static_cast(uniform_buffer_alignment)); - const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 GLShader::MaxwellUniformData ubo{}; ubo.SetFromRegs(gpu.state.shader_stages[stage]); - std::memcpy(buffer_ptr, &ubo, sizeof(ubo)); + GLintptr offset = buffer_cache.UploadHostMemory( + &ubo, sizeof(ubo), static_cast(uniform_buffer_alignment)); // Bind the buffer - glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset, - sizeof(ubo)); - - buffer_ptr += sizeof(ubo); - buffer_offset += sizeof(ubo); + glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo)); Shader shader{shader_cache.GetStageProgram(program)}; @@ -234,9 +223,8 @@ std::pair RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr } // Configure the const buffers for this shader stage. - std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = - SetupConstBuffers(buffer_ptr, buffer_offset, static_cast(stage), - shader, current_constbuffer_bindpoint); + current_constbuffer_bindpoint = SetupConstBuffers(static_cast(stage), + shader, current_constbuffer_bindpoint); // Configure the textures for this shader stage. current_texture_bindpoint = SetupTextures(static_cast(stage), shader, @@ -250,8 +238,6 @@ std::pair RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr } shader_program_manager->UseTrivialGeometryShader(); - - return {buffer_ptr, buffer_offset}; } size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -439,31 +425,6 @@ void RasterizerOpenGL::Clear() { glClear(clear_mask); } -std::pair RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, - size_t alignment) { - // Align the offset, not the mapped pointer - GLintptr offset_aligned = - static_cast(Common::AlignUp(static_cast(buffer_offset), alignment)); - return {buffer_ptr + (offset_aligned - buffer_offset), offset_aligned}; -} - -std::tuple RasterizerOpenGL::UploadMemory(u8* buffer_ptr, - GLintptr buffer_offset, - Tegra::GPUVAddr gpu_addr, - size_t size, size_t alignment) { - std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment); - GLintptr uploaded_offset = buffer_offset; - - auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); - const boost::optional cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; - Memory::ReadBlock(*cpu_addr, buffer_ptr, size); - - buffer_ptr += size; - buffer_offset += size; - - return {buffer_ptr, buffer_offset, uploaded_offset}; -} - void RasterizerOpenGL::DrawArrays() { if (accelerate_draw == AccelDraw::Disabled) return; @@ -489,7 +450,7 @@ void RasterizerOpenGL::DrawArrays() { const bool is_indexed = accelerate_draw == AccelDraw::Indexed; const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; - state.draw.vertex_buffer = stream_buffer.GetHandle(); + state.draw.vertex_buffer = buffer_cache.GetHandle(); state.Apply(); size_t buffer_size = CalculateVertexArraysSize(); @@ -506,25 +467,21 @@ void RasterizerOpenGL::DrawArrays() { // Add space for at least 18 constant buffers buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); - u8* buffer_ptr; - GLintptr buffer_offset; - std::tie(buffer_ptr, buffer_offset, std::ignore) = - stream_buffer.Map(static_cast(buffer_size), 4); - u8* buffer_ptr_base = buffer_ptr; + buffer_cache.Map(buffer_size); - std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); + SetupVertexArrays(); // If indexed mode, copy the index buffer GLintptr index_buffer_offset = 0; if (is_indexed) { MICROPROFILE_SCOPE(OpenGL_Index); - std::tie(buffer_ptr, buffer_offset, index_buffer_offset) = UploadMemory( - buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size); + index_buffer_offset = + buffer_cache.UploadMemory(regs.index_array.StartAddress(), index_buffer_size); } - std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset); + SetupShaders(); - stream_buffer.Unmap(buffer_ptr - buffer_ptr_base); + buffer_cache.Unmap(); shader_program_manager->ApplyTo(state); state.Apply(); @@ -569,6 +526,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); + buffer_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { @@ -658,11 +616,8 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr } } -std::tuple RasterizerOpenGL::SetupConstBuffers(u8* buffer_ptr, - GLintptr buffer_offset, - Maxwell::ShaderStage stage, - Shader& shader, - u32 current_bindpoint) { +u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader, + u32 current_bindpoint) { MICROPROFILE_SCOPE(OpenGL_UBO); const auto& gpu = Core::System::GetInstance().GPU(); const auto& maxwell3d = gpu.Maxwell3D(); @@ -699,13 +654,11 @@ std::tuple RasterizerOpenGL::SetupConstBuffers(u8* buffer_pt size = Common::AlignUp(size, sizeof(GLvec4)); ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); - GLintptr const_buffer_offset; - std::tie(buffer_ptr, buffer_offset, const_buffer_offset) = - UploadMemory(buffer_ptr, buffer_offset, buffer.address, size, - static_cast(uniform_buffer_alignment)); + GLintptr const_buffer_offset = buffer_cache.UploadMemory( + buffer.address, size, static_cast(uniform_buffer_alignment)); glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint, - stream_buffer.GetHandle(), const_buffer_offset, size); + buffer_cache.GetHandle(), const_buffer_offset, size); // Now configure the bindpoint of the buffer inside the shader glUniformBlockBinding(shader->GetProgramHandle(), @@ -715,7 +668,7 @@ std::tuple RasterizerOpenGL::SetupConstBuffers(u8* buffer_pt state.Apply(); - return {buffer_ptr, buffer_offset, current_bindpoint + static_cast(entries.size())}; + return current_bindpoint + static_cast(entries.size()); } u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, u32 current_unit) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 30045ebff..4c4b084b8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -18,7 +18,9 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" +#include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -109,9 +111,8 @@ private: * @param current_bindpoint The offset at which to start counting new buffer bindpoints. * @returns The next available bindpoint for use in the next shader stage. */ - std::tuple SetupConstBuffers( - u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - Shader& shader, u32 current_bindpoint); + u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, + u32 current_bindpoint); /* * Configures the current textures to use for the draw command. @@ -173,22 +174,16 @@ private: std::array texture_samplers; static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; - OGLStreamBuffer stream_buffer; + OGLBufferCache buffer_cache; OGLBuffer uniform_buffer; OGLFramebuffer framebuffer; GLint uniform_buffer_alignment; size_t CalculateVertexArraysSize() const; - std::pair SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); + void SetupVertexArrays(); - std::pair SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); - - std::pair AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, size_t alignment); - - std::tuple UploadMemory(u8* buffer_ptr, GLintptr buffer_offset, - Tegra::GPUVAddr gpu_addr, size_t size, - size_t alignment = 4); + void SetupShaders(); enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw = AccelDraw::Disabled; -- cgit v1.2.3