From 9531a2928369bcc094c28b77408811d47c46c7f2 Mon Sep 17 00:00:00 2001 From: Subv Date: Sat, 21 Apr 2018 19:19:33 -0500 Subject: GPU: Support multiple enabled vertex arrays. The vertex arrays will be copied to the stream buffer one after the other, and the attributes will be set using the ARB_vertex_attrib_binding extension. yuzu now thus requires OpenGL 4.3 or the ARB_vertex_attrib_binding extension. --- src/video_core/engines/maxwell_3d.h | 5 + src/video_core/renderer_opengl/gl_rasterizer.cpp | 121 +++++++++++++++-------- src/video_core/renderer_opengl/gl_rasterizer.h | 6 +- 3 files changed, 89 insertions(+), 43 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d4fcedace..609504795 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -500,6 +500,11 @@ public: return static_cast((static_cast(start_high) << 32) | start_low); } + + bool IsEnabled() const { + return enable != 0 && StartAddress() != 0; + } + } vertex_array[NumVertexArrays]; Blend blend; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d4a0d6db..82001e7b4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -127,7 +127,8 @@ RasterizerOpenGL::~RasterizerOpenGL() { } } -void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { +std::pair RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, + GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VAO); const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; @@ -136,43 +137,59 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { state.draw.vertex_buffer = stream_buffer->GetHandle(); state.Apply(); - // TODO(bunnei): Add support for 1+ vertex arrays - const auto& vertex_array{regs.vertex_array[0]}; - const auto& vertex_array_limit{regs.vertex_array_limit[0]}; - ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?"); - ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!"); - for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) { - ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index); + // Upload all guest vertex arrays sequentially to our buffer + for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { + const auto& vertex_array = regs.vertex_array[index]; + if (!vertex_array.IsEnabled()) + continue; + + const Tegra::GPUVAddr start = vertex_array.StartAddress(); + const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); + + ASSERT(end > start); + u64 size = end - start + 1; + + // Copy vertex array data + const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(start)}; + res_cache.FlushRegion(data_addr, size, nullptr); + Memory::ReadBlock(data_addr, array_ptr, size); + + // Bind the vertex array to the buffer at the current offset. + glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); + + ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); + + array_ptr += size; + buffer_offset += size; } // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. // Enables the first 16 vertex attributes always, as we don't know which ones are actually used - // until shader time. Note, Tegra technically supports 32, but we're cappinig this to 16 for now + // until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now // to avoid OpenGL errors. + // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't + // assume every shader uses them all. for (unsigned index = 0; index < 16; ++index) { auto& attrib = regs.vertex_attrib_format[index]; NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), attrib.offset.Value(), attrib.IsNormalized()); - glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), - attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride, - reinterpret_cast(buffer_offset + attrib.offset)); + auto& buffer = regs.vertex_array[attrib.buffer]; + ASSERT(buffer.IsEnabled()); + glEnableVertexAttribArray(index); + glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), + attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); + glVertexAttribBinding(index, attrib.buffer); + hw_vao_enabled_attributes[index] = true; } - // Copy vertex array data - const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1}; - const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; - res_cache.FlushRegion(data_addr, data_size, nullptr); - Memory::ReadBlock(data_addr, array_ptr, data_size); - - array_ptr += data_size; - buffer_offset += data_size; + return {array_ptr, buffer_offset}; } -void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { +void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { // Helper function for uploading uniform data const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { if (has_ARB_direct_state_access) { @@ -190,8 +207,6 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size u32 current_constbuffer_bindpoint = 0; for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { - ptr_pos += sizeof(GLShader::MaxwellUniformData); - auto& shader_config = gpu.regs.shader_config[index]; const Maxwell::ShaderProgram program{static_cast(index)}; @@ -205,13 +220,16 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size } // Upload uniform data as one UBO per stage - const GLintptr ubo_offset = buffer_offset + static_cast(ptr_pos); + const GLintptr ubo_offset = buffer_offset; copy_buffer(uniform_buffers[stage].handle, ubo_offset, sizeof(GLShader::MaxwellUniformData)); GLShader::MaxwellUniformData* ub_ptr = - reinterpret_cast(&buffer_ptr[ptr_pos]); + reinterpret_cast(buffer_ptr); ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); + buffer_ptr += sizeof(GLShader::MaxwellUniformData); + buffer_offset += sizeof(GLShader::MaxwellUniformData); + // Fetch program code from memory GLShader::ProgramCode program_code; const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; @@ -252,6 +270,24 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size shader_program_manager->UseTrivialGeometryShader(); } +size_t RasterizerOpenGL::CalculateVertexArraysSize() const { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + size_t size = 0; + for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { + if (!regs.vertex_array[index].IsEnabled()) + continue; + + const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress(); + const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); + + ASSERT(end > start); + size += end - start + 1; + } + + return size; +} + bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; DrawArrays(); @@ -329,44 +365,49 @@ void RasterizerOpenGL::DrawArrays() { const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; - // TODO(bunnei): Add support for 1+ vertex arrays - vs_input_size = vertex_num * regs.vertex_array[0].stride; - state.draw.vertex_buffer = stream_buffer->GetHandle(); state.Apply(); - size_t buffer_size = static_cast(vs_input_size); + size_t buffer_size = CalculateVertexArraysSize(); + if (is_indexed) { - buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size; + buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size; } // Uniform space for the 5 shader stages - buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; + buffer_size = Common::AlignUp(buffer_size, 4) + + sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; - size_t ptr_pos = 0; u8* buffer_ptr; GLintptr buffer_offset; std::tie(buffer_ptr, buffer_offset) = stream_buffer->Map(static_cast(buffer_size), 4); - SetupVertexArray(buffer_ptr, buffer_offset); - ptr_pos += vs_input_size; + u8* offseted_buffer; + std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); + + offseted_buffer = + reinterpret_cast(Common::AlignUp(reinterpret_cast(offseted_buffer), 4)); + buffer_offset = Common::AlignUp(buffer_offset, 4); // If indexed mode, copy the index buffer GLintptr index_buffer_offset = 0; if (is_indexed) { - ptr_pos = Common::AlignUp(ptr_pos, 4); - const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; const VAddr index_data_addr{ memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())}; - Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size); + Memory::ReadBlock(index_data_addr, offseted_buffer, index_buffer_size); - index_buffer_offset = buffer_offset + static_cast(ptr_pos); - ptr_pos += index_buffer_size; + index_buffer_offset = buffer_offset; + offseted_buffer += index_buffer_size; + buffer_offset += index_buffer_size; } - SetupShaders(buffer_ptr, buffer_offset, ptr_pos); + offseted_buffer = + reinterpret_cast(Common::AlignUp(reinterpret_cast(offseted_buffer), 4)); + buffer_offset = Common::AlignUp(buffer_offset, 4); + + SetupShaders(offseted_buffer, buffer_offset); stream_buffer->Unmap(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 03e02b52a..544714b95 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -148,13 +148,13 @@ private: static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024; std::unique_ptr stream_buffer; - GLsizeiptr vs_input_size; + size_t CalculateVertexArraysSize() const; - void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); + std::pair SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); std::array uniform_buffers; - void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos); + void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw; -- cgit v1.2.3