diff options
-rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 17 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 7 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_primitive_assembler.cpp | 64 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_primitive_assembler.h | 33 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 146 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 7 |
8 files changed, 236 insertions, 46 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f5ae57039..09ecc5bad 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -27,6 +27,8 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h + renderer_opengl/gl_primitive_assembler.cpp + renderer_opengl/gl_primitive_assembler.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_rasterizer_cache.cpp diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 9f5581045..4290da33f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -744,6 +744,12 @@ public: return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) | end_addr_low); } + + /// Adjust the index buffer offset so it points to the first desired index. + GPUVAddr IndexStart() const { + return StartAddress() + static_cast<size_t>(first) * + static_cast<size_t>(FormatSizeInBytes()); + } } index_array; INSERT_PADDING_WORDS(0x7); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 578aca789..c142095c5 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -34,7 +34,7 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size } AlignBuffer(alignment); - GLintptr uploaded_offset = buffer_offset; + const GLintptr uploaded_offset = buffer_offset; Memory::ReadBlock(*cpu_addr, buffer_ptr, size); @@ -57,13 +57,23 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s std::size_t alignment) { AlignBuffer(alignment); std::memcpy(buffer_ptr, raw_pointer, size); - GLintptr uploaded_offset = buffer_offset; + const GLintptr uploaded_offset = buffer_offset; buffer_ptr += size; buffer_offset += size; return uploaded_offset; } +std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) { + AlignBuffer(alignment); + u8* const uploaded_ptr = buffer_ptr; + const GLintptr uploaded_offset = buffer_offset; + + buffer_ptr += size; + buffer_offset += size; + return std::make_tuple(uploaded_ptr, uploaded_offset); +} + void OGLBufferCache::Map(std::size_t max_size) { bool invalidate; std::tie(buffer_ptr, buffer_offset_base, invalidate) = @@ -74,6 +84,7 @@ void OGLBufferCache::Map(std::size_t max_size) { InvalidateAll(); } } + void OGLBufferCache::Unmap() { stream_buffer.Unmap(buffer_offset - buffer_offset_base); } @@ -84,7 +95,7 @@ GLuint OGLBufferCache::GetHandle() const { void OGLBufferCache::AlignBuffer(std::size_t alignment) { // Align the offset, not the mapped pointer - GLintptr offset_aligned = + const GLintptr offset_aligned = static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); buffer_ptr += offset_aligned - buffer_offset; buffer_offset = offset_aligned; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 6c18461f4..965976334 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -6,6 +6,7 @@ #include <cstddef> #include <memory> +#include <tuple> #include "common/common_types.h" #include "video_core/rasterizer_cache.h" @@ -33,11 +34,17 @@ class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBuffer public: explicit OGLBufferCache(std::size_t size); + /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been + /// allocated. GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, bool cache = true); + /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); + /// Reserves memory to be used by host's CPU. Returns mapped address and offset. + std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); + void Map(std::size_t max_size); void Unmap(); diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp new file mode 100644 index 000000000..ee1d9601b --- /dev/null +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp @@ -0,0 +1,64 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include "common/assert.h" +#include "common/common_types.h" +#include "core/memory.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_primitive_assembler.h" + +namespace OpenGL { + +constexpr u32 TRIANGLES_PER_QUAD = 6; +constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3}; + +PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {} + +PrimitiveAssembler::~PrimitiveAssembler() = default; + +std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const { + ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4"); + return (count / 4) * TRIANGLES_PER_QUAD * sizeof(GLuint); +} + +GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) { + const std::size_t size{CalculateQuadSize(count)}; + auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(size); + + for (u32 primitive = 0; primitive < count / 4; ++primitive) { + for (u32 i = 0; i < TRIANGLES_PER_QUAD; ++i) { + const u32 index = first + primitive * 4 + QUAD_MAP[i]; + std::memcpy(dst_pointer, &index, sizeof(index)); + dst_pointer += sizeof(index); + } + } + + return index_offset; +} + +GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, + u32 count) { + const std::size_t map_size{CalculateQuadSize(count)}; + auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); + + auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; + const u8* source{Memory::GetPointer(*cpu_addr)}; + + for (u32 primitive = 0; primitive < count / 4; ++primitive) { + for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { + const u32 index = primitive * 4 + QUAD_MAP[i]; + const u8* src_offset = source + (index * index_size); + + std::memcpy(dst_pointer, src_offset, index_size); + dst_pointer += index_size; + } + } + + return index_offset; +} + +} // namespace OpenGL
\ No newline at end of file diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h new file mode 100644 index 000000000..a8cb88eb5 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h @@ -0,0 +1,33 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include <glad/glad.h> + +#include "common/common_types.h" +#include "video_core/memory_manager.h" + +namespace OpenGL { + +class OGLBufferCache; + +class PrimitiveAssembler { +public: + explicit PrimitiveAssembler(OGLBufferCache& buffer_cache); + ~PrimitiveAssembler(); + + /// Calculates the size required by MakeQuadArray and MakeQuadIndexed. + std::size_t CalculateQuadSize(u32 count) const; + + GLintptr MakeQuadArray(u32 first, u32 count); + + GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count); + +private: + OGLBufferCache& buffer_cache; +}; + +} // namespace OpenGL
\ No newline at end of file diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 587d9dffb..60dcdc184 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -42,6 +42,41 @@ MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(12 MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); +MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100)); + +struct DrawParameters { + GLenum primitive_mode; + GLsizei count; + GLint current_instance; + bool use_indexed; + + GLint vertex_first; + + GLenum index_format; + GLint base_vertex; + GLintptr index_buffer_offset; + + void DispatchDraw() const { + if (use_indexed) { + const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset); + if (current_instance > 0) { + glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format, + index_buffer_ptr, 1, base_vertex, + current_instance); + } else { + glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr, + base_vertex); + } + } else { + if (current_instance > 0) { + glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, 1, + current_instance); + } else { + glDrawArrays(primitive_mode, vertex_first, count); + } + } + } +}; RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) { @@ -172,6 +207,53 @@ void RasterizerOpenGL::SetupVertexArrays() { } } +DrawParameters RasterizerOpenGL::SetupDraw() { + const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + const auto& regs = gpu.regs; + const bool is_indexed = accelerate_draw == AccelDraw::Indexed; + + DrawParameters params{}; + params.current_instance = gpu.state.current_instance; + + if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { + MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly); + + params.use_indexed = true; + params.primitive_mode = GL_TRIANGLES; + + if (is_indexed) { + params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); + params.count = (regs.index_array.count / 4) * 6; + params.index_buffer_offset = primitive_assembler.MakeQuadIndexed( + regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(), + regs.index_array.count); + params.base_vertex = static_cast<GLint>(regs.vb_element_base); + } else { + // MakeQuadArray always generates u32 indexes + params.index_format = GL_UNSIGNED_INT; + params.count = (regs.vertex_buffer.count / 4) * 6; + params.index_buffer_offset = + primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count); + } + return params; + } + + params.use_indexed = is_indexed; + params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); + + if (is_indexed) { + MICROPROFILE_SCOPE(OpenGL_Index); + params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); + params.count = regs.index_array.count; + params.index_buffer_offset = + buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); + params.base_vertex = static_cast<GLint>(regs.vb_element_base); + } else { + params.count = regs.vertex_buffer.count; + params.vertex_first = regs.vertex_buffer.first; + } +} + void RasterizerOpenGL::SetupShaders() { MICROPROFILE_SCOPE(OpenGL_Shader); const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); @@ -256,6 +338,13 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { return size; } +std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { + const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + + return static_cast<std::size_t>(regs.index_array.count) * + static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); +} + bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; DrawArrays(); @@ -459,16 +548,23 @@ void RasterizerOpenGL::DrawArrays() { // Draw the vertex batch const bool is_indexed = accelerate_draw == AccelDraw::Indexed; - const u64 index_buffer_size{static_cast<u64>(regs.index_array.count) * - static_cast<u64>(regs.index_array.FormatSizeInBytes())}; state.draw.vertex_buffer = buffer_cache.GetHandle(); state.Apply(); std::size_t buffer_size = CalculateVertexArraysSize(); - if (is_indexed) { - buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size; + // Add space for index buffer (keeping in mind non-core primitives) + switch (regs.draw.topology) { + case Maxwell::PrimitiveTopology::Quads: + buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + + primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count); + break; + default: + if (is_indexed) { + buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + CalculateIndexBufferSize(); + } + break; } // Uniform space for the 5 shader stages @@ -482,20 +578,7 @@ void RasterizerOpenGL::DrawArrays() { buffer_cache.Map(buffer_size); SetupVertexArrays(); - - // If indexed mode, copy the index buffer - GLintptr index_buffer_offset = 0; - if (is_indexed) { - MICROPROFILE_SCOPE(OpenGL_Index); - - // Adjust the index buffer offset so it points to the first desired index. - auto index_start = regs.index_array.StartAddress(); - index_start += static_cast<size_t>(regs.index_array.first) * - static_cast<size_t>(regs.index_array.FormatSizeInBytes()); - - index_buffer_offset = buffer_cache.UploadMemory(index_start, index_buffer_size); - } - + DrawParameters params = SetupDraw(); SetupShaders(); buffer_cache.Unmap(); @@ -503,31 +586,8 @@ void RasterizerOpenGL::DrawArrays() { shader_program_manager->ApplyTo(state); state.Apply(); - const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)}; - if (is_indexed) { - const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)}; - - if (gpu.state.current_instance > 0) { - glDrawElementsInstancedBaseVertexBaseInstance( - primitive_mode, regs.index_array.count, - MaxwellToGL::IndexFormat(regs.index_array.format), - reinterpret_cast<const void*>(index_buffer_offset), 1, base_vertex, - gpu.state.current_instance); - } else { - glDrawElementsBaseVertex(primitive_mode, regs.index_array.count, - MaxwellToGL::IndexFormat(regs.index_array.format), - reinterpret_cast<const void*>(index_buffer_offset), - base_vertex); - } - } else { - if (gpu.state.current_instance > 0) { - glDrawArraysInstancedBaseInstance(primitive_mode, regs.vertex_buffer.first, - regs.vertex_buffer.count, 1, - gpu.state.current_instance); - } else { - glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count); - } - } + // Execute draw call + params.DispatchDraw(); // Disable scissor test state.scissor.enabled = false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4c8ecbd1c..bf954bb5d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -23,6 +23,7 @@ #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_primitive_assembler.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -38,6 +39,7 @@ class EmuWindow; namespace OpenGL { struct ScreenInfo; +struct DrawParameters; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: @@ -192,12 +194,17 @@ private: static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; OGLFramebuffer framebuffer; + PrimitiveAssembler primitive_assembler{buffer_cache}; GLint uniform_buffer_alignment; std::size_t CalculateVertexArraysSize() const; + std::size_t CalculateIndexBufferSize() const; + void SetupVertexArrays(); + DrawParameters SetupDraw(); + void SetupShaders(); enum class AccelDraw { Disabled, Arrays, Indexed }; |