summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/video_core/CMakeLists.txt 2
-rw-r--r-- src/video_core/engines/maxwell_3d.h 6
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 17
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h 7
-rw-r--r-- src/video_core/renderer_opengl/gl_primitive_assembler.cpp 64
-rw-r--r-- src/video_core/renderer_opengl/gl_primitive_assembler.h 33
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 146
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 7
8 files changed, 236 insertions, 46 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f5ae57039..09ecc5bad 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -27,6 +27,8 @@ add_library(video_core STATIC
renderer_base.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
+ renderer_opengl/gl_primitive_assembler.cpp
+ renderer_opengl/gl_primitive_assembler.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_rasterizer_cache.cpp
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 9f5581045..4290da33f 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -744,6 +744,12 @@ public:
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) |
end_addr_low);
}
+
+ /// Returns the GPU address of the first index to fetch: the buffer start address
+ /// advanced by `first` elements of the current index format size.
+ GPUVAddr IndexStart() const {
+     const auto skipped_indices = static_cast<size_t>(first);
+     const auto bytes_per_index = static_cast<size_t>(FormatSizeInBytes());
+     return StartAddress() + skipped_indices * bytes_per_index;
+ }
} index_array;
INSERT_PADDING_WORDS(0x7);
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 578aca789..c142095c5 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -34,7 +34,7 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
}
AlignBuffer(alignment);
- GLintptr uploaded_offset = buffer_offset;
+ const GLintptr uploaded_offset = buffer_offset;
Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
@@ -57,13 +57,23 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s
std::size_t alignment) {
AlignBuffer(alignment);
std::memcpy(buffer_ptr, raw_pointer, size);
- GLintptr uploaded_offset = buffer_offset;
+ const GLintptr uploaded_offset = buffer_offset;
buffer_ptr += size;
buffer_offset += size;
return uploaded_offset;
}
+/// Carves `size` bytes out of the currently mapped stream buffer without writing to them.
+/// The write cursor is first aligned to `alignment`; the returned tuple holds the host
+/// pointer and the buffer offset of the start of the reserved range.
+std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) {
+    AlignBuffer(alignment);
+
+    // Capture the aligned cursor before advancing past the reservation.
+    const std::tuple<u8*, GLintptr> reservation{buffer_ptr, buffer_offset};
+
+    buffer_offset += size;
+    buffer_ptr += size;
+    return reservation;
+}
+
void OGLBufferCache::Map(std::size_t max_size) {
bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) =
@@ -74,6 +84,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
InvalidateAll();
}
}
+
void OGLBufferCache::Unmap() {
stream_buffer.Unmap(buffer_offset - buffer_offset_base);
}
@@ -84,7 +95,7 @@ GLuint OGLBufferCache::GetHandle() const {
void OGLBufferCache::AlignBuffer(std::size_t alignment) {
// Align the offset, not the mapped pointer
- GLintptr offset_aligned =
+ const GLintptr offset_aligned =
static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment));
buffer_ptr += offset_aligned - buffer_offset;
buffer_offset = offset_aligned;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 6c18461f4..965976334 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -6,6 +6,7 @@
#include <cstddef>
#include <memory>
+#include <tuple>
#include "common/common_types.h"
#include "video_core/rasterizer_cache.h"
@@ -33,11 +34,17 @@ class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBuffer
public:
explicit OGLBufferCache(std::size_t size);
+ /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
+ /// allocated.
GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool cache = true);
+ /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
+ /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
+ std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
+
void Map(std::size_t max_size);
void Unmap();
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
new file mode 100644
index 000000000..ee1d9601b
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -0,0 +1,64 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/memory.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_primitive_assembler.h"
+
+namespace OpenGL {
+
+// Number of indices emitted for each quad once it is split into two triangles
+// (2 triangles * 3 vertices). Despite the name this is an index count, not a triangle count.
+constexpr u32 TRIANGLES_PER_QUAD = 6;
+// Quad-local vertex order of the two generated triangles: (0, 1, 2) and (0, 2, 3).
+constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3};
+
+// Stores a reference to the buffer cache that index memory is reserved from.
+PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {}
+
+// Defaulted: the only data member is a reference to the buffer cache.
+PrimitiveAssembler::~PrimitiveAssembler() = default;
+
+/// Returns the number of bytes of u32 index data needed to draw `count` quad vertices as
+/// triangles. `count` is expected to be a multiple of 4 (one quad per four vertices).
+std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const {
+    ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4");
+
+    const u32 quad_count = count / 4;
+    const u32 index_count = quad_count * TRIANGLES_PER_QUAD;
+    return index_count * sizeof(GLuint);
+}
+
+/// Generates u32 triangle indices for a non-indexed quad draw.
+/// `first` is the first vertex of the draw and `count` the number of quad vertices (four
+/// per quad). Returns the buffer cache offset at which the generated indices start.
+GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
+    const std::size_t reserved_bytes{CalculateQuadSize(count)};
+    auto [cursor, offset] = buffer_cache.ReserveMemory(reserved_bytes);
+
+    const u32 quad_count = count / 4;
+    for (u32 quad = 0; quad < quad_count; ++quad) {
+        // First vertex of this quad; QUAD_MAP entries are relative to it.
+        const u32 quad_base = first + quad * 4;
+        for (const u32 corner : QUAD_MAP) {
+            const u32 index = quad_base + corner;
+            std::memcpy(cursor, &index, sizeof(index));
+            cursor += sizeof(index);
+        }
+    }
+
+    return offset;
+}
+
+/// Generates a triangle index buffer for an indexed quad draw by reordering guest indices.
+/// `gpu_addr` is the guest GPU address of the first source index, `index_size` the size in
+/// bytes of one source index and `count` the number of source indices (four per quad).
+/// Output indices keep the source index size. Returns the buffer cache offset at which
+/// the reordered indices start.
+GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
+                                             u32 count) {
+    // The reservation is sized for u32 output, so it is never smaller than the
+    // index_size-sized entries actually written below.
+    const std::size_t map_size{CalculateQuadSize(count)};
+    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
+
+    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
+    const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    // Dereferencing an empty optional is undefined behaviour; fail loudly instead.
+    ASSERT_MSG(cpu_addr, "Invalid GPU address");
+    const u8* source{Memory::GetPointer(*cpu_addr)};
+
+    const u32 quad_count = count / 4;
+    for (u32 primitive = 0; primitive < quad_count; ++primitive) {
+        for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
+            // Position inside the source index buffer of the index to copy.
+            const u32 index = primitive * 4 + QUAD_MAP[i];
+            const u8* src_offset = source + (index * index_size);
+
+            std::memcpy(dst_pointer, src_offset, index_size);
+            dst_pointer += index_size;
+        }
+    }
+
+    return index_offset;
+}
+
+} // namespace OpenGL
\ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
new file mode 100644
index 000000000..a8cb88eb5
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h
@@ -0,0 +1,33 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "video_core/memory_manager.h"
+
+namespace OpenGL {
+
+class OGLBufferCache;
+
+/// Builds index buffers that let quad primitives be drawn as triangles. Index memory is
+/// reserved from the buffer cache passed at construction.
+class PrimitiveAssembler {
+public:
+ explicit PrimitiveAssembler(OGLBufferCache& buffer_cache);
+ ~PrimitiveAssembler();
+
+ /// Calculates the size required by MakeQuadArray and MakeQuadIndexed.
+ std::size_t CalculateQuadSize(u32 count) const;
+
+ /// Generates u32 triangle indices for `count` quad vertices starting at vertex `first`.
+ /// Returns the buffer cache offset where the indices were written.
+ GLintptr MakeQuadArray(u32 first, u32 count);
+
+ /// Reorders `count` guest indices of `index_size` bytes each, read from `gpu_addr`, into
+ /// triangle order. Returns the buffer cache offset where the indices were written.
+ GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
+
+private:
+ /// Buffer cache that the generated index data is reserved from (not owned).
+ OGLBufferCache& buffer_cache;
+};
+
+} // namespace OpenGL
\ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 587d9dffb..60dcdc184 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -42,6 +42,41 @@ MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(12
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
+MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
+
+/// Everything needed to issue one OpenGL draw call, gathered ahead of the dispatch.
+struct DrawParameters {
+    GLenum primitive_mode;  // GL primitive type handed to the draw call
+    GLsizei count;          // Number of indices (indexed) or vertices (arrays) to draw
+    GLint current_instance; // Base instance; instanced entry points are used when > 0
+    bool use_indexed;       // Selects glDrawElements* over glDrawArrays*
+
+    GLint vertex_first; // First vertex; only read by non-indexed draws
+
+    GLenum index_format;          // Index type; only read by indexed draws
+    GLint base_vertex;            // Base vertex; only read by indexed draws
+    GLintptr index_buffer_offset; // Byte offset passed as the indices pointer
+
+    /// Issues the draw call described by this structure. A single instance is drawn in
+    /// every case; `current_instance` only selects the base instance.
+    void DispatchDraw() const {
+        const bool is_instanced = current_instance > 0;
+
+        if (!use_indexed) {
+            if (is_instanced) {
+                glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, 1,
+                                                  current_instance);
+            } else {
+                glDrawArrays(primitive_mode, vertex_first, count);
+            }
+            return;
+        }
+
+        // With a bound element buffer the "pointer" argument is a byte offset into it.
+        const auto indices = reinterpret_cast<const void*>(index_buffer_offset);
+        if (is_instanced) {
+            glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
+                                                          indices, 1, base_vertex,
+                                                          current_instance);
+            return;
+        }
+        glDrawElementsBaseVertex(primitive_mode, count, index_format, indices, base_vertex);
+    }
+};
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
@@ -172,6 +207,53 @@ void RasterizerOpenGL::SetupVertexArrays() {
}
}
+/// Builds the parameters of the pending draw call and uploads or generates its index data.
+///
+/// Called by DrawArrays between buffer_cache.Map() and buffer_cache.Unmap(), since index
+/// data is written into the buffer cache here. Quad topologies are converted to
+/// GL_TRIANGLES through an index buffer produced by the primitive assembler; every other
+/// topology is forwarded as is.
+DrawParameters RasterizerOpenGL::SetupDraw() {
+    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    const auto& regs = gpu.regs;
+    const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
+
+    DrawParameters params{};
+    params.current_instance = gpu.state.current_instance;
+
+    if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
+        MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly);
+
+        // Quads are always drawn indexed: each quad becomes two triangles (six indices).
+        params.use_indexed = true;
+        params.primitive_mode = GL_TRIANGLES;
+
+        if (is_indexed) {
+            params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
+            params.count = (regs.index_array.count / 4) * 6;
+            params.index_buffer_offset = primitive_assembler.MakeQuadIndexed(
+                regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(),
+                regs.index_array.count);
+            params.base_vertex = static_cast<GLint>(regs.vb_element_base);
+        } else {
+            // MakeQuadArray always generates u32 indexes
+            params.index_format = GL_UNSIGNED_INT;
+            params.count = (regs.vertex_buffer.count / 4) * 6;
+            // MakeQuadArray takes the source vertex count (four per quad), not the
+            // expanded index count stored in params.count.
+            params.index_buffer_offset = primitive_assembler.MakeQuadArray(
+                regs.vertex_buffer.first, regs.vertex_buffer.count);
+        }
+        return params;
+    }
+
+    params.use_indexed = is_indexed;
+    params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
+
+    if (is_indexed) {
+        MICROPROFILE_SCOPE(OpenGL_Index);
+        params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
+        params.count = regs.index_array.count;
+        params.index_buffer_offset =
+            buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
+        params.base_vertex = static_cast<GLint>(regs.vb_element_base);
+    } else {
+        params.count = regs.vertex_buffer.count;
+        params.vertex_first = regs.vertex_buffer.first;
+    }
+    // Flowing off the end of a non-void function is undefined behaviour.
+    return params;
+}
+
void RasterizerOpenGL::SetupShaders() {
MICROPROFILE_SCOPE(OpenGL_Shader);
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
@@ -256,6 +338,13 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
return size;
}
+/// Returns the size in bytes of the index data referenced by the current index array
+/// registers: index count multiplied by the size of one index.
+std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
+    const auto& index_array = Core::System::GetInstance().GPU().Maxwell3D().regs.index_array;
+
+    const auto index_count = static_cast<std::size_t>(index_array.count);
+    const auto bytes_per_index = static_cast<std::size_t>(index_array.FormatSizeInBytes());
+    return index_count * bytes_per_index;
+}
+
bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
DrawArrays();
@@ -459,16 +548,23 @@ void RasterizerOpenGL::DrawArrays() {
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
- const u64 index_buffer_size{static_cast<u64>(regs.index_array.count) *
- static_cast<u64>(regs.index_array.FormatSizeInBytes())};
state.draw.vertex_buffer = buffer_cache.GetHandle();
state.Apply();
std::size_t buffer_size = CalculateVertexArraysSize();
- if (is_indexed) {
- buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size;
+ // Add space for index buffer (keeping in mind non-core primitives)
+ switch (regs.draw.topology) {
+ case Maxwell::PrimitiveTopology::Quads:
+ buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) +
+ primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count);
+ break;
+ default:
+ if (is_indexed) {
+ buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + CalculateIndexBufferSize();
+ }
+ break;
}
// Uniform space for the 5 shader stages
@@ -482,20 +578,7 @@ void RasterizerOpenGL::DrawArrays() {
buffer_cache.Map(buffer_size);
SetupVertexArrays();
-
- // If indexed mode, copy the index buffer
- GLintptr index_buffer_offset = 0;
- if (is_indexed) {
- MICROPROFILE_SCOPE(OpenGL_Index);
-
- // Adjust the index buffer offset so it points to the first desired index.
- auto index_start = regs.index_array.StartAddress();
- index_start += static_cast<size_t>(regs.index_array.first) *
- static_cast<size_t>(regs.index_array.FormatSizeInBytes());
-
- index_buffer_offset = buffer_cache.UploadMemory(index_start, index_buffer_size);
- }
-
+ DrawParameters params = SetupDraw();
SetupShaders();
buffer_cache.Unmap();
@@ -503,31 +586,8 @@ void RasterizerOpenGL::DrawArrays() {
shader_program_manager->ApplyTo(state);
state.Apply();
- const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)};
- if (is_indexed) {
- const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)};
-
- if (gpu.state.current_instance > 0) {
- glDrawElementsInstancedBaseVertexBaseInstance(
- primitive_mode, regs.index_array.count,
- MaxwellToGL::IndexFormat(regs.index_array.format),
- reinterpret_cast<const void*>(index_buffer_offset), 1, base_vertex,
- gpu.state.current_instance);
- } else {
- glDrawElementsBaseVertex(primitive_mode, regs.index_array.count,
- MaxwellToGL::IndexFormat(regs.index_array.format),
- reinterpret_cast<const void*>(index_buffer_offset),
- base_vertex);
- }
- } else {
- if (gpu.state.current_instance > 0) {
- glDrawArraysInstancedBaseInstance(primitive_mode, regs.vertex_buffer.first,
- regs.vertex_buffer.count, 1,
- gpu.state.current_instance);
- } else {
- glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count);
- }
- }
+ // Execute draw call
+ params.DispatchDraw();
// Disable scissor test
state.scissor.enabled = false;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4c8ecbd1c..bf954bb5d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -23,6 +23,7 @@
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -38,6 +39,7 @@ class EmuWindow;
namespace OpenGL {
struct ScreenInfo;
+struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public:
@@ -192,12 +194,17 @@ private:
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
OGLFramebuffer framebuffer;
+ PrimitiveAssembler primitive_assembler{buffer_cache};
GLint uniform_buffer_alignment;
std::size_t CalculateVertexArraysSize() const;
+ std::size_t CalculateIndexBufferSize() const;
+
void SetupVertexArrays();
+ DrawParameters SetupDraw();
+
void SetupShaders();
enum class AccelDraw { Disabled, Arrays, Indexed };