summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h 14
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 31
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h 20
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp 4
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h 5
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 29
6 files changed, 84 insertions, 19 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2442ddfd6..63b3a8205 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -30,7 +30,7 @@ public:
using BufferInfo = std::pair<const TBufferType*, u64>;
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
- bool is_written = false) {
+ bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex};
auto& memory_manager = system.GPU().MemoryManager();
@@ -43,9 +43,13 @@ public:
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
constexpr std::size_t max_stream_size = 0x800;
- if (size < max_stream_size) {
+ if (use_fast_cbuf || size < max_stream_size) {
if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
- return StreamBufferUpload(host_ptr, size, alignment);
+ if (use_fast_cbuf) {
+ return ConstBufferUpload(host_ptr, size);
+ } else {
+ return StreamBufferUpload(host_ptr, size, alignment);
+ }
}
}
@@ -152,6 +156,10 @@ protected:
virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) = 0;
+ /// Fast-path constant buffer upload hook, called by UploadMemory when use_fast_cbuf is set.
+ /// This base implementation uploads nothing and returns a value-initialized BufferInfo;
+ /// backends that provide a fast path override it.
+ virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
+ return BufferInfo{};
+ }
+
/// Register an object into the cache
void Register(const MapInterval& new_map, bool inherit_written = false) {
const CacheAddr cache_ptr = new_map->GetStart();
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index f8a807c84..0375fca17 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -8,13 +8,17 @@
#include "common/assert.h"
#include "common/microprofile.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
@@ -26,11 +30,22 @@ CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t siz
CachedBufferBlock::~CachedBufferBlock() = default;
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
- std::size_t stream_size)
- : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{
- rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
+ const Device& device, std::size_t stream_size)
+ : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {
+ // The constant buffer pool is only created when the device reports fast buffer sub-data
+ // uploads; otherwise nothing beyond the stream buffer is set up here.
+ if (device.HasFastBufferSubData()) {
+ constexpr auto cbuf_bytes = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
+ const auto cbuf_count = static_cast<GLsizei>(cbufs.size());
+ glCreateBuffers(cbuf_count, cbufs.data());
+ // Give every pooled buffer storage for a maximum-sized constant buffer.
+ for (const GLuint handle : cbufs) {
+ glNamedBufferData(handle, cbuf_bytes, nullptr, GL_STREAM_DRAW);
+ }
+ }
+}
-OGLBufferCache::~OGLBufferCache() = default;
+/// Deletes the GL buffer names stored in the constant buffer pool.
+OGLBufferCache::~OGLBufferCache() {
+ const auto cbuf_count = static_cast<GLsizei>(cbufs.size());
+ glDeleteBuffers(cbuf_count, cbufs.data());
+}
Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(cache_addr, size);
@@ -69,4 +84,12 @@ void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
static_cast<GLsizeiptr>(size));
}
+/// Uploads a constant buffer into the next unused buffer of the cbufs pool.
+/// @param raw_pointer Host data to copy into the GL buffer.
+/// @param size Number of bytes to copy.
+/// @return The GL buffer name that received the data, paired with a zero offset.
+OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
+ std::size_t size) {
+ DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
+ // Only Acquire() rewinds the cursor, so uploads issued without an intervening Acquire()
+ // keep advancing it. Wrap around instead of indexing past the pool in builds where the
+ // assertion above is inactive (assumes DEBUG_ASSERT is a no-op there - confirm in
+ // common/assert.h).
+ if (cbuf_cursor >= std::size(cbufs)) {
+ cbuf_cursor = 0;
+ }
+ const GLuint& cbuf = cbufs[cbuf_cursor++];
+ glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
+ return {&cbuf, 0};
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 022e7bfa9..8c7145443 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -4,10 +4,12 @@
#pragma once
+#include <array>
#include <memory>
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
@@ -18,12 +20,14 @@ class System;
namespace OpenGL {
+class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
class CachedBufferBlock;
using Buffer = std::shared_ptr<CachedBufferBlock>;
+using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class CachedBufferBlock : public VideoCommon::BufferBlock {
public:
@@ -38,14 +42,18 @@ private:
OGLBuffer gl_buffer{};
};
-class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> {
+class OGLBufferCache final : public GenericBufferCache {
public:
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
- std::size_t stream_size);
+ const Device& device, std::size_t stream_size);
~OGLBufferCache();
const GLuint* GetEmptyBuffer(std::size_t) override;
+ /// Rewinds the constant buffer cursor so the next ConstBufferUpload starts again from the
+ /// first pooled buffer.
+ void Acquire() noexcept {
+ cbuf_cursor = std::size_t{0};
+ }
+
protected:
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
@@ -61,6 +69,14 @@ protected:
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) override;
+
+ BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
+
+private:
+ /// Index of the next unused entry in cbufs; reset by Acquire().
+ std::size_t cbuf_cursor = 0;
+ /// GL buffer names of the constant buffer pool. Value-initialized to zero because the
+ /// constructor can return before glCreateBuffers fills the array, while the destructor
+ /// always hands the whole array to glDeleteBuffers; zero names are ignored there, whereas
+ /// indeterminate values could match live buffers.
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
+ Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
+ cbufs{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 64de7e425..c65b24c69 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -51,8 +51,11 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view
} // Anonymous namespace
Device::Device() {
+ const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::vector extensions = GetExtensions();
+ const bool is_nvidia = vendor == "NVIDIA Corporation";
+
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
@@ -64,6 +67,7 @@ Device::Device() {
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = TestComponentIndexingBug();
has_precise_bug = TestPreciseBug();
+ has_fast_buffer_sub_data = is_nvidia;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index bb273c3d6..bf35bd0b6 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -54,6 +54,10 @@ public:
return has_precise_bug;
}
+ /// Returns the has_fast_buffer_sub_data flag, which Device::Device() sets when the GL
+ /// vendor string equals "NVIDIA Corporation".
+ bool HasFastBufferSubData() const {
+ return has_fast_buffer_sub_data;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestComponentIndexingBug();
@@ -69,6 +73,7 @@ private:
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
+ bool has_fast_buffer_sub_data{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6a4d2c83a..28fa8a8be 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -67,7 +67,7 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info)
: texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
- system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
+ system{system}, screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
@@ -558,6 +558,8 @@ void RasterizerOpenGL::DrawPrelude() {
SyncPolygonOffset();
SyncAlphaTest();
+ buffer_cache.Acquire();
+
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
@@ -573,9 +575,11 @@ void RasterizerOpenGL::DrawPrelude() {
(sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) *
Maxwell::MaxShaderStage;
- // Add space for at least 18 constant buffers
- buffer_size += Maxwell::MaxConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
+ if (!device.HasFastBufferSubData()) {
+ // Add space for at least 18 constant buffers
+ buffer_size += Maxwell::MaxConstBuffers *
+ (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
+ }
// Prepare the vertex array.
buffer_cache.Map(buffer_size);
@@ -739,10 +743,12 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
state.draw.shader_program = program;
state.draw.program_pipeline = 0;
- const std::size_t buffer_size =
- Tegra::Engines::KeplerCompute::NumConstBuffers *
- (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
- buffer_cache.Map(buffer_size);
+ if (!device.HasFastBufferSubData()) {
+ const std::size_t buffer_size =
+ Tegra::Engines::KeplerCompute::NumConstBuffers *
+ (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
+ buffer_cache.Map(buffer_size);
+ }
bind_ubo_pushbuffer.Setup(0);
bind_ssbo_pushbuffer.Setup(0);
@@ -750,7 +756,9 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
SetupComputeConstBuffers(kernel);
SetupComputeGlobalMemory(kernel);
- buffer_cache.Unmap();
+ if (!device.HasFastBufferSubData()) {
+ buffer_cache.Unmap();
+ }
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
@@ -879,7 +887,8 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
const auto alignment = device.GetUniformBufferAlignment();
- const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
+ const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
+ device.HasFastBufferSubData());
bind_ubo_pushbuffer.Push(cbuf, offset, size);
}