diff options
Diffstat (limited to 'src/video_core/renderer_opengl/gl_device.cpp')
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 136 |
1 files changed, 88 insertions, 48 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b30d5be74..413d8546b 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -5,7 +5,9 @@ #include <algorithm> #include <array> #include <cstddef> +#include <optional> #include <vector> + #include <glad/glad.h> #include "common/logging/log.h" @@ -17,6 +19,30 @@ namespace OpenGL { namespace { +// One uniform block is reserved for emulation purposes +constexpr u32 ReservedUniformBlocks = 1; + +constexpr u32 NumStages = 5; + +constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, + GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, + GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS}; + +constexpr std::array LimitSSBOs = { + GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, + GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, + GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS}; + +constexpr std::array LimitSamplers = { + GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, + GL_MAX_TEXTURE_IMAGE_UNITS}; + +constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS, + GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, + GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, + GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS}; + template <typename T> T GetInteger(GLenum pname) { GLint temporary; @@ -48,13 +74,71 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view return std::find(images.begin(), images.end(), extension) != images.end(); } +u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { + ASSERT(num >= amount); + if (limit) { + amount = std::min(amount, GetInteger<u32>(*limit)); + } + num -= amount; + return std::exchange(base, base + amount); +} + +std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { + std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; + + static std::array<std::size_t, 5> stage_swizzle = {0, 1, 2, 3, 4}; + const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS); + const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); + const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); + + u32 num_ubos = total_ubos - ReservedUniformBlocks; + u32 num_ssbos = total_ssbos; + u32 num_samplers = total_samplers; + + u32 base_ubo = ReservedUniformBlocks; + u32 base_ssbo = 0; + u32 base_samplers = 0; + + for (std::size_t i = 0; i < NumStages; ++i) { + const std::size_t stage = stage_swizzle[i]; + bindings[stage] = { + Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]), + Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]), + Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])}; + } + + u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); + u32 base_images = 0; + + // Reserve more image bindings on fragment and vertex stages. + bindings[4].image = + Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]); + bindings[0].image = + Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]); + + // Reserve the other image bindings. + const u32 total_extracted_images = num_images / (NumStages - 2); + for (std::size_t i = 2; i < NumStages; ++i) { + const std::size_t stage = stage_swizzle[i]; + bindings[stage].image = + Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); + } + + // Compute doesn't care about any of this. + bindings[5] = {0, 0, 0, 0}; + + return bindings; +} + } // Anonymous namespace -Device::Device() { +Device::Device() : base_bindings{BuildBaseBindings()} { const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); const std::vector extensions = GetExtensions(); const bool is_nvidia = vendor == "NVIDIA Corporation"; + const bool is_amd = vendor == "ATI Technologies Inc."; + const bool is_intel = vendor == "Intel"; uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); @@ -66,8 +150,9 @@ Device::Device() { has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); has_variable_aoffi = TestVariableAoffi(); - has_component_indexing_bug = TestComponentIndexingBug(); + has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); + has_broken_compute = is_intel; has_fast_buffer_sub_data = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); @@ -85,6 +170,7 @@ Device::Device(std::nullptr_t) { has_image_load_formatted = true; has_variable_aoffi = true; has_component_indexing_bug = false; + has_broken_compute = false; has_precise_bug = false; } @@ -99,52 +185,6 @@ void main() { })"); } -bool Device::TestComponentIndexingBug() { - const GLchar* COMPONENT_TEST = R"(#version 430 core -layout (std430, binding = 0) buffer OutputBuffer { - uint output_value; -}; -layout (std140, binding = 0) uniform InputBuffer { - uvec4 input_value[4096]; -}; -layout (location = 0) uniform uint idx; -void main() { - output_value = input_value[idx >> 2][idx & 3]; -})"; - const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)}; - SCOPE_EXIT({ glDeleteProgram(shader); }); - glUseProgram(shader); - - OGLVertexArray vao; - vao.Create(); - glBindVertexArray(vao.handle); - - constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432}; - OGLBuffer ubo; - ubo.Create(); - glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW); - glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle); - - OGLBuffer ssbo; - ssbo.Create(); - glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT); - - for (GLuint index = 4; index < 8; ++index) { - glInvalidateBufferData(ssbo.handle); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle); - - glProgramUniform1ui(shader, 0, index); - glDrawArrays(GL_POINTS, 0, 1); - - GLuint result; - glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result); - if (result != values.at(index)) { - return true; - } - } - return false; -} - bool Device::TestPreciseBug() { return !TestProgram(R"(#version 430 core in vec3 coords; |