diff options
Diffstat (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp')
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 406 |
1 files changed, 224 insertions, 182 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f217a265b..2d4a0d6db 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <memory> #include <string> #include <tuple> @@ -13,7 +14,6 @@ #include "common/math_util.h" #include "common/microprofile.h" #include "common/scope_exit.h" -#include "common/vector_math.h" #include "core/core.h" #include "core/hle/kernel/process.h" #include "core/settings.h" @@ -34,33 +34,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -enum class UniformBindings : GLuint { Common, VS, FS }; - -static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, - size_t expected_size) { - GLuint ub_index = glGetUniformBlockIndex(shader, name); - if (ub_index != GL_INVALID_INDEX) { - GLint ub_size = 0; - glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); - ASSERT_MSG(ub_size == expected_size, - "Uniform block size did not match! Got %d, expected %zu", - static_cast<int>(ub_size), expected_size); - glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); - } -} - -static void SetShaderUniformBlockBindings(GLuint shader) { - SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, - sizeof(RasterizerOpenGL::UniformData)); - SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, - sizeof(RasterizerOpenGL::VSUniformData)); - SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS, - sizeof(RasterizerOpenGL::FSUniformData)); -} - RasterizerOpenGL::RasterizerOpenGL() { - shader_dirty = true; - has_ARB_buffer_storage = false; has_ARB_direct_state_access = false; has_ARB_separate_shader_objects = false; @@ -72,6 +46,14 @@ RasterizerOpenGL::RasterizerOpenGL() { state.texture_units[i].sampler = texture_samplers[i].sampler.handle; } + // Create SSBOs + for (size_t stage = 0; stage < ssbos.size(); ++stage) { + for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) { + ssbos[stage][buffer].Create(); + state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle; + } + } + GLint ext_num; glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); for (GLint i = 0; i < ext_num; i++) { @@ -88,6 +70,8 @@ RasterizerOpenGL::RasterizerOpenGL() { } } + ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported"); + // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 state.clip_distance[0] = true; @@ -102,36 +86,30 @@ RasterizerOpenGL::RasterizerOpenGL() { state.draw.uniform_buffer = uniform_buffer.handle; state.Apply(); - glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW); - glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle); - - uniform_block_data.dirty = true; - // Create render framebuffer framebuffer.Create(); - if (has_ARB_separate_shader_objects) { - hw_vao.Create(); - hw_vao_enabled_attributes.fill(false); + hw_vao.Create(); + hw_vao_enabled_attributes.fill(false); - stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); - stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); - state.draw.vertex_buffer = stream_buffer->GetHandle(); + stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); + stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); + state.draw.vertex_buffer = stream_buffer->GetHandle(); - pipeline.Create(); - state.draw.program_pipeline = pipeline.handle; - state.draw.shader_program = 0; - state.draw.vertex_array = hw_vao.handle; - state.Apply(); + shader_program_manager = std::make_unique<GLShader::ProgramManager>(); + state.draw.shader_program = 0; + state.draw.vertex_array = hw_vao.handle; + state.Apply(); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); - vs_uniform_buffer.Create(); - glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle); - glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); - glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); - } else { - UNREACHABLE(); + for (unsigned index = 0; index < uniform_buffers.size(); ++index) { + auto& buffer = uniform_buffers[index]; + buffer.Create(); + glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle); + glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr, + GL_STREAM_COPY); + glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle); } accelerate_draw = AccelDraw::Disabled; @@ -149,17 +127,6 @@ RasterizerOpenGL::~RasterizerOpenGL() { } } -void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { - const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - - if (is_indexed) { - UNREACHABLE(); - } - - // TODO(bunnei): Add support for 1+ vertex arrays - vs_input_size = regs.vertex_buffer.count * regs.vertex_array[0].stride; -} - void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VAO); const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; @@ -171,6 +138,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { // TODO(bunnei): Add support for 1+ vertex arrays const auto& vertex_array{regs.vertex_array[0]}; + const auto& vertex_array_limit{regs.vertex_array_limit[0]}; ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?"); ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!"); for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) { @@ -183,6 +151,10 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { // to avoid OpenGL errors. for (unsigned index = 0; index < 16; ++index) { auto& attrib = regs.vertex_attrib_format[index]; + NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", + index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), + attrib.offset.Value(), attrib.IsNormalized()); + glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride, reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset)); @@ -191,7 +163,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { } // Copy vertex array data - const u32 data_size{vertex_array.stride * regs.vertex_buffer.count}; + const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1}; const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; res_cache.FlushRegion(data_addr, data_size, nullptr); Memory::ReadBlock(data_addr, array_ptr, data_size); @@ -200,26 +172,89 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { buffer_offset += data_size; } -void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { - MICROPROFILE_SCOPE(OpenGL_VS); - LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle); -} +void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { + // Helper function for uploading uniform data + const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { + if (has_ARB_direct_state_access) { + glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); + } else { + glBindBuffer(GL_COPY_WRITE_BUFFER, handle); + glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); + } + }; -void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { - MICROPROFILE_SCOPE(OpenGL_FS); - UNREACHABLE(); -} + auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); + ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); -bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { - if (!has_ARB_separate_shader_objects) { - UNREACHABLE(); - return false; + // Next available bindpoint to use when uploading the const buffers to the GLSL shaders. + u32 current_constbuffer_bindpoint = 0; + + for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { + ptr_pos += sizeof(GLShader::MaxwellUniformData); + + auto& shader_config = gpu.regs.shader_config[index]; + const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; + + const auto& stage = index - 1; // Stage indices are 0 - 5 + + const bool is_enabled = gpu.IsShaderStageEnabled(static_cast<Maxwell::ShaderStage>(stage)); + + // Skip stages that are not enabled + if (!is_enabled) { + continue; + } + + // Upload uniform data as one UBO per stage + const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); + copy_buffer(uniform_buffers[stage].handle, ubo_offset, + sizeof(GLShader::MaxwellUniformData)); + GLShader::MaxwellUniformData* ub_ptr = + reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]); + ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); + + // Fetch program code from memory + GLShader::ProgramCode program_code; + const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; + const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)}; + Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64)); + GLShader::ShaderSetup setup{std::move(program_code)}; + + GLShader::ShaderEntries shader_resources; + + switch (program) { + case Maxwell::ShaderProgram::VertexB: { + GLShader::MaxwellVSConfig vs_config{setup}; + shader_resources = + shader_program_manager->UseProgrammableVertexShader(vs_config, setup); + break; + } + case Maxwell::ShaderProgram::Fragment: { + GLShader::MaxwellFSConfig fs_config{setup}; + shader_resources = + shader_program_manager->UseProgrammableFragmentShader(fs_config, setup); + break; + } + default: + LOG_CRITICAL(HW_GPU, "Unimplemented shader index=%d, enable=%d, offset=0x%08X", index, + shader_config.enable.Value(), shader_config.offset); + UNREACHABLE(); + } + + GLuint gl_stage_program = shader_program_manager->GetCurrentProgramStage( + static_cast<Maxwell::ShaderStage>(stage)); + + // Configure the const buffers for this shader stage. + current_constbuffer_bindpoint = + SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, + current_constbuffer_bindpoint, shader_resources.const_buffer_entries); } + shader_program_manager->UseTrivialGeometryShader(); +} + +bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; DrawArrays(); - return true; } @@ -255,18 +290,18 @@ void RasterizerOpenGL::DrawArrays() { : (depth_surface == nullptr ? 1u : depth_surface->res_scale); MathUtil::Rectangle<u32> draw_rect{ - static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) + - viewport_rect.left * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Left - static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + - viewport_rect.top * res_scale, - surfaces_rect.bottom, surfaces_rect.top)), // Top - static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) + - viewport_rect.right * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Right - static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + - viewport_rect.bottom * res_scale, - surfaces_rect.bottom, surfaces_rect.top))}; // Bottom + static_cast<u32>( + std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast<u32>( + std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast<u32>( + std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Right + static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + + viewport_rect.bottom * res_scale, + surfaces_rect.bottom, surfaces_rect.top))}; // Bottom // Bind the framebuffer surfaces BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); @@ -280,18 +315,6 @@ void RasterizerOpenGL::DrawArrays() { // Sync and bind the texture surfaces BindTextures(); - // Sync and bind the shader - if (shader_dirty) { - SetShader(); - shader_dirty = false; - } - - // Sync the uniform data - if (uniform_block_data.dirty) { - glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data); - uniform_block_data.dirty = false; - } - // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable // scissor test to prevent drawing outside of the framebuffer region state.scissor.enabled = true; @@ -303,15 +326,22 @@ void RasterizerOpenGL::DrawArrays() { // Draw the vertex batch const bool is_indexed = accelerate_draw == AccelDraw::Indexed; - AnalyzeVertexArray(is_indexed); + const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; + const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; + + // TODO(bunnei): Add support for 1+ vertex arrays + vs_input_size = vertex_num * regs.vertex_array[0].stride; + state.draw.vertex_buffer = stream_buffer->GetHandle(); state.Apply(); size_t buffer_size = static_cast<size_t>(vs_input_size); if (is_indexed) { - UNREACHABLE(); + buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size; } - buffer_size += sizeof(VSUniformData); + + // Uniform space for the 5 shader stages + buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; size_t ptr_pos = 0; u8* buffer_ptr; @@ -322,36 +352,37 @@ void RasterizerOpenGL::DrawArrays() { SetupVertexArray(buffer_ptr, buffer_offset); ptr_pos += vs_input_size; + // If indexed mode, copy the index buffer GLintptr index_buffer_offset = 0; if (is_indexed) { - UNREACHABLE(); - } + ptr_pos = Common::AlignUp(ptr_pos, 4); - SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]), - buffer_offset + static_cast<GLintptr>(ptr_pos)); - const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); - ptr_pos += sizeof(VSUniformData); + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + const VAddr index_data_addr{ + memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())}; + Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size); - stream_buffer->Unmap(); + index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); + ptr_pos += index_buffer_size; + } - const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { - if (has_ARB_direct_state_access) { - glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); - } else { - glBindBuffer(GL_COPY_WRITE_BUFFER, handle); - glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); - } - }; + SetupShaders(buffer_ptr, buffer_offset, ptr_pos); - copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData)); + stream_buffer->Unmap(); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle); + shader_program_manager->ApplyTo(state); + state.Apply(); + const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)}; if (is_indexed) { - UNREACHABLE(); + const GLint index_min{static_cast<GLint>(regs.index_array.first)}; + const GLint index_max{static_cast<GLint>(regs.index_array.first + regs.index_array.count)}; + glDrawRangeElementsBaseVertex(primitive_mode, index_min, index_max, regs.index_array.count, + MaxwellToGL::IndexFormat(regs.index_array.format), + reinterpret_cast<const void*>(index_buffer_offset), + -index_min); } else { - glDrawArrays(MaxwellToGL::PrimitiveTopology(regs.draw.topology), 0, - regs.vertex_buffer.count); + glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count); } // Disable scissor test @@ -384,7 +415,7 @@ void RasterizerOpenGL::DrawArrays() { void RasterizerOpenGL::BindTextures() { using Regs = Tegra::Engines::Maxwell3D::Regs; - auto maxwell3d = Core::System::GetInstance().GPU().Get3DEngine(); + auto& maxwell3d = Core::System::GetInstance().GPU().Get3DEngine(); // Each Maxwell shader stage can have an arbitrary number of textures, but we're limited to a // certain number in OpenGL. We try to only use the minimum amount of host textures by not @@ -415,7 +446,32 @@ void RasterizerOpenGL::BindTextures() { } } -void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {} +void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + switch (method) { + case MAXWELL3D_REG_INDEX(blend.separate_alpha): + ASSERT_MSG(false, "unimplemented"); + break; + case MAXWELL3D_REG_INDEX(blend.equation_rgb): + state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb); + break; + case MAXWELL3D_REG_INDEX(blend.factor_source_rgb): + state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb); + break; + case MAXWELL3D_REG_INDEX(blend.factor_dest_rgb): + state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb); + break; + case MAXWELL3D_REG_INDEX(blend.equation_a): + state.blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a); + break; + case MAXWELL3D_REG_INDEX(blend.factor_source_a): + state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a); + break; + case MAXWELL3D_REG_INDEX(blend.factor_dest_a): + state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a); + break; + } +} void RasterizerOpenGL::FlushAll() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); @@ -467,9 +523,12 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu src_params.width = std::min(framebuffer.width, pixel_stride); src_params.height = framebuffer.height; src_params.stride = pixel_stride; - src_params.is_tiled = false; + src_params.is_tiled = true; + src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); + src_params.component_type = + SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format); src_params.UpdateParams(); MathUtil::Rectangle<u32> src_rect; @@ -531,70 +590,53 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr } } -void RasterizerOpenGL::SetShader() { - // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to - // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell - // shaders. - - static constexpr char vertex_shader[] = R"( -#version 150 core - -in vec2 vert_position; -in vec2 vert_tex_coord; -out vec2 frag_tex_coord; - -void main() { - // Multiply input position by the rotscale part of the matrix and then manually translate by - // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector - // to `vec3(vert_position.xy, 1.0)` - gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0); - frag_tex_coord = vert_tex_coord; -} -)"; - - static constexpr char fragment_shader[] = R"( -#version 150 core - -in vec2 frag_tex_coord; -out vec4 color; - -uniform sampler2D tex[32]; +u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program, + u32 current_bindpoint, + const std::vector<GLShader::ConstBufferEntry>& entries) { + auto& gpu = Core::System::GetInstance().GPU(); + auto& maxwell3d = gpu.Get3DEngine(); -void main() { - color = texture(tex[0], frag_tex_coord); -} -)"; + ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage), + "Attempted to upload constbuffer of disabled shader stage"); - if (current_shader) { - return; + // Reset all buffer draw state for this stage. + for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) { + buffer.bindpoint = 0; + buffer.enabled = false; } - LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); - - current_shader = &test_shader; - if (has_ARB_separate_shader_objects) { - test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true); - glActiveShaderProgram(pipeline.handle, test_shader.shader.handle); - } else { - UNREACHABLE(); + // Upload only the enabled buffers from the 16 constbuffers of each shader stage + auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; + + for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto& used_buffer = entries[bindpoint]; + const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; + auto& buffer_draw_state = + state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()]; + + ASSERT_MSG(buffer.enabled, "Attempted to upload disabled constbuffer"); + buffer_draw_state.enabled = true; + buffer_draw_state.bindpoint = current_bindpoint + bindpoint; + + VAddr addr = gpu.memory_manager->PhysicalToVirtualAddress(buffer.address); + std::vector<u8> data(used_buffer.GetSize() * sizeof(float)); + Memory::ReadBlock(addr, data.data(), data.size()); + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); + glBufferData(GL_SHADER_STORAGE_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + + // Now configure the bindpoint of the buffer inside the shader + std::string buffer_name = used_buffer.GetName(); + GLuint index = + glGetProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, buffer_name.c_str()); + if (index != -1) + glShaderStorageBlockBinding(program, index, buffer_draw_state.bindpoint); } - state.draw.shader_program = test_shader.shader.handle; state.Apply(); - for (u32 texture = 0; texture < texture_samplers.size(); ++texture) { - // Set the texture samplers to correspond to different texture units - std::string uniform_name = "tex[" + std::to_string(texture) + "]"; - GLint uniform_tex = glGetUniformLocation(test_shader.shader.handle, uniform_name.c_str()); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); - } - } - - if (has_ARB_separate_shader_objects) { - state.draw.shader_program = 0; - state.Apply(); - } + return current_bindpoint + entries.size(); } void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, |