diff options
Diffstat (limited to '')
7 files changed, 113 insertions, 129 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c428f06e4..46a7433ea 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -55,16 +55,20 @@ namespace { template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - Tegra::Engines::ShaderType shader_type) { + Tegra::Engines::ShaderType shader_type, + std::size_t index = 0) { if (entry.IsBindless()) { const Tegra::Texture::TextureHandle tex_handle = engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); return engine.GetTextureInfo(tex_handle); } + const auto& gpu_profile = engine.AccessGuestDriverProfile(); + const u32 offset = + entry.GetOffset() + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { - return engine.GetStageTexture(shader_type, entry.GetOffset()); + return engine.GetStageTexture(shader_type, offset); } else { - return engine.GetTexture(entry.GetOffset()); + return engine.GetTexture(offset); } } @@ -244,9 +248,6 @@ void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { } GLintptr RasterizerOpenGL::SetupIndexBuffer() { - if (accelerate_draw != AccelDraw::Indexed) { - return 0; - } MICROPROFILE_SCOPE(OpenGL_Index); const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); @@ -542,7 +543,8 @@ void RasterizerOpenGL::Clear() { } } -void RasterizerOpenGL::DrawPrelude() { +void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { + MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); SyncRasterizeEnable(state); @@ -563,9 +565,6 @@ void RasterizerOpenGL::DrawPrelude() { buffer_cache.Acquire(); - // Draw the vertex batch - const bool is_indexed = accelerate_draw == AccelDraw::Indexed; - std::size_t buffer_size = CalculateVertexArraysSize(); // Add space for index buffer @@ -592,7 +591,11 @@ void RasterizerOpenGL::DrawPrelude() { // Upload vertex and index data. SetupVertexBuffer(vao); SetupVertexInstances(vao); - index_buffer_offset = SetupIndexBuffer(); + + GLintptr index_buffer_offset; + if (is_indexed) { + index_buffer_offset = SetupIndexBuffer(); + } // Prepare packed bindings. bind_ubo_pushbuffer.Setup(); @@ -626,6 +629,7 @@ void RasterizerOpenGL::DrawPrelude() { // As all cached buffers are invalidated, we need to recheck their state. gpu.dirty.ResetVertexArrays(); } + gpu.dirty.memory_general = false; shader_program_manager->ApplyTo(state); state.Apply(); @@ -633,106 +637,33 @@ void RasterizerOpenGL::DrawPrelude() { if (texture_cache.TextureBarrier()) { glTextureBarrier(); } -} - -struct DrawParams { - bool is_indexed{}; - bool is_instanced{}; - GLenum primitive_mode{}; - GLint count{}; - GLint base_vertex{}; - - // Indexed settings - GLenum index_format{}; - GLintptr index_buffer_offset{}; - - // Instanced setting - GLint num_instances{}; - GLint base_instance{}; - - void DispatchDraw() { - if (is_indexed) { - const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset); - if (is_instanced) { - glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format, - index_buffer_ptr, num_instances, - base_vertex, base_instance); - } else { - glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr, - base_vertex); - } - } else { - if (is_instanced) { - glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, count, num_instances, - base_instance); - } else { - glDrawArrays(primitive_mode, base_vertex, count); - } - } - } -}; - -bool RasterizerOpenGL::DrawBatch(bool is_indexed) { - accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; - - MICROPROFILE_SCOPE(OpenGL_Drawing); - - DrawPrelude(); - auto& maxwell3d = system.GPU().Maxwell3D(); - const auto& regs = maxwell3d.regs; - const auto current_instance = maxwell3d.state.current_instance; - DrawParams draw_call{}; - draw_call.is_indexed = is_indexed; - draw_call.num_instances = static_cast<GLint>(1); - draw_call.base_instance = static_cast<GLint>(current_instance); - draw_call.is_instanced = current_instance > 0; - draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); - if (draw_call.is_indexed) { - draw_call.count = static_cast<GLint>(regs.index_array.count); - draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base); - draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); - draw_call.index_buffer_offset = index_buffer_offset; + const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); + const GLsizei num_instances = + static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1); + if (is_indexed) { + const GLenum index_format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format); + const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base); + const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count); + glDrawElementsInstancedBaseVertexBaseInstance( + primitive_mode, num_vertices, index_format, + reinterpret_cast<const void*>(index_buffer_offset), num_instances, base_vertex, + base_instance); } else { - draw_call.count = static_cast<GLint>(regs.vertex_buffer.count); - draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first); + const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first); + const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count); + glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, num_instances, + base_instance); } - draw_call.DispatchDraw(); +} - maxwell3d.dirty.memory_general = false; - accelerate_draw = AccelDraw::Disabled; +bool RasterizerOpenGL::DrawBatch(bool is_indexed) { + Draw(is_indexed, false); return true; } bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) { - accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; - - MICROPROFILE_SCOPE(OpenGL_Drawing); - - DrawPrelude(); - - auto& maxwell3d = system.GPU().Maxwell3D(); - const auto& regs = maxwell3d.regs; - const auto& draw_setup = maxwell3d.mme_draw; - DrawParams draw_call{}; - draw_call.is_indexed = is_indexed; - draw_call.num_instances = static_cast<GLint>(draw_setup.instance_count); - draw_call.base_instance = static_cast<GLint>(regs.vb_base_instance); - draw_call.is_instanced = draw_setup.instance_count > 1; - draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); - if (draw_call.is_indexed) { - draw_call.count = static_cast<GLint>(regs.index_array.count); - draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base); - draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); - draw_call.index_buffer_offset = index_buffer_offset; - } else { - draw_call.count = static_cast<GLint>(regs.vertex_buffer.count); - draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first); - } - draw_call.DispatchDraw(); - - maxwell3d.dirty.memory_general = false; - accelerate_draw = AccelDraw::Disabled; + Draw(is_indexed, true); return true; } @@ -942,8 +873,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& u32 binding = device.GetBaseBindings(stage_index).sampler; for (const auto& entry : shader->GetShaderEntries().samplers) { const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); - SetupTexture(binding++, texture, entry); + if (!entry.IsIndexed()) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); + SetupTexture(binding++, texture, entry); + } else { + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); + SetupTexture(binding++, texture, entry); + } + } } } @@ -952,8 +890,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().samplers) { - const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); - SetupTexture(binding++, texture, entry); + if (!entry.IsIndexed()) { + const auto texture = + GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); + SetupTexture(binding++, texture, entry); + } else { + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = + GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i); + SetupTexture(binding++, texture, entry); + } + } } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 6a27cf497..0501f3828 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -103,7 +103,7 @@ private: std::size_t size); /// Syncs all the state, shaders, render targets and textures setting before a draw call. - void DrawPrelude(); + void Draw(bool is_indexed, bool is_instanced); /// Configures the current textures to use for the draw command. void SetupDrawTextures(std::size_t stage_index, const Shader& shader); @@ -220,12 +220,7 @@ private: GLintptr SetupIndexBuffer(); - GLintptr index_buffer_offset; - void SetupShaders(GLenum primitive_mode); - - enum class AccelDraw { Disabled, Arrays, Indexed }; - AccelDraw accelerate_draw = AccelDraw::Disabled; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3c5bdd377..489eb143c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,6 +214,7 @@ std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType s } void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { + locker.SetBoundBuffer(usage.bound_buffer); for (const auto& key : usage.keys) { const auto [buffer, offset] = key.first; locker.InsertKey(buffer, offset, key.second); @@ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() { ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, const ConstBufferLocker& locker) const { - return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(), + return ShaderDiskCacheUsage{unique_identifier, variant, + locker.GetBoundBuffer(), locker.GetKeys(), locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index a1ac3d7a9..4735000b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -391,6 +391,7 @@ public: DeclareVertex(); DeclareGeometry(); DeclareRegisters(); + DeclareCustomVariables(); DeclarePredicates(); DeclareLocalMemory(); DeclareInternalFlags(); @@ -503,6 +504,16 @@ private: } } + void DeclareCustomVariables() { + const u32 num_custom_variables = ir.GetNumCustomVariables(); + for (u32 i = 0; i < num_custom_variables; ++i) { + code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); + } + if (num_custom_variables > 0) { + code.AddNewLine(); + } + } + void DeclarePredicates() { const auto& predicates = ir.GetPredicates(); for (const auto pred : predicates) { @@ -655,7 +666,8 @@ private: u32 binding = device.GetBaseBindings(stage).sampler; for (const auto& sampler : ir.GetSamplers()) { const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding++); + const std::string description = fmt::format("layout (binding = {}) uniform", binding); + binding += sampler.IsIndexed() ? sampler.Size() : 1; std::string sampler_type = [&]() { if (sampler.IsBuffer()) { @@ -682,7 +694,11 @@ private: sampler_type += "Shadow"; } - code.AddLine("{} {} {};", description, sampler_type, name); + if (!sampler.IsIndexed()) { + code.AddLine("{} {} {};", description, sampler_type, name); + } else { + code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size()); + } } if (!ir.GetSamplers().empty()) { code.AddNewLine(); @@ -775,6 +791,11 @@ private: return {GetRegister(index), Type::Float}; } + if (const auto cv = std::get_if<CustomVarNode>(&*node)) { + const u32 index = cv->GetIndex(); + return {GetCustomVariable(index), Type::Float}; + } + if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { const u32 value = immediate->GetValue(); if (value < 10) { @@ -1098,7 +1119,11 @@ private: } else if (!meta->ptp.empty()) { expr += "Offsets"; } - expr += '(' + GetSampler(meta->sampler) + ", "; + if (!meta->sampler.IsIndexed()) { + expr += '(' + GetSampler(meta->sampler) + ", "; + } else { + expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; + } expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow && !separate_dc ? 1 : 0) - 1); expr += '('; @@ -1310,6 +1335,8 @@ private: const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), Type::Uint}; + } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { + target = {GetCustomVariable(cv->GetIndex()), Type::Float}; } else { UNREACHABLE_MSG("Assign called without a proper target"); } @@ -2237,6 +2264,10 @@ private: return GetDeclarationWithSuffix(index, "gpr"); } + std::string GetCustomVariable(u32 index) const { + return GetDeclarationWithSuffix(index, "custom_var"); + } + std::string GetPredicate(Tegra::Shader::Pred pred) const { return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index cf874a09a..1fc204f6f 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -53,7 +53,7 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 11; +constexpr u32 NativeVersion = 12; // Making sure sizes doesn't change by accident static_assert(sizeof(ProgramVariant) == 20); @@ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() { u32 num_bound_samplers{}; u32 num_bindless_samplers{}; if (file.ReadArray(&usage.unique_identifier, 1) != 1 || - file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || + file.ReadArray(&usage.variant, 1) != 1 || + file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || file.ReadArray(&num_bindless_samplers, 1) != 1) { LOG_ERROR(Render_OpenGL, error_loading); @@ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { u32 num_bindless_samplers{}; ShaderDiskCacheUsage usage; if (!LoadObjectFromPrecompiled(usage.unique_identifier) || - !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || + !LoadObjectFromPrecompiled(usage.variant) || + !LoadObjectFromPrecompiled(usage.bound_buffer) || + !LoadObjectFromPrecompiled(num_keys) || !LoadObjectFromPrecompiled(num_bound_samplers) || !LoadObjectFromPrecompiled(num_bindless_samplers)) { return {}; @@ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || + file.WriteObject(usage.bound_buffer) != 1 || file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { @@ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p }; if (!SaveObjectToPrecompiled(usage.unique_identifier) || - !SaveObjectToPrecompiled(usage.variant) || + !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) || !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 69a2fbdda..ef2371f6d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v<ProgramVariant>); struct ShaderDiskCacheUsage { u64 unique_identifier{}; ProgramVariant variant; + u32 bound_buffer{}; VideoCommon::Shader::KeyMap keys; VideoCommon::Shader::BoundSamplerMap bound_samplers; VideoCommon::Shader::BindlessSamplerMap bindless_samplers; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index ea4f35663..7ed505628 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -47,8 +47,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - UNREACHABLE(); + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; } case Maxwell::VertexAttribute::Type::SignedInt: @@ -72,8 +71,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_INT_2_10_10_10_REV; default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - UNREACHABLE(); + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; } case Maxwell::VertexAttribute::Type::Float: @@ -89,13 +87,19 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return GL_FLOAT; default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); - UNREACHABLE(); + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); + return {}; + } + case Maxwell::VertexAttribute::Type::UnsignedScaled: + switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8_8: + return GL_UNSIGNED_BYTE; + default: + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; } default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); - UNREACHABLE(); + LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); return {}; } } |