From 32c1bc6a67820f9df21c8f64f4df078b015aa7da Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 6 Nov 2019 04:32:43 -0300 Subject: shader/texture: Deduce texture buffers from locker Instead of specializing shaders to separate texture buffers from 1D textures, use the locker to deduce them while they are being decoded. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 45 ++++++++---------------- 1 file changed, 15 insertions(+), 30 deletions(-) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 05f8e511b..b76de71ec 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -271,9 +271,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { const auto stage = static_cast(index == 0 ? 0 : index - 1); SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); - const auto texture_buffer_usage{SetupDrawTextures(stage, shader, base_bindings)}; + SetupDrawTextures(stage, shader, base_bindings); - const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; + const ProgramVariant variant{base_bindings, primitive_mode}; const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); switch (program) { @@ -303,7 +303,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { // When VertexA is enabled, we have dual vertex shaders if (program == Maxwell::ShaderProgram::VertexA) { // VertexB was combined with VertexA, so we skip the VertexB iteration - index++; + ++index; } base_bindings = next_bindings; @@ -732,11 +732,10 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { } auto kernel = shader_cache.GetComputeKernel(code_addr); - ProgramVariant variant; - variant.texture_buffer_usage = SetupComputeTextures(kernel); + SetupComputeTextures(kernel); SetupComputeImages(kernel); - const auto [program, next_bindings] = kernel->GetProgramHandle(variant); + const auto [program, next_bindings] = kernel->GetProgramHandle({}); state.draw.shader_program = program; state.draw.program_pipeline = 0; @@ -918,9 +917,8 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast(size)); } -TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, - const Shader& shader, - BaseBindings base_bindings) { +void RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, const Shader& shader, + BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); @@ -929,8 +927,6 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), "Exceeded the number of active textures."); - TextureBufferUsage texture_buffer_usage{0}; - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; const auto texture = [&] { @@ -943,15 +939,11 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag return maxwell3d.GetTextureInfo(tex_handle); }(); - if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) { - texture_buffer_usage.set(bindpoint); - } + SetupTexture(base_bindings.sampler + bindpoint, texture, entry); } - - return texture_buffer_usage; } -TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { +void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& compute = system.GPU().KeplerCompute(); const auto& entries = kernel->GetShaderEntries().samplers; @@ -959,8 +951,6 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) ASSERT_MSG(entries.size() <= std::size(state.textures), "Exceeded the number of active textures."); - TextureBufferUsage texture_buffer_usage{0}; - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; const auto texture = [&] { @@ -972,34 +962,29 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) return compute.GetTextureInfo(tex_handle); }(); - if (SetupTexture(bindpoint, texture, entry)) { - texture_buffer_usage.set(bindpoint); - } + SetupTexture(bindpoint, texture, entry); } - - return texture_buffer_usage; } -bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, +void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, const GLShader::SamplerEntry& entry) { - state.samplers[binding] = sampler_cache.GetSampler(texture.tsc); - const auto view = texture_cache.GetTextureSurface(texture.tic, entry); if (!view) { // Can occur when texture addr is null or its memory is unmapped/invalid + state.samplers[binding] = 0; state.textures[binding] = 0; - return false; + return; } state.textures[binding] = view->GetTexture(); if (view->GetSurfaceParams().IsBuffer()) { - return true; + return; } + state.samplers[binding] = sampler_cache.GetSampler(texture.tsc); // Apply swizzle to textures that are not buffers. view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, texture.tic.w_source); - return false; } void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { -- cgit v1.2.3 From 4f5d8e434278cd5999bf21e91f0923d55ec8d52b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 12 Nov 2019 23:26:56 -0300 Subject: gl_shader_cache: Specialize shader workgroup Drop the usage of ARB_compute_variable_group_size and specialize compute shaders instead. This permits compute to run on AMD and Intel proprietary drivers. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b76de71ec..bd4e5f6e3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -273,8 +273,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { SetupDrawGlobalMemory(stage, shader); SetupDrawTextures(stage, shader, base_bindings); - const ProgramVariant variant{base_bindings, primitive_mode}; - const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); + const ProgramVariant variant(base_bindings, primitive_mode); + const auto [program_handle, next_bindings] = shader->GetHandle(variant); switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -725,18 +725,14 @@ bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) { } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - if (!GLAD_GL_ARB_compute_variable_group_size) { - LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the " - "lack of GL_ARB_compute_variable_group_size"); - return; - } - auto kernel = shader_cache.GetComputeKernel(code_addr); SetupComputeTextures(kernel); SetupComputeImages(kernel); - const auto [program, next_bindings] = kernel->GetProgramHandle({}); - state.draw.shader_program = program; + const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, + launch_desc.block_dim_z); + std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); state.draw.program_pipeline = 0; const std::size_t buffer_size = @@ -760,10 +756,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { state.ApplyShaderProgram(); state.ApplyProgramPipeline(); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; - glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y, - launch_desc.grid_dim_z, launch_desc.block_dim_x, - launch_desc.block_dim_y, launch_desc.block_dim_z); + glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); } void RasterizerOpenGL::FlushAll() {} -- cgit v1.2.3 From dbeb52387979c7e28c0acb03dfc1468146947104 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 12 Nov 2019 23:39:45 -0300 Subject: gl_shader_cache: Specialize shared memory size Shared memory was being declared with an undefined size. Specialize from guest GPU parameters the compute shader's shared memory size. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index bd4e5f6e3..ebfe52e6d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -731,7 +731,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z); + launch_desc.block_dim_z, launch_desc.shared_alloc); std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); state.draw.program_pipeline = 0; -- cgit v1.2.3 From 287ae2b9e8ea38642a4c8e36f7863d881d4c0e87 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 13 Nov 2019 00:25:52 -0300 Subject: gl_shader_cache: Specialize local memory size for compute shaders Local memory size in compute shaders was stubbed with an arbitary size. This commit specializes local memory size from guest GPU parameters. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ebfe52e6d..d890076f8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -731,7 +731,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z, launch_desc.shared_alloc); + launch_desc.block_dim_z, launch_desc.shared_alloc, + launch_desc.local_pos_alloc); std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); state.draw.program_pipeline = 0; -- cgit v1.2.3 From 0f23359a44d9258efa0c0cd50243cd0efaf80235 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 13 Nov 2019 00:27:12 -0300 Subject: gl_rasterizer: Bind graphics images to draw commands Images were not being bound to draw invocations because these would require a cache invalidation. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 80 ++++++++++++++---------- 1 file changed, 48 insertions(+), 32 deletions(-) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d890076f8..9ce20f8f4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -49,8 +49,26 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100)); -static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, - const GLShader::ConstBufferEntry& entry) { +namespace { + +template +Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, + Tegra::Engines::ShaderType shader_type) { + if (entry.IsBindless()) { + const Tegra::Texture::TextureHandle tex_handle = + engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); + return engine.GetTextureInfo(tex_handle); + } + if constexpr (std::is_same_v) { + const auto stage = static_cast(shader_type); + return engine.GetStageTexture(stage, entry.GetOffset()); + } else { + return engine.GetTexture(entry.GetOffset()); + } +} + +std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, + const GLShader::ConstBufferEntry& entry) { if (!entry.IsIndirect()) { return entry.GetSize(); } @@ -64,6 +82,8 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf return buffer.size; } +} // Anonymous namespace + RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, @@ -272,6 +292,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); SetupDrawTextures(stage, shader, base_bindings); + SetupDrawImages(stage, shader, base_bindings); const ProgramVariant variant(base_bindings, primitive_mode); const auto [program_handle, next_bindings] = shader->GetHandle(variant); @@ -921,18 +942,11 @@ void RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, const Shade ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), "Exceeded the number of active textures."); - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto num_entries = static_cast(entries.size()); + for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = [&] { - if (!entry.IsBindless()) { - return maxwell3d.GetStageTexture(stage, entry.GetOffset()); - } - const auto shader_type = static_cast(stage); - const Tegra::Texture::TextureHandle tex_handle = - maxwell3d.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); - return maxwell3d.GetTextureInfo(tex_handle); - }(); - + const auto shader_type = static_cast(stage); + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); SetupTexture(base_bindings.sampler + bindpoint, texture, entry); } } @@ -945,17 +959,10 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { ASSERT_MSG(entries.size() <= std::size(state.textures), "Exceeded the number of active textures."); - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto num_entries = static_cast(entries.size()); + for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = [&] { - if (!entry.IsBindless()) { - return compute.GetTexture(entry.GetOffset()); - } - const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32( - Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset()); - return compute.GetTextureInfo(tex_handle); - }(); - + const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); SetupTexture(bindpoint, texture, entry); } } @@ -981,19 +988,28 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu texture.tic.w_source); } +void RasterizerOpenGL::SetupDrawImages(Maxwell::ShaderStage stage, const Shader& shader, + BaseBindings base_bindings) { + const auto& maxwell3d = system.GPU().Maxwell3D(); + const auto& entries = shader->GetShaderEntries().images; + + const auto num_entries = static_cast(entries.size()); + for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { + const auto& entry = entries[bindpoint]; + const auto shader_type = static_cast(stage); + const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; + SetupImage(base_bindings.image + bindpoint, tic, entry); + } +} + void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { const auto& compute = system.GPU().KeplerCompute(); const auto& entries = shader->GetShaderEntries().images; - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + + const auto num_entries = static_cast(entries.size()); + for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto tic = [&] { - if (!entry.IsBindless()) { - return compute.GetTexture(entry.GetOffset()).tic; - } - const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32( - Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset()); - return compute.GetTextureInfo(tex_handle).tic; - }(); + const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; SetupImage(bindpoint, tic, entry); } } -- cgit v1.2.3 From c8a48aacc0a2bfe87de74e0aa7842f5d1aec1558 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 18 Nov 2019 18:35:21 -0300 Subject: video_core: Unify ProgramType and ShaderStage into ShaderType --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 26 +++++++++++------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 9ce20f8f4..8baa73ebf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -22,6 +22,7 @@ #include "core/settings.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -60,8 +61,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry return engine.GetTextureInfo(tex_handle); } if constexpr (std::is_same_v) { - const auto stage = static_cast(shader_type); - return engine.GetStageTexture(stage, entry.GetOffset()); + return engine.GetStageTexture(shader_type, entry.GetOffset()); } else { return engine.GetTexture(entry.GetOffset()); } @@ -263,7 +263,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = gpu.regs.shader_config[index]; - const Maxwell::ShaderProgram program{static_cast(index)}; + const auto program{static_cast(index)}; // Skip stages that are not enabled if (!gpu.regs.IsShaderConfigEnabled(index)) { @@ -288,7 +288,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { Shader shader{shader_cache.GetStageProgram(program)}; // Stage indices are 0 - 5 - const auto stage = static_cast(index == 0 ? 0 : index - 1); + const std::size_t stage = index == 0 ? 0 : index - 1; SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); SetupDrawTextures(stage, shader, base_bindings); @@ -856,11 +856,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader) { +void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_UBO); const auto& stages = system.GPU().Maxwell3D().state.shader_stages; - const auto& shader_stage = stages[static_cast(stage)]; + const auto& shader_stage = stages[stage_index]; for (const auto& entry : shader->GetShaderEntries().const_buffers) { const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; SetupConstBuffer(buffer, entry); @@ -899,11 +898,10 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b bind_ubo_pushbuffer.Push(cbuf, offset, size); } -void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader) { +void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast(stage)]}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; const auto gpu_addr{memory_manager.Read(addr)}; @@ -932,7 +930,7 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast(size)); } -void RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, const Shader& shader, +void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader, BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); @@ -945,7 +943,7 @@ void RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, const Shade const auto num_entries = static_cast(entries.size()); for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto shader_type = static_cast(stage); + const auto shader_type = static_cast(stage_index); const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); SetupTexture(base_bindings.sampler + bindpoint, texture, entry); } @@ -988,7 +986,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu texture.tic.w_source); } -void RasterizerOpenGL::SetupDrawImages(Maxwell::ShaderStage stage, const Shader& shader, +void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader, BaseBindings base_bindings) { const auto& maxwell3d = system.GPU().Maxwell3D(); const auto& entries = shader->GetShaderEntries().images; @@ -996,7 +994,7 @@ void RasterizerOpenGL::SetupDrawImages(Maxwell::ShaderStage stage, const Shader& const auto num_entries = static_cast(entries.size()); for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto shader_type = static_cast(stage); + const auto shader_type = static_cast(stage_index); const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; SetupImage(base_bindings.image + bindpoint, tic, entry); } -- cgit v1.2.3 From 180417c51438e2c97b800f4b19e621dbc8288493 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 18 Nov 2019 21:38:15 -0300 Subject: gl_shader_cache: Remove dynamic BaseBinding specialization --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 94 ++++++++++++------------ 1 file changed, 45 insertions(+), 49 deletions(-) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8baa73ebf..5c5ad1f6c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -258,7 +258,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); auto& gpu = system.GPU().Maxwell3D(); - BaseBindings base_bindings; std::array clip_distances{}; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -277,25 +276,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { continue; } - GLShader::MaxwellUniformData ubo{}; - ubo.SetFromRegs(gpu); - const auto [buffer, offset] = - buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - - // Bind the emulation info buffer - bind_ubo_pushbuffer.Push(buffer, offset, static_cast(sizeof(ubo))); - Shader shader{shader_cache.GetStageProgram(program)}; // Stage indices are 0 - 5 const std::size_t stage = index == 0 ? 0 : index - 1; SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); - SetupDrawTextures(stage, shader, base_bindings); - SetupDrawImages(stage, shader, base_bindings); + SetupDrawTextures(stage, shader); + SetupDrawImages(stage, shader); - const ProgramVariant variant(base_bindings, primitive_mode); - const auto [program_handle, next_bindings] = shader->GetHandle(variant); + const ProgramVariant variant(primitive_mode); + const auto program_handle = shader->GetHandle(variant); switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -326,8 +317,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { // VertexB was combined with VertexA, so we skip the VertexB iteration ++index; } - - base_bindings = next_bindings; } SyncClipEnabled(clip_distances); @@ -612,8 +601,16 @@ void RasterizerOpenGL::DrawPrelude() { index_buffer_offset = SetupIndexBuffer(); // Prepare packed bindings. - bind_ubo_pushbuffer.Setup(0); - bind_ssbo_pushbuffer.Setup(0); + bind_ubo_pushbuffer.Setup(); + bind_ssbo_pushbuffer.Setup(); + + // Setup emulation uniform buffer. + GLShader::MaxwellUniformData ubo; + ubo.SetFromRegs(gpu); + const auto [buffer, offset] = + buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); + bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset, + static_cast(sizeof(ubo))); // Setup shaders and their used resources. texture_cache.GuardSamplers(true); @@ -754,7 +751,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z, launch_desc.shared_alloc, launch_desc.local_pos_alloc); - std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); + state.draw.shader_program = kernel->GetHandle(variant); state.draw.program_pipeline = 0; const std::size_t buffer_size = @@ -762,8 +759,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); buffer_cache.Map(buffer_size); - bind_ubo_pushbuffer.Setup(0); - bind_ssbo_pushbuffer.Setup(0); + bind_ubo_pushbuffer.Setup(); + bind_ssbo_pushbuffer.Setup(); SetupComputeConstBuffers(kernel); SetupComputeGlobalMemory(kernel); @@ -847,7 +844,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); if (params.pixel_format != pixel_format) { - LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); + LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); } screen_info.display_texture = surface->GetTexture(); @@ -858,17 +855,21 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_UBO); + const u32 base_binding = device.GetBaseBindings(stage_index).uniform_buffer; const auto& stages = system.GPU().Maxwell3D().state.shader_stages; const auto& shader_stage = stages[stage_index]; + for (const auto& entry : shader->GetShaderEntries().const_buffers) { const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; - SetupConstBuffer(buffer, entry); + SetupConstBuffer(base_binding + entry.GetIndex(), buffer, entry); } } void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { MICROPROFILE_SCOPE(OpenGL_UBO); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + + u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); @@ -876,15 +877,16 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { buffer.address = config.Address(); buffer.size = config.size; buffer.enabled = mask[entry.GetIndex()]; - SetupConstBuffer(buffer, entry); + SetupConstBuffer(binding++, buffer, entry); } } -void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, +void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, const GLShader::ConstBufferEntry& entry) { if (!buffer.enabled) { // Set values to zero to unbind buffers - bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); + bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, + sizeof(float)); return; } @@ -895,18 +897,20 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b const auto alignment = device.GetUniformBufferAlignment(); const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, device.HasFastBufferSubData()); - bind_ubo_pushbuffer.Push(cbuf, offset, size); + bind_ubo_pushbuffer.Push(binding, cbuf, offset, size); } void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; + + u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; const auto gpu_addr{memory_manager.Read(addr)}; const auto size{memory_manager.Read(addr + 8)}; - SetupGlobalMemory(entry, gpu_addr, size); + SetupGlobalMemory(binding++, entry, gpu_addr, size); } } @@ -914,38 +918,35 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; + + u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; const auto gpu_addr{memory_manager.Read(addr)}; const auto size{memory_manager.Read(addr + 8)}; - SetupGlobalMemory(entry, gpu_addr, size); + SetupGlobalMemory(binding++, entry, gpu_addr, size); } } -void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, +void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size) { const auto alignment{device.GetShaderStorageBufferAlignment()}; const auto [ssbo, buffer_offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); - bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast(size)); + bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast(size)); } -void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader, - BaseBindings base_bindings) { +void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); const auto& entries = shader->GetShaderEntries().samplers; - ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), - "Exceeded the number of active textures."); - - const auto num_entries = static_cast(entries.size()); - for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { - const auto& entry = entries[bindpoint]; + u32 binding = device.GetBaseBindings(stage_index).sampler; + for (const auto& entry : entries) { const auto shader_type = static_cast(stage_index); const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); - SetupTexture(base_bindings.sampler + bindpoint, texture, entry); + SetupTexture(binding++, texture, entry); } } @@ -954,14 +955,10 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { const auto& compute = system.GPU().KeplerCompute(); const auto& entries = kernel->GetShaderEntries().samplers; - ASSERT_MSG(entries.size() <= std::size(state.textures), - "Exceeded the number of active textures."); - - const auto num_entries = static_cast(entries.size()); - for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { - const auto& entry = entries[bindpoint]; + u32 binding = 0; + for (const auto& entry : entries) { const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); - SetupTexture(bindpoint, texture, entry); + SetupTexture(binding++, texture, entry); } } @@ -986,8 +983,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu texture.tic.w_source); } -void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader, - BaseBindings base_bindings) { +void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { const auto& maxwell3d = system.GPU().Maxwell3D(); const auto& entries = shader->GetShaderEntries().images; @@ -996,7 +992,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh const auto& entry = entries[bindpoint]; const auto shader_type = static_cast(stage_index); const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; - SetupImage(base_bindings.image + bindpoint, tic, entry); + SetupImage(bindpoint, tic, entry); } } -- cgit v1.2.3 From f936b86c7c5a7db8dd9d7b5aed32eb9c47962d0d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 21 Nov 2019 14:21:27 -0300 Subject: gl_rasterizer: Add missing cbuf counter reset on compute --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 5c5ad1f6c..f2494e0e2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -743,6 +743,8 @@ bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) { } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { + buffer_cache.Acquire(); + auto kernel = shader_cache.GetComputeKernel(code_addr); SetupComputeTextures(kernel); SetupComputeImages(kernel); -- cgit v1.2.3 From 36d9b409fc8793b617c4ce5525d8dc297cb4d579 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 21 Nov 2019 14:21:42 -0300 Subject: gl_shader_decompiler: Normalize cbuf bindings Stage and compute shaders were using a different binding counter. Normalize these. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f2494e0e2..de7b7ce93 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -857,13 +857,13 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_UBO); - const u32 base_binding = device.GetBaseBindings(stage_index).uniform_buffer; const auto& stages = system.GPU().Maxwell3D().state.shader_stages; const auto& shader_stage = stages[stage_index]; + u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; for (const auto& entry : shader->GetShaderEntries().const_buffers) { const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; - SetupConstBuffer(base_binding + entry.GetIndex(), buffer, entry); + SetupConstBuffer(binding++, buffer, entry); } } -- cgit v1.2.3 From e35b9597ef4ecf9d500462ef19510a36a564fd9d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 22 Nov 2019 04:34:14 -0300 Subject: gl_shader_decompiler: Normalize image bindings --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 29 ++++++++---------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index de7b7ce93..fbb9bbac1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -940,12 +940,9 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemo void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_Texture); - const auto& gpu = system.GPU(); - const auto& maxwell3d = gpu.Maxwell3D(); - const auto& entries = shader->GetShaderEntries().samplers; - + const auto& maxwell3d = system.GPU().Maxwell3D(); u32 binding = device.GetBaseBindings(stage_index).sampler; - for (const auto& entry : entries) { + for (const auto& entry : shader->GetShaderEntries().samplers) { const auto shader_type = static_cast(stage_index); const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); SetupTexture(binding++, texture, entry); @@ -955,10 +952,8 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& compute = system.GPU().KeplerCompute(); - const auto& entries = kernel->GetShaderEntries().samplers; - u32 binding = 0; - for (const auto& entry : entries) { + for (const auto& entry : kernel->GetShaderEntries().samplers) { const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); SetupTexture(binding++, texture, entry); } @@ -987,26 +982,20 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { const auto& maxwell3d = system.GPU().Maxwell3D(); - const auto& entries = shader->GetShaderEntries().images; - - const auto num_entries = static_cast(entries.size()); - for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { - const auto& entry = entries[bindpoint]; + u32 binding = device.GetBaseBindings(stage_index).image; + for (const auto& entry : shader->GetShaderEntries().images) { const auto shader_type = static_cast(stage_index); const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; - SetupImage(bindpoint, tic, entry); + SetupImage(binding++, tic, entry); } } void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { const auto& compute = system.GPU().KeplerCompute(); - const auto& entries = shader->GetShaderEntries().images; - - const auto num_entries = static_cast(entries.size()); - for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { - const auto& entry = entries[bindpoint]; + u32 binding = 0; + for (const auto& entry : shader->GetShaderEntries().images) { const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; - SetupImage(bindpoint, tic, entry); + SetupImage(binding++, tic, entry); } } -- cgit v1.2.3 From 919ac2c4d3a81c94ae441f7298ebe4f3710a343f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 22 Nov 2019 21:12:31 -0300 Subject: gl_rasterizer: Disable compute shaders on Intel Intel's proprietary driver enters in a corrupt state when compute shaders are executed. For now, disable these. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index fbb9bbac1..f97ec06f0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -743,6 +743,10 @@ bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) { } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { + if (device.HasBrokenCompute()) { + return; + } + buffer_cache.Acquire(); auto kernel = shader_cache.GetComputeKernel(code_addr); -- cgit v1.2.3