diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.cpp | 43 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.h | 16 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 59 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 8 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 43 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 8 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 39 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 15 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 13 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.cpp | 34 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.h | 61 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/utils.cpp | 28 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/utils.h | 20 |
15 files changed, 264 insertions, 125 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 7989ec11b..25652e794 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -7,6 +7,7 @@ #include "common/alignment.h" #include "core/core.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index 5842d6213..ea4a593af 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -6,6 +6,7 @@ #include "common/logging/log.h" #include "core/core.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_global_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" @@ -13,28 +14,28 @@ namespace OpenGL { -CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} { +CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, + max_size{max_size} { buffer.Create(); - // Bind and unbind the buffer so it gets allocated by the driver - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); } -void CachedGlobalRegion::Reload(u32 size_) { - constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize); +CachedGlobalRegion::~CachedGlobalRegion() = default; +void CachedGlobalRegion::Reload(u32 size_) { size = size_; if (size > max_size) { size = max_size; - LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_, + LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, max_size); } + glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); +} - // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); +void CachedGlobalRegion::Flush() { + LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); + glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr); } GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { @@ -45,14 +46,16 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, return search->second; } -GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size, - u8* host_ptr) { +GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, + u32 size) { GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; if (!region) { // No reserved surface available, create a new one and reserve it auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr); - region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr); + const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; + ASSERT(cpu_addr); + + region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size); ReserveGlobalRegion(region); } region->Reload(size); @@ -64,7 +67,11 @@ void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { } GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) - : RasterizerCache{rasterizer} {} + : RasterizerCache{rasterizer} { + GLint max_ssbo_size_; + glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); + max_ssbo_size = static_cast<u32>(max_ssbo_size_); +} GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( const GLShader::GlobalMemoryEntry& global_region, @@ -72,7 +79,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( auto& gpu{Core::System::GetInstance().GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()}; const auto actual_addr{memory_manager.Read<u64>(addr)}; @@ -84,7 +91,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( if (!region) { // No global region found - create a new one - region = GetUncachedGlobalRegion(actual_addr, size, host_ptr); + region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); Register(region); } diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index 5a21ab66f..196e6e278 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h @@ -19,7 +19,7 @@ namespace OpenGL { namespace GLShader { class GlobalMemoryEntry; -} // namespace GLShader +} class RasterizerOpenGL; class CachedGlobalRegion; @@ -27,7 +27,8 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; class CachedGlobalRegion final : public RasterizerCacheObject { public: - explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr); + explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); + ~CachedGlobalRegion(); VAddr GetCpuAddr() const override { return cpu_addr; @@ -45,14 +46,14 @@ public: /// Reloads the global region from guest memory void Reload(u32 size_); - // TODO(Rodrigo): When global memory is written (STG), implement flushing - void Flush() override { - UNIMPLEMENTED(); - } + void Flush() override; private: VAddr cpu_addr{}; + u8* host_ptr{}; u32 size{}; + u32 max_size{}; + OGLBuffer buffer; }; @@ -66,10 +67,11 @@ public: private: GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; - GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr); + GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); void ReserveGlobalRegion(GlobalRegion region); std::unordered_map<CacheAddr, GlobalRegion> reserve; + u32 max_ssbo_size{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6f3bcccec..86a2e117d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -299,6 +299,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { BaseBindings base_bindings; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; + // Prepare packed bindings + bind_ubo_pushbuffer.Setup(base_bindings.cbuf); + bind_ssbo_pushbuffer.Setup(base_bindings.gmem); + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = gpu.regs.shader_config[index]; const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; @@ -321,8 +325,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); // Bind the emulation info buffer - glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset, - static_cast<GLsizeiptr>(sizeof(ubo))); + bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, + static_cast<GLsizeiptr>(sizeof(ubo))); Shader shader{shader_cache.GetStageProgram(program)}; const auto [program_handle, next_bindings] = @@ -366,6 +370,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { base_bindings = next_bindings; } + bind_ubo_pushbuffer.Bind(); + bind_ssbo_pushbuffer.Bind(); + SyncClipEnabled(clip_distances); gpu.dirty_flags.shaders = false; @@ -575,9 +582,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( } void RasterizerOpenGL::Clear() { - const auto prev_state{state}; - SCOPE_EXIT({ prev_state.Apply(); }); - const auto& regs = system.GPU().Maxwell3D().regs; bool use_color{}; bool use_depth{}; @@ -649,7 +653,10 @@ void RasterizerOpenGL::Clear() { clear_state.EmulateViewportWithScissor(); } - clear_state.Apply(); + clear_state.ApplyColorMask(); + clear_state.ApplyDepth(); + clear_state.ApplyStencilTest(); + clear_state.ApplyViewport(); if (use_color) { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); @@ -749,6 +756,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { return; } res_cache.FlushRegion(addr, size); + global_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -900,23 +908,14 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; const auto& entries = shader->GetShaderEntries().const_buffers; - constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; - std::array<GLuint, max_binds> bind_buffers; - std::array<GLintptr, max_binds> bind_offsets; - std::array<GLsizeiptr, max_binds> bind_sizes; - - ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points."); - // Upload only the enabled buffers from the 16 constbuffers of each shader stage for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& used_buffer = entries[bindpoint]; const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; if (!buffer.enabled) { - // With disabled buffers set values as zero to unbind them - bind_buffers[bindpoint] = 0; - bind_offsets[bindpoint] = 0; - bind_sizes[bindpoint] = 0; + // Set values to zero to unbind buffers + bind_ubo_pushbuffer.Push(0, 0, 0); continue; } @@ -944,30 +943,22 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader const GLintptr const_buffer_offset = buffer_cache.UploadMemory( buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); - // Prepare values for multibind - bind_buffers[bindpoint] = buffer_cache.GetHandle(); - bind_offsets[bindpoint] = const_buffer_offset; - bind_sizes[bindpoint] = size; + bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size); } - - // The first binding is reserved for emulation values - const GLuint ubo_base_binding = base_bindings.cbuf + 1; - glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()), - bind_buffers.data(), bind_offsets.data(), bind_sizes.data()); } void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, GLenum primitive_mode, BaseBindings base_bindings) { - // TODO(Rodrigo): Use ARB_multi_bind here const auto& entries = shader->GetShaderEntries().global_memory_entries; - - for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) { - const auto& entry = entries[bindpoint]; - const u32 current_bindpoint = base_bindings.gmem + bindpoint; - const auto& region = global_cache.GetGlobalRegion(entry, stage); - - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle()); + for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto& entry{entries[bindpoint]}; + const auto& region{global_cache.GetGlobalRegion(entry, stage)}; + if (entry.IsWritten()) { + region->MarkAsModified(true, global_cache); + } + bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, + static_cast<GLsizeiptr>(region->GetSizeInBytes())); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 54fbf48aa..d4c2cf80e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -28,6 +28,7 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/utils.h" namespace Core { class System; @@ -70,10 +71,6 @@ public: static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - static constexpr std::size_t MaxGlobalMemorySize = 0x10000; - static_assert(MaxGlobalMemorySize % sizeof(float) == 0, - "The maximum size of a global memory must be a multiple of the size of float"); - private: class SamplerInfo { public: @@ -229,6 +226,9 @@ private: PrimitiveAssembler primitive_assembler{buffer_cache}; GLint uniform_buffer_alignment; + BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; + BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; + std::size_t CalculateVertexArraysSize() const; std::size_t CalculateIndexBufferSize() const; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 7a3280620..7a68b8738 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -15,6 +15,7 @@ #include "core/hle/kernel/process.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" @@ -111,11 +112,26 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), params.srgb_conversion); - if (params.pixel_format == PixelFormat::R16U && config.tsc.depth_compare_enabled) { + if (config.tsc.depth_compare_enabled) { // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled, // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also // causes GetFormatType to properly return 'Depth' below). - params.pixel_format = PixelFormat::Z16; + if (GetFormatType(params.pixel_format) == SurfaceType::ColorTexture) { + switch (params.pixel_format) { + case PixelFormat::R16S: + case PixelFormat::R16U: + case PixelFormat::R16F: + params.pixel_format = PixelFormat::Z16; + break; + case PixelFormat::R32F: + params.pixel_format = PixelFormat::Z32F; + break; + default: + LOG_WARNING(HW_GPU, "Color texture format being used with depth compare: {}", + static_cast<u32>(params.pixel_format)); + break; + } + } } params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); @@ -265,6 +281,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); params.width = config.width; + params.pitch = config.pitch; params.height = config.height; params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; @@ -661,8 +678,8 @@ void CachedSurface::FlushGLBuffer() { gl_buffer[0].resize(GetSizeInBytes()); const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); - // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0); + const u32 align = std::clamp(params.RowAlign(0), 1U, 8U); + glPixelStorei(GL_PACK_ALIGNMENT, align); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); ASSERT(!tuple.compressed); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); @@ -707,8 +724,8 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); - // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0); + const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U); + glPixelStorei(GL_UNPACK_ALIGNMENT, align); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map))); const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false)); @@ -1174,10 +1191,16 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, return new_surface; } + const bool old_compressed = + GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed; + const bool new_compressed = + GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed; + const bool compatible_formats = + GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) && + !(old_compressed || new_compressed); // For compatible surfaces, we can just do fast glCopyImageSubData based copy - if (old_params.target == new_params.target && old_params.type == new_params.type && - old_params.depth == new_params.depth && old_params.depth == 1 && - GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) { + if (old_params.target == new_params.target && old_params.depth == new_params.depth && + old_params.depth == 1 && compatible_formats) { FastCopySurface(old_surface, new_surface); return new_surface; } @@ -1192,7 +1215,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, case SurfaceTarget::TextureCubemap: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - if (old_params.pixel_format == new_params.pixel_format) + if (compatible_formats) FastLayeredCopySurface(old_surface, new_surface); else { AccurateCopySurface(old_surface, new_surface); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index ad4fd3ad2..db280dbb3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -11,6 +11,7 @@ #include <vector> #include "common/alignment.h" +#include "common/bit_util.h" #include "common/common_types.h" #include "common/hash.h" #include "common/math_util.h" @@ -205,6 +206,13 @@ struct SurfaceParams { return bd; } + u32 RowAlign(u32 mip_level) const { + const u32 m_width = MipWidth(mip_level); + const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format); + const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel); + return (1U << l2); + } + /// Creates SurfaceParams from a texture configuration static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index ab381932c..99f67494c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -7,6 +7,7 @@ #include "common/hash.h" #include "core/core.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 3ea08ef7b..445048daf 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -45,8 +45,6 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>; enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); -constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = - static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); class ShaderWriter { public: @@ -208,8 +206,10 @@ public: for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); } - for (const auto& gmem : ir.GetGlobalMemoryBases()) { - entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset); + for (const auto& gmem_pair : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem_pair; + entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, + usage.is_read, usage.is_written); } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -380,12 +380,22 @@ private: } void DeclareGlobalMemory() { - for (const auto& entry : ir.GetGlobalMemoryBases()) { + for (const auto& gmem : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem; + + // Since we don't know how the shader will use the shader, hint the driver to disable as + // much optimizations as possible + std::string qualifier = "coherent volatile"; + if (usage.is_read && !usage.is_written) + qualifier += " readonly"; + else if (usage.is_written && !usage.is_read) + qualifier += " writeonly"; + const std::string binding = - fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset); - code.AddLine("layout (std430, binding = " + binding + ") buffer " + - GetGlobalMemoryBlock(entry) + " {"); - code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];"); + fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset); + code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " + + GetGlobalMemoryBlock(base) + " {"); + code.AddLine(" float " + GetGlobalMemory(base) + "[];"); code.AddLine("};"); code.AddNewLine(); } @@ -552,8 +562,7 @@ private: } else if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " + - std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';'); + code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);"); return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), final_offset, final_offset); @@ -869,6 +878,12 @@ private: } else if (const auto lmem = std::get_if<LmemNode>(dest)) { target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]"; + } else if (const auto gmem = std::get_if<GmemNode>(dest)) { + const std::string real = Visit(gmem->GetRealAddress()); + const std::string base = Visit(gmem->GetBaseAddress()); + const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; + target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); + } else { UNREACHABLE_MSG("Assign called without a proper target"); } @@ -1622,9 +1637,7 @@ private: std::string GetCommonDeclarations() { const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); - const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + - "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" + "#define ftoi floatBitsToInt\n" "#define ftou floatBitsToUint\n" "#define itof intBitsToFloat\n" diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 9f7b7272e..74032d237 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -39,8 +39,9 @@ private: class GlobalMemoryEntry { public: - explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset) - : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {} + explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, bool is_written) + : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{ + is_written} {} u32 GetCbufIndex() const { return cbuf_index; @@ -50,9 +51,19 @@ public: return cbuf_offset; } + bool IsRead() const { + return is_read; + } + + bool IsWritten() const { + return is_written; + } + private: u32 cbuf_index{}; u32 cbuf_offset{}; + bool is_read{}; + bool is_written{}; }; struct ShaderEntries { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 08603b7a5..53752b38d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -340,11 +340,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn for (u32 i = 0; i < global_memory_count; ++i) { u32 cbuf_index{}; u32 cbuf_offset{}; + u8 is_read{}; + u8 is_written{}; if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) || - file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) { + file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) || + file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) || + file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) { return {}; } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset); + entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0, + is_written != 0); } for (auto& clip_distance : entry.entries.clip_distances) { @@ -401,7 +406,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu return false; for (const auto& gmem : entries.global_memory_entries) { if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 || - file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) { + file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 || + file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 || + file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) { return false; } } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index eaf3e03a0..05ab01dcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -2,12 +2,44 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_manager.h" namespace OpenGL::GLShader { using Tegra::Engines::Maxwell3D; +ProgramManager::ProgramManager() { + pipeline.Create(); +} + +ProgramManager::~ProgramManager() = default; + +void ProgramManager::ApplyTo(OpenGLState& state) { + UpdatePipeline(); + state.draw.shader_program = 0; + state.draw.program_pipeline = pipeline.handle; +} + +void ProgramManager::UpdatePipeline() { + // Avoid updating the pipeline when values have no changed + if (old_state == current_state) { + return; + } + + // Workaround for AMD bug + constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | + GL_FRAGMENT_SHADER_BIT}; + glUseProgramStages(pipeline.handle, all_used_stages, 0); + + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); + glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); + + old_state = current_state; +} + void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) { const auto& regs = maxwell.regs; const auto& state = maxwell.state; @@ -16,7 +48,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shade viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f; - u32 func = static_cast<u32>(regs.alpha_test_func); + auto func{static_cast<u32>(regs.alpha_test_func)}; // Normalize the gl variants of opCompare to be the same as the normal variants const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never); if (func >= op_gl_variant_base) { diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 37dcfefdb..cec18a832 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,6 +4,8 @@ #pragma once +#include <cstddef> + #include <glad/glad.h> #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -38,55 +40,48 @@ static_assert(sizeof(MaxwellUniformData) < 16384, class ProgramManager { public: - ProgramManager() { - pipeline.Create(); - } + explicit ProgramManager(); + ~ProgramManager(); + + void ApplyTo(OpenGLState& state); void UseProgrammableVertexShader(GLuint program) { - vs = program; + current_state.vertex_shader = program; } void UseProgrammableGeometryShader(GLuint program) { - gs = program; + current_state.geometry_shader = program; } void UseProgrammableFragmentShader(GLuint program) { - fs = program; + current_state.fragment_shader = program; } void UseTrivialGeometryShader() { - gs = 0; - } - - void ApplyTo(OpenGLState& state) { - UpdatePipeline(); - state.draw.shader_program = 0; - state.draw.program_pipeline = pipeline.handle; + current_state.geometry_shader = 0; } private: - void UpdatePipeline() { - // Avoid updating the pipeline when values have no changed - if (old_vs == vs && old_fs == fs && old_gs == gs) - return; - // Workaround for AMD bug - glUseProgramStages(pipeline.handle, - GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, - 0); - - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs); - glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs); - - // Update the old values - old_vs = vs; - old_fs = fs; - old_gs = gs; - } + struct PipelineState { + bool operator==(const PipelineState& rhs) const { + return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && + geometry_shader == rhs.geometry_shader; + } + + bool operator!=(const PipelineState& rhs) const { + return !operator==(rhs); + } + + GLuint vertex_shader{}; + GLuint fragment_shader{}; + GLuint geometry_shader{}; + }; + + void UpdatePipeline(); OGLPipeline pipeline; - GLuint vs{}, fs{}, gs{}; - GLuint old_vs{}, old_fs{}, old_gs{}; + PipelineState current_state; + PipelineState old_state; }; } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index d84634cb3..84a987371 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -5,11 +5,39 @@ #include <string> #include <fmt/format.h> #include <glad/glad.h> +#include "common/assert.h" #include "common/common_types.h" #include "video_core/renderer_opengl/utils.h" namespace OpenGL { +BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} + +BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; + +void BindBuffersRangePushBuffer::Setup(GLuint first_) { + first = first_; + buffers.clear(); + offsets.clear(); + sizes.clear(); +} + +void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { + buffers.push_back(buffer); + offsets.push_back(offset); + sizes.push_back(size); +} + +void BindBuffersRangePushBuffer::Bind() const { + const std::size_t count{buffers.size()}; + DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); + if (count == 0) { + return; + } + glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), + sizes.data()); +} + void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) { if (!GLAD_GL_KHR_debug) { return; // We don't need to throw an error as this is just for debugging diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 1fcb6fc11..aef45c9dc 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -5,11 +5,31 @@ #pragma once #include <string> +#include <vector> #include <glad/glad.h> #include "common/common_types.h" namespace OpenGL { +class BindBuffersRangePushBuffer { +public: + BindBuffersRangePushBuffer(GLenum target); + ~BindBuffersRangePushBuffer(); + + void Setup(GLuint first_); + + void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); + + void Bind() const; + +private: + GLenum target; + GLuint first; + std::vector<GLuint> buffers; + std::vector<GLintptr> offsets; + std::vector<GLsizeiptr> sizes; +}; + void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = ""); } // namespace OpenGL
\ No newline at end of file |