diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
17 files changed, 432 insertions, 242 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 6d3bda192..38d553d3c 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -106,8 +106,10 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { return views.back().texture.handle; } -BufferCacheRuntime::BufferCacheRuntime(const Device& device_) - : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, +BufferCacheRuntime::BufferCacheRuntime(const Device& device_, + StagingBufferPool& staging_buffer_pool_) + : device{device_}, staging_buffer_pool{staging_buffer_pool_}, + has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { @@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_) }(); } +StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestUploadBuffer(size); +} + +StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestDownloadBuffer(size); +} + u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { if (device.CanReportMemoryUsage()) { return device_access_memory - device.GetCurrentDedicatedVideoMemory(); @@ -147,13 +157,47 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { return 2_GiB; } -void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, - std::span<const VideoCommon::BufferCopy> copies) { +void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier) { + if (barrier) { + PreCopyBarrier(); + } for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData( - src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset), - static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size)); + glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset), + static_cast<GLintptr>(copy.dst_offset), + static_cast<GLsizeiptr>(copy.size)); } + if (barrier) { + PostCopyBarrier(); + } +} + +void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier) { + CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier); +} + +void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier) { + CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); +} + +void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, + std::span<const VideoCommon::BufferCopy> copies) { + CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies); +} + +void BufferCacheRuntime::PreCopyBarrier() { + // TODO: finer grained barrier? + glMemoryBarrier(GL_ALL_BARRIER_BITS); +} + +void BufferCacheRuntime::PostCopyBarrier() { + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); +} + +void BufferCacheRuntime::Finish() { + glFinish(); } void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { @@ -188,6 +232,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, } } +void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) { + for (u32 index = 0; index < bindings.buffers.size(); ++index) { + BindVertexBuffer(bindings.min_index + index, *bindings.buffers[index], + static_cast<u32>(bindings.offsets[index]), + static_cast<u32>(bindings.sizes[index]), + static_cast<u32>(bindings.strides[index])); + } +} + void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size) { if (use_assembly_shaders) { @@ -276,6 +329,14 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); } +void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings) { + for (u32 index = 0; index < bindings.buffers.size(); ++index) { + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, bindings.buffers[index]->Handle(), + static_cast<GLintptr>(bindings.offsets[index]), + static_cast<GLsizeiptr>(bindings.sizes[index])); + } +} + void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { *texture_handles++ = buffer.View(offset, size, format); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 18d3c3ac0..41b746f3b 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -7,12 +7,12 @@ #include <span> #include "common/common_types.h" -#include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" namespace OpenGL { @@ -60,17 +60,36 @@ class BufferCacheRuntime { public: static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); - explicit BufferCacheRuntime(const Device& device_); + explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_); + + [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size); + + [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); + + void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); + + void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); + + void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, + std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, std::span<const VideoCommon::BufferCopy> copies); + void PreCopyBarrier(); + void PostCopyBarrier(); + void Finish(); + void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride); + void BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings); + void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size); void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size); @@ -83,6 +102,8 @@ public: void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings); + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, VideoCore::Surface::PixelFormat format); @@ -169,6 +190,7 @@ private: }; const Device& device; + StagingBufferPool& staging_buffer_pool; bool has_fast_buffer_sub_data = false; bool use_assembly_shaders = false; @@ -201,7 +223,7 @@ private: struct BufferCacheParams { using Runtime = OpenGL::BufferCacheRuntime; using Buffer = OpenGL::Buffer; - using Async_Buffer = u32; + using Async_Buffer = OpenGL::StagingBufferMap; using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; static constexpr bool IS_OPENGL = true; @@ -209,9 +231,12 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; - static constexpr bool USE_MEMORY_MAPS = false; + static constexpr bool USE_MEMORY_MAPS = true; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; + + // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads + static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false; }; using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 1a0cea9b7..3151c0db8 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -87,7 +87,8 @@ void ComputePipeline::Configure() { texture_cache.SynchronizeComputeDescriptors(); boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; - std::array<GLuint, MAX_TEXTURES> samplers; + boost::container::static_vector<VideoCommon::SamplerId, MAX_TEXTURES> samplers; + std::array<GLuint, MAX_TEXTURES> gl_samplers; std::array<GLuint, MAX_TEXTURES> textures; std::array<GLuint, MAX_IMAGES> images; GLsizei sampler_binding{}; @@ -131,7 +132,6 @@ void ComputePipeline::Configure() { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - samplers[sampler_binding++] = 0; } } for (const auto& desc : info.image_buffer_descriptors) { @@ -142,8 +142,8 @@ void ComputePipeline::Configure() { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); - samplers[sampler_binding++] = sampler->Handle(); + VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); + samplers.push_back(sampler); } } for (const auto& desc : info.image_descriptors) { @@ -186,10 +186,17 @@ void ComputePipeline::Configure() { const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers + num_image_buffers}; + const VideoCommon::SamplerId* samplers_it{samplers.data()}; texture_binding += num_texture_buffers; image_binding += num_image_buffers; u32 texture_scaling_mask{}; + + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + gl_samplers[sampler_binding++] = 0; + } + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; @@ -198,6 +205,12 @@ void ComputePipeline::Configure() { texture_scaling_mask |= 1u << texture_binding; } ++texture_binding; + + const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))}; + const bool use_fallback_sampler{sampler.HasAddedAnisotropy() && + !image_view.SupportsAnisotropy()}; + gl_samplers[sampler_binding++] = + use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle(); } } u32 image_scaling_mask{}; @@ -228,7 +241,7 @@ void ComputePipeline::Configure() { if (texture_binding != 0) { ASSERT(texture_binding == sampler_binding); glBindTextures(0, texture_binding, textures.data()); - glBindSamplers(0, sampler_binding, samplers.data()); + glBindSamplers(0, sampler_binding, gl_samplers.data()); } if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 400c21981..03d234f2f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -201,6 +201,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && !(is_amd || (is_intel && !is_linux)) && !strict_context_required; use_driver_cache = is_nvidia; + supports_conditional_barriers = !is_intel; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index cc0b95f1a..ad27264e5 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -188,6 +188,10 @@ public: return strict_context_required; } + bool SupportsConditionalBarriers() const { + return supports_conditional_barriers; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -233,6 +237,7 @@ private: bool has_bool_ref_bug{}; bool can_report_memory{}; bool strict_context_required{}; + bool supports_conditional_barriers{}; std::string vendor_name; }; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 89000d6e0..c58f760b8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -275,9 +275,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c template <typename Spec> void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; - std::array<GLuint, MAX_TEXTURES> samplers; + std::array<VideoCommon::SamplerId, MAX_TEXTURES> samplers; size_t views_index{}; - GLsizei sampler_binding{}; + size_t samplers_index{}; texture_cache.SynchronizeGraphicsDescriptors(); @@ -337,7 +337,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; views[views_index++] = {handle.first}; - samplers[sampler_binding++] = 0; } } } @@ -351,8 +350,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto handle{read_handle(desc, index)}; views[views_index++] = {handle.first}; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; - samplers[sampler_binding++] = sampler->Handle(); + VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; + samplers[samplers_index++] = sampler; } } if constexpr (Spec::has_images) { @@ -445,10 +444,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { program_manager.BindSourcePrograms(source_programs); } const VideoCommon::ImageViewInOut* views_it{views.data()}; + const VideoCommon::SamplerId* samplers_it{samplers.data()}; GLsizei texture_binding = 0; GLsizei image_binding = 0; + GLsizei sampler_binding{}; std::array<GLuint, MAX_TEXTURES> textures; std::array<GLuint, MAX_IMAGES> images; + std::array<GLuint, MAX_TEXTURES> gl_samplers; const auto prepare_stage{[&](size_t stage) { buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); buffer_cache.BindHostStageBuffers(stage); @@ -465,6 +467,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { u32 stage_image_binding{}; const auto& info{stage_infos[stage]}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + gl_samplers[sampler_binding++] = 0; + } + } + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; @@ -474,6 +483,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } ++texture_binding; ++stage_texture_binding; + + const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))}; + const bool use_fallback_sampler{sampler.HasAddedAnisotropy() && + !image_view.SupportsAnisotropy()}; + gl_samplers[sampler_binding++] = + use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle(); } } for (const auto& desc : info.image_descriptors) { @@ -534,7 +549,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if (texture_binding != 0) { ASSERT(texture_binding == sampler_binding); glBindTextures(0, texture_binding, textures.data()); - glBindSamplers(0, sampler_binding, samplers.data()); + glBindSamplers(0, sampler_binding, gl_samplers.data()); } if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f5baa0f3c..fc711c44a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -24,6 +24,7 @@ #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" @@ -58,8 +59,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra StateTracker& state_tracker_) : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), - texture_cache_runtime(device, program_manager, state_tracker), - texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device), + texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), + texture_cache(texture_cache_runtime, *this), + buffer_cache_runtime(device, staging_buffer_pool), buffer_cache(*this, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 410d8ffc5..a73ad15c1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -230,6 +230,7 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; + StagingBufferPool staging_buffer_pool; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6ecda2984..3f077311e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -232,12 +232,14 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), }, host_info{ + .support_float64 = true, .support_float16 = false, .support_int64 = device.HasShaderInt64(), .needs_demote_reorder = device.IsAmd(), .support_snorm_render_buffer = false, .support_viewport_index_layer = device.HasVertexViewportLayer(), .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), + .support_conditional_barrier = device.SupportsConditionalBarriers(), } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h index 207a75d42..d12cd06fa 100644 --- a/src/video_core/renderer_opengl/gl_shader_context.h +++ b/src/video_core/renderer_opengl/gl_shader_context.h @@ -16,9 +16,9 @@ struct ShaderPools { inst.ReleaseContents(); } - Shader::ObjectPool<Shader::IR::Inst> inst; - Shader::ObjectPool<Shader::IR::Block> block; - Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; + Shader::ObjectPool<Shader::IR::Inst> inst{8192}; + Shader::ObjectPool<Shader::IR::Block> block{32}; + Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block{32}; }; struct Context { diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp new file mode 100644 index 000000000..bbb06e51f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp @@ -0,0 +1,150 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <array> +#include <memory> +#include <span> + +#include <glad/glad.h> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" + +MICROPROFILE_DEFINE(OpenGL_BufferRequest, "OpenGL", "BufferRequest", MP_RGB(128, 128, 192)); + +namespace OpenGL { + +StagingBufferMap::~StagingBufferMap() { + if (sync) { + sync->Create(); + } +} + +StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) + : storage_flags{storage_flags_}, map_flags{map_flags_} {} + +StagingBuffers::~StagingBuffers() = default; + +StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) { + MICROPROFILE_SCOPE(OpenGL_BufferRequest); + + const size_t index = RequestBuffer(requested_size); + OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; + sync_indices[index] = insert_fence ? ++current_sync_index : 0; + return StagingBufferMap{ + .mapped_span = std::span(maps[index], requested_size), + .sync = sync, + .buffer = buffers[index].handle, + }; +} + +size_t StagingBuffers::RequestBuffer(size_t requested_size) { + if (const std::optional<size_t> index = FindBuffer(requested_size); index) { + return *index; + } + + OGLBuffer& buffer = buffers.emplace_back(); + buffer.Create(); + const auto next_pow2_size = Common::NextPow2(requested_size); + glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr, + storage_flags | GL_MAP_PERSISTENT_BIT); + maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size, + map_flags | GL_MAP_PERSISTENT_BIT))); + syncs.emplace_back(); + sync_indices.emplace_back(); + sizes.push_back(next_pow2_size); + + ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && + maps.size() == sizes.size()); + + return buffers.size() - 1; +} + +std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) { + size_t known_unsignaled_index = current_sync_index + 1; + size_t smallest_buffer = std::numeric_limits<size_t>::max(); + std::optional<size_t> found; + const size_t num_buffers = sizes.size(); + for (size_t index = 0; index < num_buffers; ++index) { + const size_t buffer_size = sizes[index]; + if (buffer_size < requested_size || buffer_size >= smallest_buffer) { + continue; + } + if (syncs[index].handle != 0) { + if (sync_indices[index] >= known_unsignaled_index) { + // This fence is later than a fence that is known to not be signaled + continue; + } + if (!syncs[index].IsSignaled()) { + // Since this fence hasn't been signaled, it's safe to assume all later + // fences haven't been signaled either + known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]); + continue; + } + syncs[index].Release(); + } + smallest_buffer = buffer_size; + found = index; + } + return found; +} + +StreamBuffer::StreamBuffer() { + static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; + buffer.Create(); + glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); + glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); + mapped_pointer = + static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); + for (OGLSync& sync : fences) { + sync.Create(); + } +} + +std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { + ASSERT(size < REGION_SIZE); + for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; + ++region) { + fences[region].Create(); + } + used_iterator = iterator; + + for (size_t region = Region(free_iterator) + 1, + region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); + region < region_end; ++region) { + glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); + fences[region].Release(); + } + if (iterator + size >= free_iterator) { + free_iterator = iterator + size; + } + if (iterator + size > STREAM_BUFFER_SIZE) { + for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { + fences[region].Create(); + } + used_iterator = 0; + iterator = 0; + free_iterator = size; + + for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { + glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); + fences[region].Release(); + } + } + const size_t offset = iterator; + iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); + return {std::span(mapped_pointer + offset, size), offset}; +} + +StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) { + return upload_buffers.RequestMap(size, true); +} + +StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) { + return download_buffers.RequestMap(size, false); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h index 8fe927aaf..60f72d3a0 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h @@ -4,8 +4,10 @@ #pragma once #include <array> +#include <optional> #include <span> #include <utility> +#include <vector> #include <glad/glad.h> @@ -17,6 +19,35 @@ namespace OpenGL { using namespace Common::Literals; +struct StagingBufferMap { + ~StagingBufferMap(); + + std::span<u8> mapped_span; + size_t offset = 0; + OGLSync* sync; + GLuint buffer; +}; + +struct StagingBuffers { + explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); + ~StagingBuffers(); + + StagingBufferMap RequestMap(size_t requested_size, bool insert_fence); + + size_t RequestBuffer(size_t requested_size); + + std::optional<size_t> FindBuffer(size_t requested_size); + + std::vector<OGLSync> syncs; + std::vector<OGLBuffer> buffers; + std::vector<u8*> maps; + std::vector<size_t> sizes; + std::vector<size_t> sync_indices; + GLenum storage_flags; + GLenum map_flags; + size_t current_sync_index = 0; +}; + class StreamBuffer { static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB; static constexpr size_t NUM_SYNCS = 16; @@ -48,4 +79,17 @@ private: std::array<OGLSync, NUM_SYNCS> fences; }; +class StagingBufferPool { +public: + StagingBufferPool() = default; + ~StagingBufferPool() = default; + + StagingBufferMap RequestUploadBuffer(size_t size); + StagingBufferMap RequestDownloadBuffer(size_t size); + +private: + StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; + StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT}; +}; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp deleted file mode 100644 index 2005c8993..000000000 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include <array> -#include <memory> -#include <span> - -#include <glad/glad.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" - -namespace OpenGL { - -StreamBuffer::StreamBuffer() { - static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; - buffer.Create(); - glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); - glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); - mapped_pointer = - static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); - for (OGLSync& sync : fences) { - sync.Create(); - } -} - -std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { - ASSERT(size < REGION_SIZE); - for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; - ++region) { - fences[region].Create(); - } - used_iterator = iterator; - - for (size_t region = Region(free_iterator) + 1, - region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); - region < region_end; ++region) { - glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); - fences[region].Release(); - } - if (iterator + size >= free_iterator) { - free_iterator = iterator + size; - } - if (iterator + size > STREAM_BUFFER_SIZE) { - for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { - fences[region].Create(); - } - used_iterator = 0; - iterator = 0; - free_iterator = size; - - for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { - glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); - fences[region].Release(); - } - } - const size_t offset = iterator; - iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); - return {std::span(mapped_pointer + offset, size), offset}; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 56d0ff869..3b446be07 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -456,19 +456,14 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8; } } - } // Anonymous namespace -ImageBufferMap::~ImageBufferMap() { - if (sync) { - sync->Create(); - } -} - TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, - StateTracker& state_tracker_) - : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager), - format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} { + StateTracker& state_tracker_, + StagingBufferPool& staging_buffer_pool_) + : device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_}, + util_shaders(program_manager), format_conversion_pass{util_shaders}, + resolution{Settings::values.resolution_info} { static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; @@ -558,12 +553,12 @@ void TextureCacheRuntime::Finish() { glFinish(); } -ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { - return upload_buffers.RequestMap(size, true); +StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestUploadBuffer(size); } -ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { - return download_buffers.RequestMap(size, false); +StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { + return staging_buffer_pool.RequestDownloadBuffer(size); } u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { @@ -648,7 +643,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, is_linear ? GL_LINEAR : GL_NEAREST); } -void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, +void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map, std::span<const SwizzleParameters> swizzles) { switch (image.info.type) { case ImageType::e2D: @@ -690,64 +685,6 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept { return device.HasASTC(); } -TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) - : storage_flags{storage_flags_}, map_flags{map_flags_} {} - -TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; - -ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, - bool insert_fence) { - const size_t index = RequestBuffer(requested_size); - OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; - return ImageBufferMap{ - .mapped_span = std::span(maps[index], requested_size), - .sync = sync, - .buffer = buffers[index].handle, - }; -} - -size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { - if (const std::optional<size_t> index = FindBuffer(requested_size); index) { - return *index; - } - - OGLBuffer& buffer = buffers.emplace_back(); - buffer.Create(); - glNamedBufferStorage(buffer.handle, requested_size, nullptr, - storage_flags | GL_MAP_PERSISTENT_BIT); - maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size, - map_flags | GL_MAP_PERSISTENT_BIT))); - - syncs.emplace_back(); - sizes.push_back(requested_size); - - ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && - maps.size() == sizes.size()); - - return buffers.size() - 1; -} - -std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { - size_t smallest_buffer = std::numeric_limits<size_t>::max(); - std::optional<size_t> found; - const size_t num_buffers = sizes.size(); - for (size_t index = 0; index < num_buffers; ++index) { - const size_t buffer_size = sizes[index]; - if (buffer_size < requested_size || buffer_size >= smallest_buffer) { - continue; - } - if (syncs[index].handle != 0) { - if (!syncs[index].IsSignaled()) { - continue; - } - syncs[index].Release(); - } - smallest_buffer = buffer_size; - found = index; - } - return found; -} - Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { @@ -823,7 +760,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset, } } -void Image::UploadMemory(const ImageBufferMap& map, +void Image::UploadMemory(const StagingBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { UploadMemory(map.buffer, map.offset, copies); } @@ -870,7 +807,7 @@ void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> b } } -void Image::DownloadMemory(ImageBufferMap& map, +void Image::DownloadMemory(StagingBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { DownloadMemory(map.buffer, map.offset, copies); } @@ -1331,36 +1268,48 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1); - sampler.Create(); - const GLuint handle = sampler.handle; - glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); - glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); - glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); - glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); - glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); - glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); - glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); - glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); - glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); - glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); - glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); - - if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { - const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); - glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy); - } else { - LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); - } - if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { - glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); - } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { - LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required"); - } - if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { - glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); - } else if (seamless == GL_FALSE) { - // We default to false because it's more common - LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); + const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); + + const auto create_sampler = [&](const f32 anisotropy) { + OGLSampler new_sampler; + new_sampler.Create(); + const GLuint handle = new_sampler.handle; + glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); + glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); + glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); + glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); + glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); + glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); + glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); + + if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { + glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, anisotropy); + } else { + LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); + } + if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { + glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); + } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { + LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required"); + } + if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { + glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); + } else if (seamless == GL_FALSE) { + // We default to false because it's more common + LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); + } + return new_sampler; + }; + + sampler = create_sampler(max_anisotropy); + + const f32 max_anisotropy_default = static_cast<f32>(1U << config.max_anisotropy); + if (max_anisotropy > max_anisotropy_default) { + sampler_default_anisotropy = create_sampler(max_anisotropy_default); } } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3e9b3302b..3676eaaa9 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -11,6 +11,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -37,15 +38,6 @@ using VideoCommon::Region2D; using VideoCommon::RenderTargets; using VideoCommon::SlotVector; -struct ImageBufferMap { - ~ImageBufferMap(); - - std::span<u8> mapped_span; - size_t offset = 0; - OGLSync* sync; - GLuint buffer; -}; - struct FormatProperties { GLenum compatibility_class; bool compatibility_by_size; @@ -74,14 +66,15 @@ class TextureCacheRuntime { public: explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, - StateTracker& state_tracker); + StateTracker& state_tracker, + StagingBufferPool& staging_buffer_pool); ~TextureCacheRuntime(); void Finish(); - ImageBufferMap UploadStagingBuffer(size_t size); + StagingBufferMap UploadStagingBuffer(size_t size); - ImageBufferMap DownloadStagingBuffer(size_t size); + StagingBufferMap DownloadStagingBuffer(size_t size); u64 GetDeviceLocalMemory() const { return device_access_memory; @@ -120,7 +113,7 @@ public: const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); - void AccelerateImageUpload(Image& image, const ImageBufferMap& map, + void AccelerateImageUpload(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); void InsertUploadMemoryBarrier(); @@ -149,35 +142,16 @@ public: } private: - struct StagingBuffers { - explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); - ~StagingBuffers(); - - ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); - - size_t RequestBuffer(size_t requested_size); - - std::optional<size_t> FindBuffer(size_t requested_size); - - std::vector<OGLSync> syncs; - std::vector<OGLBuffer> buffers; - std::vector<u8*> maps; - std::vector<size_t> sizes; - GLenum storage_flags; - GLenum map_flags; - }; - const Device& device; StateTracker& state_tracker; + StagingBufferPool& staging_buffer_pool; + UtilShaders util_shaders; FormatConversionPass format_conversion_pass; std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; bool has_broken_texture_view_formats = false; - StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; - StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT}; - OGLTexture null_image_1d_array; OGLTexture null_image_cube_array; OGLTexture null_image_3d; @@ -213,7 +187,7 @@ public: void UploadMemory(GLuint buffer_handle, size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies); - void UploadMemory(const ImageBufferMap& map, + void UploadMemory(const StagingBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, @@ -222,7 +196,8 @@ public: void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies); - void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); + void DownloadMemory(StagingBufferMap& map, + std::span<const VideoCommon::BufferImageCopy> copies); GLuint StorageHandle() noexcept; @@ -334,12 +309,21 @@ class Sampler { public: explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); - GLuint Handle() const noexcept { + [[nodiscard]] GLuint Handle() const noexcept { return sampler.handle; } + [[nodiscard]] GLuint HandleWithDefaultAnisotropy() const noexcept { + return sampler_default_anisotropy.handle; + } + + [[nodiscard]] bool HasAddedAnisotropy() const noexcept { + return static_cast<bool>(sampler_default_anisotropy.handle); + } + private: OGLSampler sampler; + OGLSampler sampler_default_anisotropy; }; class Framebuffer { diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 2c7ac210b..544982d18 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -19,6 +19,7 @@ #include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/accelerated_swizzle.h" @@ -63,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) UtilShaders::~UtilShaders() = default; -void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, +void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles) { static constexpr GLuint BINDING_INPUT_BUFFER = 0; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; @@ -111,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, program_manager.RestoreGuestCompute(); } -void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, +void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; @@ -148,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, program_manager.RestoreGuestCompute(); } -void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, +void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; @@ -189,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, program_manager.RestoreGuestCompute(); } -void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, +void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_INPUT_BUFFER = 0; diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 9013808e7..feecd404c 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -16,23 +16,23 @@ namespace OpenGL { class Image; class ProgramManager; -struct ImageBufferMap; +struct StagingBufferMap; class UtilShaders { public: explicit UtilShaders(ProgramManager& program_manager); ~UtilShaders(); - void ASTCDecode(Image& image, const ImageBufferMap& map, + void ASTCDecode(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, + void BlockLinearUpload2D(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, + void BlockLinearUpload3D(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void PitchUpload(Image& image, const ImageBufferMap& map, + void PitchUpload(Image& image, const StagingBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); void CopyBC4(Image& dst_image, Image& src_image, |