diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
9 files changed, 108 insertions, 30 deletions
diff --git a/src/video_core/renderer_opengl/blit_image.cpp b/src/video_core/renderer_opengl/blit_image.cpp index 9a560a73b..3b03e8d5a 100644 --- a/src/video_core/renderer_opengl/blit_image.cpp +++ b/src/video_core/renderer_opengl/blit_image.cpp @@ -22,7 +22,7 @@ BlitImageHelper::~BlitImageHelper() = default; void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, const Region2D& dst_region, const Region2D& src_region, const Extent3D& src_size) { - glEnable(GL_CULL_FACE); + glDisable(GL_CULL_FACE); glDisable(GL_COLOR_LOGIC_OP); glDisable(GL_DEPTH_TEST); glDisable(GL_STENCIL_TEST); @@ -31,7 +31,6 @@ void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, G glDisable(GL_ALPHA_TEST); glDisablei(GL_BLEND, 0); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - glCullFace(GL_BACK); glFrontFace(GL_CW); glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthRangeIndexed(0, 0.0, 0.0); diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 91463f854..5326172af 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -27,9 +27,7 @@ bool GLInnerFence::IsSignaled() const { return true; } ASSERT(sync_object.handle != 0); - GLint sync_status; - glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status); - return sync_status == GL_SIGNALED; + return sync_object.IsSignaled(); } void GLInnerFence::Wait() { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 29491e762..89000d6e0 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -621,10 +621,7 @@ bool GraphicsPipeline::IsBuilt() noexcept { if (built_fence.handle == 0) { return false; } - // Timeout of zero means this is non-blocking - const auto sync_status = 
glClientWaitSync(built_fence.handle, 0, 0); - ASSERT(sync_status != GL_WAIT_FAILED); - is_built = sync_status != GL_TIMEOUT_EXPIRED; + is_built = built_fence.IsSignaled(); return is_built; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7bced675c..90e35e307 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -63,7 +63,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), - query_cache(*this), accelerate_dma(buffer_cache), + query_cache(*this), accelerate_dma(buffer_cache, texture_cache), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), blit_image(program_manager_) {} @@ -357,6 +357,7 @@ void RasterizerOpenGL::DrawTexture() { .y = static_cast<s32>(draw_texture_state.src_y1)}}; blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(), sampler->Handle(), dst_region, src_region, texture.size); + state_tracker.InvalidateState(); } ++num_queued_commands; @@ -576,7 +577,7 @@ bool RasterizerOpenGL::AccelerateConditionalRendering() { // Reimplement Host conditional rendering. return false; } - // Medium / Low Hack: stub any checks on queries writen into the buffer cache. + // Medium / Low Hack: stub any checks on queries written into the buffer cache. 
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()}; Maxwell::ReportSemaphore::Compare cmp; if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp), @@ -1262,7 +1263,8 @@ void RasterizerOpenGL::ReleaseChannel(s32 channel_id) { query_cache.EraseChannel(channel_id); } -AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} +AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_) + : buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {} bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { std::scoped_lock lock{buffer_cache.mutex}; @@ -1274,4 +1276,44 @@ bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) { return buffer_cache.DMAClear(src_address, amount, value); } +template <bool IS_IMAGE_UPLOAD> +bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, + const Tegra::DMA::BufferOperand& buffer_operand, + const Tegra::DMA::ImageOperand& image_operand) { + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + const auto image_id = texture_cache.DmaImageId(image_operand); + if (image_id == VideoCommon::NULL_IMAGE_ID) { + return false; + } + const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); + static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; + const auto post_op = IS_IMAGE_UPLOAD ? 
VideoCommon::ObtainBufferOperation::DoNothing + : VideoCommon::ObtainBufferOperation::MarkAsWritten; + const auto [buffer, offset] = + buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); + + const auto [image, copy] = texture_cache.DmaBufferImageCopy( + copy_info, buffer_operand, image_operand, image_id, IS_IMAGE_UPLOAD); + const std::span copy_span{&copy, 1}; + + if constexpr (IS_IMAGE_UPLOAD) { + image->UploadMemory(buffer->Handle(), offset, copy_span); + } else { + image->DownloadMemory(buffer->Handle(), offset, copy_span); + } + return true; +} + +bool AccelerateDMA::ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, + const Tegra::DMA::ImageOperand& image_operand, + const Tegra::DMA::BufferOperand& buffer_operand) { + return DmaBufferImageCopy<false>(copy_info, buffer_operand, image_operand); +} + +bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info, + const Tegra::DMA::BufferOperand& buffer_operand, + const Tegra::DMA::ImageOperand& image_operand) { + return DmaBufferImageCopy<true>(copy_info, buffer_operand, image_operand); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 0c45832ae..ad6978bd0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -50,14 +50,26 @@ static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128); class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface { public: - explicit AccelerateDMA(BufferCache& buffer_cache); + explicit AccelerateDMA(BufferCache& buffer_cache, TextureCache& texture_cache); bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) override; bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override; + bool ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::ImageOperand& src, + const Tegra::DMA::BufferOperand& dst) override; + + bool BufferToImage(const
Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& src, + const Tegra::DMA::ImageOperand& dst) override; + private: + template <bool IS_IMAGE_UPLOAD> + bool DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, + const Tegra::DMA::BufferOperand& src, + const Tegra::DMA::ImageOperand& dst); + BufferCache& buffer_cache; + TextureCache& texture_cache; }; class RasterizerOpenGL : public VideoCore::RasterizerAccelerated, @@ -150,7 +162,7 @@ private: /// Syncs the cull mode to match the guest state void SyncCullMode(); - /// Syncs the primitve restart to match the guest state + /// Syncs the primitive restart to match the guest state void SyncPrimitiveRestart(); /// Syncs the depth test state to match the guest state @@ -234,7 +246,7 @@ private: std::array<GLuint, MAX_TEXTURES> texture_handles{}; std::array<GLuint, MAX_IMAGES> image_handles{}; - /// Number of commands queued to the OpenGL driver. Resetted on flush. + /// Number of commands queued to the OpenGL driver. Reset on flush. size_t num_queued_commands = 0; bool has_written_global_memory = false; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 3a664fdec..eae8fd110 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -3,6 +3,7 @@ #include <string_view> #include <glad/glad.h> +#include "common/assert.h" #include "common/microprofile.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -158,6 +159,15 @@ void OGLSync::Release() { handle = 0; } +bool OGLSync::IsSignaled() const noexcept { + // At least on Nvidia, glClientWaitSync with a timeout of 0 + // is faster than glGetSynciv of GL_SYNC_STATUS. + // Timeout of 0 means this check is non-blocking. 
+ const auto sync_status = glClientWaitSync(handle, 0, 0); + ASSERT(sync_status != GL_WAIT_FAILED); + return sync_status != GL_TIMEOUT_EXPIRED; +} + void OGLFramebuffer::Create() { if (handle != 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index bc05ba4bd..77362acd2 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -263,6 +263,9 @@ public: /// Deletes the internal OpenGL resource void Release(); + /// Checks if the sync has been signaled + bool IsSignaled() const noexcept; + GLsync handle = 0; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b047e7b3d..0b9c4a904 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -112,13 +112,17 @@ GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) { return GL_NONE; } -GLenum TextureMode(PixelFormat format, bool is_first) { +GLenum TextureMode(PixelFormat format, std::array<SwizzleSource, 4> swizzle) { + bool any_r = + std::ranges::any_of(swizzle, [](SwizzleSource s) { return s == SwizzleSource::R; }); switch (format) { case PixelFormat::D24_UNORM_S8_UINT: case PixelFormat::D32_FLOAT_S8_UINT: - return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; + // R = depth, G = stencil + return any_r ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; case PixelFormat::S8_UINT_D24_UNORM: - return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; + // R = stencil, G = depth + return any_r ? 
GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; default: ASSERT(false); return GL_DEPTH_COMPONENT; @@ -208,8 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 case PixelFormat::D32_FLOAT_S8_UINT: case PixelFormat::S8_UINT_D24_UNORM: UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G); - glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, - TextureMode(format, swizzle[0] == SwizzleSource::R)); + glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, TextureMode(format, swizzle)); std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); break; case PixelFormat::A5B5G5R1_UNORM: { @@ -714,9 +717,7 @@ std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req continue; } if (syncs[index].handle != 0) { - GLint status; - glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status); - if (status != GL_SIGNALED) { + if (!syncs[index].IsSignaled()) { continue; } syncs[index].Release(); @@ -762,14 +763,14 @@ Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBas Image::~Image() = default; -void Image::UploadMemory(const ImageBufferMap& map, +void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies) { const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { ScaleDown(true); } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); - glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, buffer_handle); + glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); glPixelStorei(GL_UNPACK_ALIGNMENT, 1); @@ -788,21 +789,26 @@ void Image::UploadMemory(const ImageBufferMap& map, current_image_height = copy.buffer_image_height; glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); } - CopyBufferToImage(copy, map.offset); + CopyBufferToImage(copy, 
buffer_offset); } if (is_rescaled) { ScaleUp(); } } -void Image::DownloadMemory(ImageBufferMap& map, +void Image::UploadMemory(const ImageBufferMap& map, + std::span<const VideoCommon::BufferImageCopy> copies) { + UploadMemory(map.buffer, map.offset, copies); +} + +void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies) { const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { ScaleDown(); } glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API - glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); + glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle); glPixelStorei(GL_PACK_ALIGNMENT, 1); u32 current_row_length = std::numeric_limits<u32>::max(); @@ -820,13 +826,18 @@ void Image::DownloadMemory(ImageBufferMap& map, current_image_height = copy.buffer_image_height; glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); } - CopyImageToBuffer(copy, map.offset); + CopyImageToBuffer(copy, buffer_offset); } if (is_rescaled) { ScaleUp(true); } } +void Image::DownloadMemory(ImageBufferMap& map, + std::span<const VideoCommon::BufferImageCopy> copies) { + DownloadMemory(map.buffer, map.offset, copies); +} + GLuint Image::StorageHandle() noexcept { switch (info.format) { case PixelFormat::A8B8G8R8_SRGB: diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index e30875496..911e4607a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -206,9 +206,15 @@ public: Image(Image&&) = default; Image& operator=(Image&&) = default; + void UploadMemory(GLuint buffer_handle, size_t buffer_offset, + std::span<const VideoCommon::BufferImageCopy> copies); + void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); + void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, + std::span<const 
VideoCommon::BufferImageCopy> copies); + void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); GLuint StorageHandle() noexcept; |