diff options
Diffstat (limited to 'src/video_core')
31 files changed, 599 insertions, 268 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 26939be3f..6ea7cc6a5 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -542,7 +542,7 @@ public: BitField<12, 1, InvMemoryLayout> type; } memory_layout; union { - BitField<0, 16, u32> array_mode; + BitField<0, 16, u32> layers; BitField<16, 1, u32> volume; }; u32 layer_stride; @@ -800,8 +800,12 @@ public: u32 zeta_width; u32 zeta_height; + union { + BitField<0, 16, u32> zeta_layers; + BitField<16, 1, u32> zeta_volume; + }; - INSERT_UNION_PADDING_WORDS(0x27); + INSERT_UNION_PADDING_WORDS(0x26); u32 depth_test_enable; @@ -1507,6 +1511,7 @@ ASSERT_REG_POSITION(vertex_attrib_format, 0x458); ASSERT_REG_POSITION(rt_control, 0x487); ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); +ASSERT_REG_POSITION(zeta_layers, 0x48c); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 7d7137109..e8f763ce9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -140,71 +140,6 @@ void GPU::FlushCommands() { renderer.Rasterizer().FlushCommands(); } -u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { - ASSERT(format != RenderTargetFormat::NONE); - - switch (format) { - case RenderTargetFormat::RGBA32_FLOAT: - case RenderTargetFormat::RGBA32_UINT: - return 16; - case RenderTargetFormat::RGBA16_UINT: - case RenderTargetFormat::RGBA16_UNORM: - case RenderTargetFormat::RGBA16_FLOAT: - case RenderTargetFormat::RGBX16_FLOAT: - case RenderTargetFormat::RG32_FLOAT: - case RenderTargetFormat::RG32_UINT: - return 8; - case RenderTargetFormat::RGBA8_UNORM: - case RenderTargetFormat::RGBA8_SNORM: - case RenderTargetFormat::RGBA8_SRGB: - case RenderTargetFormat::RGBA8_UINT: - case RenderTargetFormat::RGB10_A2_UNORM: - case RenderTargetFormat::BGRA8_UNORM: - case RenderTargetFormat::BGRA8_SRGB: - case RenderTargetFormat::RG16_UNORM: - case RenderTargetFormat::RG16_SNORM: - case RenderTargetFormat::RG16_UINT: - case RenderTargetFormat::RG16_SINT: - case RenderTargetFormat::RG16_FLOAT: - case RenderTargetFormat::R32_FLOAT: - case RenderTargetFormat::R11G11B10_FLOAT: - case RenderTargetFormat::R32_UINT: - return 4; - case RenderTargetFormat::R16_UNORM: - case RenderTargetFormat::R16_SNORM: - case RenderTargetFormat::R16_UINT: - case RenderTargetFormat::R16_SINT: - case RenderTargetFormat::R16_FLOAT: - case RenderTargetFormat::RG8_UNORM: - case RenderTargetFormat::RG8_SNORM: - return 2; - case RenderTargetFormat::R8_UNORM: - case RenderTargetFormat::R8_UINT: - return 1; - default: - UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format)); - return 1; - } -} - -u32 DepthFormatBytesPerPixel(DepthFormat format) { - switch (format) { - case DepthFormat::Z32_S8_X24_FLOAT: - return 8; - case DepthFormat::Z32_FLOAT: - case DepthFormat::S8_Z24_UNORM: - case DepthFormat::Z24_X8_UNORM: - case DepthFormat::Z24_S8_UNORM: - case DepthFormat::Z24_C8_UNORM: - return 4; - case DepthFormat::Z16_UNORM: - return 2; - default: - UNIMPLEMENTED_MSG("Unimplemented Depth format {}", static_cast<u32>(format)); - return 1; - } -} - // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. // So the values you see in docs might be multiplied by 4. diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 07727210c..ba8c9d665 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -57,6 +57,7 @@ enum class RenderTargetFormat : u32 { RG16_UINT = 0xDD, RG16_FLOAT = 0xDE, R11G11B10_FLOAT = 0xE0, + R32_SINT = 0xE3, R32_UINT = 0xE4, R32_FLOAT = 0xE5, B5G6R5_UNORM = 0xE8, @@ -82,12 +83,6 @@ enum class DepthFormat : u32 { Z32_S8_X24_FLOAT = 0x19, }; -/// Returns the number of bytes per pixel of each rendertarget format. -u32 RenderTargetBytesPerPixel(RenderTargetFormat format); - -/// Returns the number of bytes per pixel of each depth format. -u32 DepthFormatBytesPerPixel(DepthFormat format); - struct CommandListHeader; class DebugContext; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 2cdf1aa7f..b1088af3d 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,7 +5,7 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/frontend/scope_acquire_window_context.h" +#include "core/frontend/scope_acquire_context.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" #include "video_core/gpu_thread.h" @@ -27,7 +27,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p return; } - Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; + Core::Frontend::ScopeAcquireContext acquire_context{renderer.GetRenderWindow()}; CommandDataContainer next; while (state.is_running) { diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 2f2fe6859..f2c83266e 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -85,6 +85,7 @@ static constexpr ConversionArray morton_to_linear_fns = { MortonCopy<true, PixelFormat::RG32UI>, MortonCopy<true, PixelFormat::RGBX16F>, MortonCopy<true, PixelFormat::R32UI>, + MortonCopy<true, PixelFormat::R32I>, MortonCopy<true, PixelFormat::ASTC_2D_8X8>, MortonCopy<true, PixelFormat::ASTC_2D_8X5>, MortonCopy<true, PixelFormat::ASTC_2D_5X4>, @@ -166,6 +167,7 @@ static constexpr ConversionArray linear_to_morton_fns = { MortonCopy<false, PixelFormat::RG32UI>, MortonCopy<false, PixelFormat::RGBX16F>, MortonCopy<false, PixelFormat::R32UI>, + MortonCopy<false, PixelFormat::R32I>, nullptr, nullptr, nullptr, diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index af1bebc4f..5ec99a126 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -35,15 +35,19 @@ public: explicit RendererBase(Core::Frontend::EmuWindow& window); virtual ~RendererBase(); - /// Swap buffers (render frame) - virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; - /// Initialize the renderer virtual bool Init() = 0; /// Shutdown the renderer virtual void ShutDown() = 0; + /// Finalize rendering the guest frame and draw into the presentation texture + virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; + + /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer + /// specific implementation) + virtual void TryPresent(int timeout_ms) = 0; + // Getter/setter functions: // ------------------------ diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index f0ddfb276..c0aee770f 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -15,6 +15,24 @@ MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_R namespace OpenGL { +void OGLRenderbuffer::Create() { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glGenRenderbuffers(1, &handle); +} + +void OGLRenderbuffer::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteRenderbuffers(1, &handle); + OpenGLState::GetCurState().ResetRenderbuffer(handle).Apply(); + handle = 0; +} + void OGLTexture::Create(GLenum target) { if (handle != 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 514d1d165..995a4e45e 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -11,6 +11,31 @@ namespace OpenGL { +class OGLRenderbuffer : private NonCopyable { +public: + OGLRenderbuffer() = default; + + OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + + ~OGLRenderbuffer() { + Release(); + } + + OGLRenderbuffer& operator=(OGLRenderbuffer&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + /// Creates a new internal OpenGL resource and stores the handle + void Create(); + + /// Deletes the internal OpenGL resource + void Release(); + + GLuint handle = 0; +}; + class OGLTexture : private NonCopyable { public: OGLTexture() = default; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index ab1f7983c..7d3bc1a1f 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -423,6 +423,13 @@ void OpenGLState::ApplyClipControl() { } } +void OpenGLState::ApplyRenderBuffer() { + if (cur_state.renderbuffer != renderbuffer) { + cur_state.renderbuffer = renderbuffer; + glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); + } +} + void OpenGLState::ApplyTextures() { const std::size_t size = std::size(textures); for (std::size_t i = 0; i < size; ++i) { @@ -478,6 +485,7 @@ void OpenGLState::Apply() { ApplyPolygonOffset(); ApplyAlphaTest(); ApplyClipControl(); + ApplyRenderBuffer(); } void OpenGLState::EmulateViewportWithScissor() { @@ -551,4 +559,11 @@ OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) { return *this; } +OpenGLState& OpenGLState::ResetRenderbuffer(GLuint handle) { + if (renderbuffer == handle) { + renderbuffer = 0; + } + return *this; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 4953eeda2..bce662f2c 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -158,6 +158,8 @@ public: GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE; } clip_control; + GLuint renderbuffer{}; // GL_RENDERBUFFER_BINDING + OpenGLState(); /// Get the currently active OpenGL state @@ -196,6 +198,7 @@ public: void ApplyPolygonOffset(); void ApplyAlphaTest(); void ApplyClipControl(); + void ApplyRenderBuffer(); /// Resets any references to the given resource OpenGLState& UnbindTexture(GLuint handle); @@ -204,6 +207,7 @@ public: OpenGLState& ResetPipeline(GLuint handle); OpenGLState& ResetVertexArray(GLuint handle); OpenGLState& ResetFramebuffer(GLuint handle); + OpenGLState& ResetRenderbuffer(GLuint handle); /// Viewport does not affects glClearBuffer so emulate viewport using scissor test void EmulateViewportWithScissor(); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 5c1ae1418..cf934b0d8 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -87,6 +87,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI + {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4 @@ -405,24 +406,36 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p CachedSurfaceView::~CachedSurfaceView() = default; void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { - ASSERT(params.num_layers == 1 && params.num_levels == 1); + ASSERT(params.num_levels == 1); - const auto& owner_params = surface.GetSurfaceParams(); + const GLuint texture = surface.GetTexture(); + if (params.num_layers > 1) { + // Layered framebuffer attachments + UNIMPLEMENTED_IF(params.base_layer != 0); + + switch (params.target) { + case SurfaceTarget::Texture2DArray: + glFramebufferTexture(target, attachment, texture, params.base_level); + break; + default: + UNIMPLEMENTED(); + } + return; + } - switch (owner_params.target) { + const GLenum view_target = surface.GetTarget(); + switch (surface.GetSurfaceParams().target) { case SurfaceTarget::Texture1D: - glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(), - params.base_level); + glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); break; case SurfaceTarget::Texture2D: - glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(), - params.base_level); + glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level, + glFramebufferTextureLayer(target, attachment, texture, params.base_level, params.base_layer); break; default: diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 7ed505628..d3dea3659 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -92,8 +92,32 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { } case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: + case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return GL_UNSIGNED_SHORT; + default: + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); + return {}; + } + case Maxwell::VertexAttribute::Type::SignedScaled: + switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8: + case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return GL_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: + case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return GL_SHORT; default: LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index bba16afaf..a4340b502 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -9,11 +9,11 @@ #include <glad/glad.h> #include "common/assert.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "common/telemetry.h" #include "core/core.h" #include "core/core_timing.h" #include "core/frontend/emu_window.h" -#include "core/frontend/scope_acquire_window_context.h" #include "core/memory.h" #include "core/perf_stats.h" #include "core/settings.h" @@ -24,6 +24,144 @@ namespace OpenGL { +// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have +// to wait on available presentation frames. +constexpr std::size_t SWAP_CHAIN_SIZE = 3; + +struct Frame { + u32 width{}; /// Width of the frame (to detect resize) + u32 height{}; /// Height of the frame + bool color_reloaded{}; /// Texture attachment was recreated (ie: resized) + OpenGL::OGLRenderbuffer color{}; /// Buffer shared between the render/present FBO + OpenGL::OGLFramebuffer render{}; /// FBO created on the render thread + OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread + GLsync render_fence{}; /// Fence created on the render thread + GLsync present_fence{}; /// Fence created on the presentation thread + bool is_srgb{}; /// Framebuffer is sRGB or RGB +}; + +/** + * For smooth Vsync rendering, we want to always present the latest frame that the core generates, + * but also make sure that rendering happens at the pace that the frontend dictates. This is a + * helper class that the renderer uses to sync frames between the render thread and the presentation + * thread + */ +class FrameMailbox { +public: + std::mutex swap_chain_lock; + std::condition_variable present_cv; + std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; + std::queue<Frame*> free_queue; + std::deque<Frame*> present_queue; + Frame* previous_frame{}; + + FrameMailbox() { + for (auto& frame : swap_chain) { + free_queue.push(&frame); + } + } + + ~FrameMailbox() { + // lock the mutex and clear out the present and free_queues and notify any people who are + // blocked to prevent deadlock on shutdown + std::scoped_lock lock{swap_chain_lock}; + std::queue<Frame*>().swap(free_queue); + present_queue.clear(); + present_cv.notify_all(); + } + + void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { + frame->present.Release(); + frame->present.Create(); + GLint previous_draw_fbo{}; + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + frame->color.handle); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); + } + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); + frame->color_reloaded = false; + } + + void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { + OpenGLState prev_state = OpenGLState::GetCurState(); + OpenGLState state = OpenGLState::GetCurState(); + + // Recreate the color texture attachment + frame->color.Release(); + frame->color.Create(); + state.renderbuffer = frame->color.handle; + state.Apply(); + glRenderbufferStorage(GL_RENDERBUFFER, frame->is_srgb ? GL_SRGB8 : GL_RGB8, width, height); + + // Recreate the FBO for the render target + frame->render.Release(); + frame->render.Create(); + state.draw.read_framebuffer = frame->render.handle; + state.draw.draw_framebuffer = frame->render.handle; + state.Apply(); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + frame->color.handle); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); + } + prev_state.Apply(); + frame->width = width; + frame->height = height; + frame->color_reloaded = true; + } + + Frame* GetRenderFrame() { + std::unique_lock lock{swap_chain_lock}; + + // If theres no free frames, we will reuse the oldest render frame + if (free_queue.empty()) { + auto frame = present_queue.back(); + present_queue.pop_back(); + return frame; + } + + Frame* frame = free_queue.front(); + free_queue.pop(); + return frame; + } + + void ReleaseRenderFrame(Frame* frame) { + std::unique_lock lock{swap_chain_lock}; + present_queue.push_front(frame); + present_cv.notify_one(); + } + + Frame* TryGetPresentFrame(int timeout_ms) { + std::unique_lock lock{swap_chain_lock}; + // wait for new entries in the present_queue + present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), + [&] { return !present_queue.empty(); }); + if (present_queue.empty()) { + // timed out waiting for a frame to draw so return the previous frame + return previous_frame; + } + + // free the previous frame and add it back to the free queue + if (previous_frame) { + free_queue.push(previous_frame); + } + + // the newest entries are pushed to the front of the queue + Frame* frame = present_queue.front(); + present_queue.pop_front(); + // remove all old entries from the present queue and move them back to the free_queue + for (auto f : present_queue) { + free_queue.push(f); + } + present_queue.clear(); + previous_frame = frame; + return frame; + } +}; + namespace { constexpr char vertex_shader[] = R"( @@ -158,21 +296,91 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit } // Anonymous namespace RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) - : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {} + : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, + frame_mailbox{std::make_unique<FrameMailbox>()} {} RendererOpenGL::~RendererOpenGL() = default; +MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64)); +MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128)); + void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { + render_window.PollEvents(); + + if (!framebuffer) { + return; + } + // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.AllDirty(); state.Apply(); + PrepareRendertarget(framebuffer); + RenderScreenshot(); + + Frame* frame; + { + MICROPROFILE_SCOPE(OpenGL_WaitPresent); + + frame = frame_mailbox->GetRenderFrame(); + + // Clean up sync objects before drawing + + // INTEL driver workaround. We can't delete the previous render sync object until we are + // sure that the presentation is done + if (frame->present_fence) { + glClientWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED); + } + + // delete the draw fence if the frame wasn't presented + if (frame->render_fence) { + glDeleteSync(frame->render_fence); + frame->render_fence = 0; + } + + // wait for the presentation to be done + if (frame->present_fence) { + glWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED); + glDeleteSync(frame->present_fence); + frame->present_fence = 0; + } + } + + { + MICROPROFILE_SCOPE(OpenGL_RenderFrame); + const auto& layout = render_window.GetFramebufferLayout(); + + // Recreate the frame if the size of the window has changed + if (layout.width != frame->width || layout.height != frame->height || + screen_info.display_srgb != frame->is_srgb) { + LOG_DEBUG(Render_OpenGL, "Reloading render frame"); + frame->is_srgb = screen_info.display_srgb; + frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height); + } + state.draw.draw_framebuffer = frame->render.handle; + state.Apply(); + DrawScreen(layout); + // Create a fence for the frontend to wait on and swap this frame to OffTex + frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + glFlush(); + frame_mailbox->ReleaseRenderFrame(frame); + m_current_frame++; + rasterizer->TickFrame(); + } + + // Restore the rasterizer state + prev_state.AllDirty(); + prev_state.Apply(); +} + +void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { if (framebuffer) { // If framebuffer is provided, reload it from memory to a texture if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) || screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) || - screen_info.texture.pixel_format != framebuffer->pixel_format) { + screen_info.texture.pixel_format != framebuffer->pixel_format || + gl_framebuffer_data.empty()) { // Reallocate texture if the framebuffer size has changed. // This is expected to not happen very often and hence should not be a // performance problem. @@ -181,22 +389,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { // Load the framebuffer from memory, draw it to the screen, and swap buffers LoadFBToScreenInfo(*framebuffer); - - if (renderer_settings.screenshot_requested) - CaptureScreenshot(); - - DrawScreen(render_window.GetFramebufferLayout()); - - rasterizer->TickFrame(); - - render_window.SwapBuffers(); } - - render_window.PollEvents(); - - // Restore the rasterizer state - prev_state.AllDirty(); - prev_state.Apply(); } void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { @@ -418,13 +611,48 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { DrawScreenTriangles(screen_info, static_cast<float>(screen.left), static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()), static_cast<float>(screen.GetHeight())); +} - m_current_frame++; +void RendererOpenGL::TryPresent(int timeout_ms) { + const auto& layout = render_window.GetFramebufferLayout(); + auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms); + if (!frame) { + LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present"); + return; + } + + // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a + // readback since we won't be doing any blending + glClear(GL_COLOR_BUFFER_BIT); + + // Recreate the presentation FBO if the color attachment was changed + if (frame->color_reloaded) { + LOG_DEBUG(Render_OpenGL, "Reloading present frame"); + frame_mailbox->ReloadPresentFrame(frame, layout.width, layout.height); + } + glWaitSync(frame->render_fence, 0, GL_TIMEOUT_IGNORED); + // INTEL workaround. + // Normally we could just delete the draw fence here, but due to driver bugs, we can just delete + // it on the emulation thread without too much penalty + // glDeleteSync(frame.render_sync); + // frame.render_sync = 0; + + glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle); + glBlitFramebuffer(0, 0, frame->width, frame->height, 0, 0, layout.width, layout.height, + GL_COLOR_BUFFER_BIT, GL_LINEAR); + + // Insert fence for the main thread to block on + frame->present_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + glFlush(); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); } -void RendererOpenGL::UpdateFramerate() {} +void RendererOpenGL::RenderScreenshot() { + if (!renderer_settings.screenshot_requested) { + return; + } -void RendererOpenGL::CaptureScreenshot() { // Draw the current frame to the screenshot framebuffer screenshot_framebuffer.Create(); GLuint old_read_fb = state.draw.read_framebuffer; @@ -459,8 +687,6 @@ void RendererOpenGL::CaptureScreenshot() { } bool RendererOpenGL::Init() { - Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window}; - if (GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); glDebugMessageCallback(DebugHandler, nullptr); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index b56328a7f..d45e69cbc 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -44,19 +44,23 @@ struct ScreenInfo { TextureInfo texture; }; +struct PresentationTexture { + u32 width = 0; + u32 height = 0; + OGLTexture texture; +}; + +class FrameMailbox; + class RendererOpenGL final : public VideoCore::RendererBase { public: explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); ~RendererOpenGL() override; - /// Swap buffers (render frame) - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - - /// Initialize the renderer bool Init() override; - - /// Shutdown the renderer void ShutDown() override; + void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void TryPresent(int timeout_ms) override; private: /// Initializes the OpenGL state and creates persistent objects. @@ -74,10 +78,7 @@ private: void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h); - /// Updates the framerate. - void UpdateFramerate(); - - void CaptureScreenshot(); + void RenderScreenshot(); /// Loads framebuffer from emulated memory into the active OpenGL texture. void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); @@ -87,6 +88,8 @@ private: void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, const TextureInfo& texture); + void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer); + Core::Frontend::EmuWindow& emu_window; Core::System& system; @@ -107,6 +110,9 @@ private: /// Used for transforming the framebuffer orientation Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; Common::Rectangle<int> framebuffer_crop_rect; + + /// Frame presentation mailbox + std::unique_ptr<FrameMailbox> frame_mailbox; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 5403c3ab7..aad0c895b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -159,6 +159,7 @@ struct FormatTuple { {vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI {vk::Format::eUndefined, {}}, // RGBX16F {vk::Format::eR32Uint, Attachable | Storage}, // R32UI + {vk::Format::eR32Sint, Attachable | Storage}, // R32I {vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8 {vk::Format::eUndefined, {}}, // ASTC_2D_8X5 {vk::Format::eUndefined, {}}, // ASTC_2D_5X4 @@ -370,8 +371,22 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr } case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (size) { + case Maxwell::VertexAttribute::Size::Size_8: + return vk::Format::eR8Uscaled; case Maxwell::VertexAttribute::Size::Size_8_8: return vk::Format::eR8G8Uscaled; + case Maxwell::VertexAttribute::Size::Size_8_8_8: + return vk::Format::eR8G8B8Uscaled; + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return vk::Format::eR8G8B8A8Uscaled; + case Maxwell::VertexAttribute::Size::Size_16: + return vk::Format::eR16Uscaled; + case Maxwell::VertexAttribute::Size::Size_16_16: + return vk::Format::eR16G16Uscaled; + case Maxwell::VertexAttribute::Size::Size_16_16_16: + return vk::Format::eR16G16B16Uscaled; + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return vk::Format::eR16G16B16A16Uscaled; default: break; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d5032b432..ddc62bc97 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -106,8 +106,14 @@ RendererVulkan::~RendererVulkan() { } void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { + render_window.PollEvents(); + + if (!framebuffer) { + return; + } + const auto& layout = render_window.GetFramebufferLayout(); - if (framebuffer && layout.width > 0 && layout.height > 0 && render_window.IsShown()) { + if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; const bool use_accelerated = rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); @@ -128,13 +134,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { blit_screen->Recreate(); } - render_window.SwapBuffers(); rasterizer->TickFrame(); } render_window.PollEvents(); } +void RendererVulkan::TryPresent(int /*timeout_ms*/) { + // TODO (bunnei): ImplementMe +} + bool RendererVulkan::Init() { PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); @@ -262,4 +271,4 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } -} // namespace Vulkan
\ No newline at end of file +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index a472c5dc9..f513397f0 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -36,14 +36,10 @@ public: explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); ~RendererVulkan() override; - /// Swap buffers (render frame) - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - - /// Initialize the renderer bool Init() override; - - /// Shutdown the renderer void ShutDown() override; + void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void TryPresent(int timeout_ms) override; private: std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index d1da4f9d3..886bde3b9 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -523,6 +523,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti vk::Format::eB10G11R11UfloatPack32, vk::Format::eR32Sfloat, vk::Format::eR32Uint, + vk::Format::eR32Sint, vk::Format::eR16Sfloat, vk::Format::eR16G16B16A16Sfloat, vk::Format::eB8G8R8A8Unorm, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ad837dd4a..3fe28c204 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -614,33 +614,34 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( vk::RenderPass renderpass) { FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), - std::numeric_limits<u32>::max()}; + std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; - const auto MarkAsModifiedAndPush = [&](const View& view) { - if (view == nullptr) { + const auto try_push = [&](const View& view) { + if (!view) { return false; } key.views.push_back(view->GetHandle()); key.width = std::min(key.width, view->GetWidth()); key.height = std::min(key.height, view->GetHeight()); + key.layers = std::min(key.layers, view->GetNumLayers()); return true; }; for (std::size_t index = 0; index < std::size(color_attachments); ++index) { - if (MarkAsModifiedAndPush(color_attachments[index])) { + if (try_push(color_attachments[index])) { texture_cache.MarkColorBufferInUse(index); } } - if (MarkAsModifiedAndPush(zeta_attachment)) { + if (try_push(zeta_attachment)) { texture_cache.MarkDepthBufferInUse(); } const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); auto& framebuffer = fbentry->second; if (is_cache_miss) { - const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass, - static_cast<u32>(key.views.size()), - key.views.data(), key.width, key.height, 1); + const vk::FramebufferCreateInfo framebuffer_ci( + {}, key.renderpass, static_cast<u32>(key.views.size()), key.views.data(), key.width, + key.height, key.layers); const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 138903d60..4dc8af6e8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -56,6 +56,7 @@ struct FramebufferCacheKey { vk::RenderPass renderpass{}; u32 width = 0; u32 height = 0; + u32 layers = 0; ImageViewsPack views; std::size_t Hash() const noexcept { @@ -66,12 +67,17 @@ struct FramebufferCacheKey { } boost::hash_combine(hash, width); boost::hash_combine(hash, height); + boost::hash_combine(hash, layers); return hash; } bool operator==(const FramebufferCacheKey& rhs) const noexcept { - return std::tie(renderpass, views, width, height) == - std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height); + return std::tie(renderpass, views, width, height, layers) == + std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers); + } + + bool operator!=(const FramebufferCacheKey& rhs) const noexcept { + return !operator==(rhs); } }; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 9841f0dd1..cfcca5af0 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -2221,16 +2221,14 @@ private: switch (specialization.attribute_types.at(location)) { case Maxwell::VertexAttribute::Type::SignedNorm: case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: case Maxwell::VertexAttribute::Type::Float: return {Type::Float, t_in_float, t_in_float4}; case Maxwell::VertexAttribute::Type::SignedInt: return {Type::Int, t_in_int, t_in_int4}; case Maxwell::VertexAttribute::Type::UnsignedInt: return {Type::Uint, t_in_uint, t_in_uint4}; - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - UNIMPLEMENTED(); - return {Type::Float, t_in_float, t_in_float4}; default: UNREACHABLE(); return {Type::Float, t_in_float, t_in_float4}; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index d3edbe80c..22e3d34de 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -151,6 +151,10 @@ public: return params.GetMipHeight(base_level); } + u32 GetNumLayers() const { + return num_layers; + } + bool IsBufferView() const { return buffer_view; } diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 90240c765..478394682 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -53,29 +53,24 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); - // TODO(Rodrigo): Should precise be used when there's a postfactor? - Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); + static constexpr std::array FmulPostFactor = { + 1.000f, // None + 0.500f, // Divide 2 + 0.250f, // Divide 4 + 0.125f, // Divide 8 + 8.000f, // Mul 8 + 4.000f, // Mul 4 + 2.000f, // Mul 2 + }; if (instr.fmul.postfactor != 0) { - auto postfactor = static_cast<s32>(instr.fmul.postfactor); - - // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below - // logic. - if (postfactor >= 4) { - postfactor = 7 - postfactor; - } else { - postfactor = 0 - postfactor; - } - - if (postfactor > 0) { - value = Operation(OperationCode::FMul, NO_PRECISE, value, - Immediate(static_cast<f32>(1 << postfactor))); - } else { - value = Operation(OperationCode::FDiv, NO_PRECISE, value, - Immediate(static_cast<f32>(1 << -postfactor))); - } + op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, + Immediate(FmulPostFactor[instr.fmul.postfactor])); } + // TODO(Rodrigo): Should precise be used when there's a postfactor? + Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); + value = GetSaturatedFloat(value, instr.alu.saturate_d); SetInternalFlagsFromFloat(bb, value, instr.generates_cc); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 21366869d..2fe787d6f 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -293,44 +293,66 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, Node imm_lut, bool sets_cc) { - constexpr u32 lop_iterations = 32; - const Node one = Immediate(1); - const Node two = Immediate(2); - - Node value; - for (u32 i = 0; i < lop_iterations; ++i) { - const Node shift_amount = Immediate(i); - - const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount); - const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one); - - const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount); - const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one); - const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one); - - const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount); - const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one); - const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two); - - const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1); - const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2); - - const Node shifted_bit = - Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012); - const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one); - - const Node right = - Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount); - - if (i > 0) { - value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right); - } else { - value = right; + const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { + Node value = Immediate(0); + const ImmediateNode imm = std::get<ImmediateNode>(*ttbl); + if (imm.GetValue() & 0x01) { + const Node a = Operation(OperationCode::IBitwiseNot, na); + const Node b = Operation(OperationCode::IBitwiseNot, nb); + const Node c = Operation(OperationCode::IBitwiseNot, nc); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); + value = Operation(OperationCode::IBitwiseOr, value, r); } - } + if (imm.GetValue() & 0x02) { + const Node a = Operation(OperationCode::IBitwiseNot, na); + const Node b = Operation(OperationCode::IBitwiseNot, nb); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + if (imm.GetValue() & 0x04) { + const Node a = Operation(OperationCode::IBitwiseNot, na); + const Node c = Operation(OperationCode::IBitwiseNot, nc); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + if (imm.GetValue() & 0x08) { + const Node a = Operation(OperationCode::IBitwiseNot, na); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + if (imm.GetValue() & 0x10) { + const Node b = Operation(OperationCode::IBitwiseNot, nb); + const Node c = Operation(OperationCode::IBitwiseNot, nc); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + if (imm.GetValue() & 0x20) { + const Node b = Operation(OperationCode::IBitwiseNot, nb); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + if (imm.GetValue() & 0x40) { + const Node c = Operation(OperationCode::IBitwiseNot, nc); + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + if (imm.GetValue() & 0x80) { + Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); + r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); + value = Operation(OperationCode::IBitwiseOr, value, r); + } + return value; + }(op_a, op_b, op_c, imm_lut); - SetInternalFlagsFromInteger(bb, value, sets_cc); - SetRegister(bb, dest, value); + SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); + SetRegister(bb, dest, lop3_fast); } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index face8c943..15e22b9fa 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -157,13 +157,21 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { return {}; } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; + s64 current_cursor = cursor; + while (current_cursor > 0) { + // Reduce the cursor in one to avoid infinite loops when the instruction sets the same + // register that it uses as operand + const auto [source, new_cursor] = TrackRegister(gpr, code, current_cursor - 1); + current_cursor = new_cursor; + if (!source) { + continue; + } + const auto [base_address, index, offset] = TrackCbuf(source, code, current_cursor); + if (base_address != nullptr) { + return {base_address, index, offset}; + } } - return TrackCbuf(source, code, new_cursor); + return {}; } if (const auto operation = std::get_if<OperationNode>(&*tracked)) { for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 1655ccf16..9707c353d 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -155,6 +155,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) return PixelFormat::R16I; case Tegra::RenderTargetFormat::R32_FLOAT: return PixelFormat::R32F; + case Tegra::RenderTargetFormat::R32_SINT: + return PixelFormat::R32I; case Tegra::RenderTargetFormat::R32_UINT: return PixelFormat::R32UI; case Tegra::RenderTargetFormat::RG32_UINT: diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 0d17a93ed..d88109e5a 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -59,47 +59,48 @@ enum class PixelFormat { RG32UI = 41, RGBX16F = 42, R32UI = 43, - ASTC_2D_8X8 = 44, - ASTC_2D_8X5 = 45, - ASTC_2D_5X4 = 46, - BGRA8_SRGB = 47, - DXT1_SRGB = 48, - DXT23_SRGB = 49, - DXT45_SRGB = 50, - BC7U_SRGB = 51, - R4G4B4A4U = 52, - ASTC_2D_4X4_SRGB = 53, - ASTC_2D_8X8_SRGB = 54, - ASTC_2D_8X5_SRGB = 55, - ASTC_2D_5X4_SRGB = 56, - ASTC_2D_5X5 = 57, - ASTC_2D_5X5_SRGB = 58, - ASTC_2D_10X8 = 59, - ASTC_2D_10X8_SRGB = 60, - ASTC_2D_6X6 = 61, - ASTC_2D_6X6_SRGB = 62, - ASTC_2D_10X10 = 63, - ASTC_2D_10X10_SRGB = 64, - ASTC_2D_12X12 = 65, - ASTC_2D_12X12_SRGB = 66, - ASTC_2D_8X6 = 67, - ASTC_2D_8X6_SRGB = 68, - ASTC_2D_6X5 = 69, - ASTC_2D_6X5_SRGB = 70, - E5B9G9R9F = 71, + R32I = 44, + ASTC_2D_8X8 = 45, + ASTC_2D_8X5 = 46, + ASTC_2D_5X4 = 47, + BGRA8_SRGB = 48, + DXT1_SRGB = 49, + DXT23_SRGB = 50, + DXT45_SRGB = 51, + BC7U_SRGB = 52, + R4G4B4A4U = 53, + ASTC_2D_4X4_SRGB = 54, + ASTC_2D_8X8_SRGB = 55, + ASTC_2D_8X5_SRGB = 56, + ASTC_2D_5X4_SRGB = 57, + ASTC_2D_5X5 = 58, + ASTC_2D_5X5_SRGB = 59, + ASTC_2D_10X8 = 60, + ASTC_2D_10X8_SRGB = 61, + ASTC_2D_6X6 = 62, + ASTC_2D_6X6_SRGB = 63, + ASTC_2D_10X10 = 64, + ASTC_2D_10X10_SRGB = 65, + ASTC_2D_12X12 = 66, + ASTC_2D_12X12_SRGB = 67, + ASTC_2D_8X6 = 68, + ASTC_2D_8X6_SRGB = 69, + ASTC_2D_6X5 = 70, + ASTC_2D_6X5_SRGB = 71, + E5B9G9R9F = 72, MaxColorFormat, // Depth formats - Z32F = 72, - Z16 = 73, + Z32F = 73, + Z16 = 74, MaxDepthFormat, // DepthStencil formats - Z24S8 = 74, - S8Z24 = 75, - Z32FS8 = 76, + Z24S8 = 75, + S8Z24 = 76, + Z32FS8 = 77, MaxDepthStencilFormat, @@ -171,6 +172,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ 0, // RG32UI 0, // RGBX16F 0, // R32UI + 0, // R32I 2, // ASTC_2D_8X8 2, // ASTC_2D_8X5 2, // ASTC_2D_5X4 @@ -267,6 +269,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 1, // RG32UI 1, // RGBX16F 1, // R32UI + 1, // R32I 8, // ASTC_2D_8X8 8, // ASTC_2D_8X5 5, // ASTC_2D_5X4 @@ -355,6 +358,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ 1, // RG32UI 1, // RGBX16F 1, // R32UI + 1, // R32I 8, // ASTC_2D_8X8 5, // ASTC_2D_8X5 4, // ASTC_2D_5X4 @@ -443,6 +447,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 64, // RG32UI 64, // RGBX16F 32, // R32UI + 32, // R32I 128, // ASTC_2D_8X8 128, // ASTC_2D_8X5 128, // ASTC_2D_5X4 @@ -546,6 +551,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table SurfaceCompression::None, // RG32UI SurfaceCompression::None, // RGBX16F SurfaceCompression::None, // R32UI + SurfaceCompression::None, // R32I SurfaceCompression::Converted, // ASTC_2D_8X8 SurfaceCompression::Converted, // ASTC_2D_8X5 SurfaceCompression::Converted, // ASTC_2D_5X4 diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 81fb9f633..cc3ad8417 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -41,7 +41,7 @@ struct Table { ComponentType alpha_component; bool is_srgb; }; -constexpr std::array<Table, 74> DefinitionTable = {{ +constexpr std::array<Table, 75> DefinitionTable = {{ {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, @@ -89,6 +89,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{ {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F}, {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI}, + {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I}, {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F}, diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 38b3a4ba8..f00839313 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -84,19 +84,16 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { switch (params.pixel_format) { case PixelFormat::R16U: - case PixelFormat::R16F: { + case PixelFormat::R16F: params.pixel_format = PixelFormat::Z16; break; - } - case PixelFormat::R32F: { + case PixelFormat::R32F: params.pixel_format = PixelFormat::Z32F; break; - } - default: { + default: UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", static_cast<u32>(params.pixel_format)); } - } params.type = GetFormatType(params.pixel_format); } params.type = GetFormatType(params.pixel_format); @@ -168,27 +165,29 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl return params; } -SurfaceParams SurfaceParams::CreateForDepthBuffer( - Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, - u32 block_width, u32 block_height, u32 block_depth, - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { +SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) { + const auto& regs = system.GPU().Maxwell3D().regs; + regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.type; SurfaceParams params; - params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; + params.is_tiled = regs.zeta.memory_layout.type == + Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; params.srgb_conversion = false; - params.block_width = std::min(block_width, 5U); - params.block_height = std::min(block_height, 5U); - params.block_depth = std::min(block_depth, 5U); + params.block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U); + params.block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U); + params.block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromDepthFormat(format); + params.pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); params.type = GetFormatType(params.pixel_format); - params.width = zeta_width; - params.height = zeta_height; - params.target = SurfaceTarget::Texture2D; - params.depth = 1; + params.width = regs.zeta_width; + params.height = regs.zeta_height; params.pitch = 0; params.num_levels = 1; params.emulated_levels = 1; - params.is_layered = false; + + const bool is_layered = regs.zeta_layers > 1 && params.block_depth == 0; + params.is_layered = is_layered; + params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; + params.depth = is_layered ? regs.zeta_layers.Value() : 1U; return params; } @@ -214,11 +213,13 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.width = params.pitch / bpp; } params.height = config.height; - params.depth = 1; - params.target = SurfaceTarget::Texture2D; params.num_levels = 1; params.emulated_levels = 1; - params.is_layered = false; + + const bool is_layered = config.layers > 1 && params.block_depth == 0; + params.is_layered = is_layered; + params.depth = is_layered ? config.layers.Value() : 1; + params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; return params; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 9256fd6d9..995cc3818 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -35,10 +35,7 @@ public: const VideoCommon::Shader::Image& entry); /// Creates SurfaceCachedParams for a depth buffer configuration. - static SurfaceParams CreateForDepthBuffer( - Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, - u32 block_width, u32 block_height, u32 block_depth, - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); + static SurfaceParams CreateForDepthBuffer(Core::System& system); /// Creates SurfaceCachedParams from a framebuffer configuration. static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0d105d386..c70e4aec2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -160,10 +160,7 @@ public: SetEmptyDepthBuffer(); return {}; } - const auto depth_params{SurfaceParams::CreateForDepthBuffer( - system, regs.zeta_width, regs.zeta_height, regs.zeta.format, - regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, - regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; + const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); if (depth_buffer.target) depth_buffer.target->MarkAsRenderTarget(false, NO_RT); |