diff options
Diffstat (limited to '')
50 files changed, 1312 insertions, 363 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f617665de..4742bcbe9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -52,6 +52,8 @@ add_library(video_core STATIC engines/puller.cpp engines/puller.h framebuffer_config.h + fsr.cpp + fsr.h host1x/codecs/codec.cpp host1x/codecs/codec.h host1x/codecs/h264.cpp @@ -100,6 +102,8 @@ add_library(video_core STATIC renderer_null/null_rasterizer.h renderer_null/renderer_null.cpp renderer_null/renderer_null.h + renderer_opengl/blit_image.cpp + renderer_opengl/blit_image.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_compute_pipeline.cpp @@ -108,6 +112,8 @@ add_library(video_core STATIC renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h + renderer_opengl/gl_fsr.cpp + renderer_opengl/gl_fsr.h renderer_opengl/gl_graphics_pipeline.cpp renderer_opengl/gl_graphics_pipeline.h renderer_opengl/gl_rasterizer.cpp diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 2437121ce..1d22d25f1 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -51,6 +51,10 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) { LOG_WARNING(HW_GPU, "(STUBBED) called"); break; } + case MAXWELL3D_REG_INDEX(draw_texture.src_y0): { + DrawTexture(); + break; + } default: break; } @@ -179,6 +183,33 @@ void DrawManager::DrawIndexSmall(u32 argument) { ProcessDraw(true, 1); } +void DrawManager::DrawTexture() { + const auto& regs{maxwell3d->regs}; + draw_texture_state.dst_x0 = static_cast<float>(regs.draw_texture.dst_x0) / 4096.f; + draw_texture_state.dst_y0 = static_cast<float>(regs.draw_texture.dst_y0) / 4096.f; + const auto dst_width = static_cast<float>(regs.draw_texture.dst_width) / 4096.f; + const auto dst_height = static_cast<float>(regs.draw_texture.dst_height) / 4096.f; + const bool lower_left{regs.window_origin.mode != + Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft}; + if (lower_left) { + draw_texture_state.dst_y0 -= dst_height; + } + draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width; + draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height; + draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f; + draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f; + draw_texture_state.src_x1 = + (static_cast<float>(regs.draw_texture.dx_du) / 4294967296.f) * dst_width + + draw_texture_state.src_x0; + draw_texture_state.src_y1 = + (static_cast<float>(regs.draw_texture.dy_dv) / 4294967296.f) * dst_height + + draw_texture_state.src_y0; + draw_texture_state.src_sampler = regs.draw_texture.src_sampler; + draw_texture_state.src_texture = regs.draw_texture.src_texture; + + maxwell3d->rasterizer->DrawTexture(); +} + void DrawManager::UpdateTopology() { const auto& regs{maxwell3d->regs}; switch (regs.primitive_topology_control) { diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 58d1b2d59..7c22c49f1 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -32,6 +32,19 @@ public: std::vector<u8> inline_index_draw_indexes; }; + struct DrawTextureState { + f32 dst_x0; + f32 dst_y0; + f32 dst_x1; + f32 dst_y1; + f32 src_x0; + f32 src_y0; + f32 src_x1; + f32 src_y1; + u32 src_sampler; + u32 src_texture; + }; + struct IndirectParams { bool is_indexed; bool include_count; @@ -64,6 +77,10 @@ public: return draw_state; } + const DrawTextureState& GetDrawTextureState() const { + return draw_texture_state; + } + IndirectParams& GetIndirectParams() { return indirect_state; } @@ -81,6 +98,8 @@ private: void DrawIndexSmall(u32 argument); + void DrawTexture(); + void UpdateTopology(); void ProcessDraw(bool draw_indexed, u32 instance_count); @@ -89,6 +108,7 @@ private: Maxwell3D* maxwell3d{}; State draw_state{}; + DrawTextureState draw_texture_state{}; IndirectParams indirect_state{}; }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 68ceda519..ae9da6290 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -149,6 +149,7 @@ bool Maxwell3D::IsMethodExecutable(u32 method) { case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): case MAXWELL3D_REG_INDEX(vertex_array_instance_first): case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): + case MAXWELL3D_REG_INDEX(draw_texture.src_y0): case MAXWELL3D_REG_INDEX(wait_for_idle): case MAXWELL3D_REG_INDEX(shadow_ram_control): case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr): diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0b2fd2928..c89969bb4 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1599,6 +1599,20 @@ public: }; static_assert(sizeof(TIRModulationCoeff) == 0x4); + struct DrawTexture { + s32 dst_x0; + s32 dst_y0; + s32 dst_width; + s32 dst_height; + s64 dx_du; + s64 dy_dv; + u32 src_sampler; + u32 src_texture; + s32 src_x0; + s32 src_y0; + }; + static_assert(sizeof(DrawTexture) == 0x30); + struct ReduceColorThreshold { union { BitField<0, 8, u32> all_hit_once; @@ -2751,7 +2765,7 @@ public: u32 reserved_sw_method2; ///< 0x102C std::array<TIRModulationCoeff, 5> tir_modulation_coeff; ///< 0x1030 std::array<u32, 15> spare_nop; ///< 0x1044 - INSERT_PADDING_BYTES_NOINIT(0x30); + DrawTexture draw_texture; ///< 0x1080 std::array<u32, 7> reserved_sw_method3_to_7; ///< 0x10B0 ReduceColorThreshold reduce_color_thresholds_unorm8; ///< 0x10CC std::array<u32, 4> reserved_sw_method10_to_13; ///< 0x10D0 diff --git a/src/video_core/fsr.cpp b/src/video_core/fsr.cpp new file mode 100644 index 000000000..5653c64fc --- /dev/null +++ b/src/video_core/fsr.cpp @@ -0,0 +1,148 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <cmath> +#include "video_core/fsr.h" + +namespace FSR { +namespace { +// Reimplementations of the constant generating functions in ffx_fsr1.h +// GCC generated a lot of warnings when using the official header. +u32 AU1_AH1_AF1(f32 f) { + static constexpr u32 base[512]{ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, + 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, + 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, + 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, + 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, + 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, + 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, + 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + }; + static constexpr s8 shift[512]{ + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, + 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, + 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, + }; + const u32 u = Common::BitCast<u32>(f); + const u32 i = u >> 23; + return base[i] + ((u & 0x7fffff) >> shift[i]); +} + +u32 AU1_AH2_AF2(f32 a[2]) { + return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16); +} + +void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX, + f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, + f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) { + con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX); + con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY); + con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); + con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); + con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); + con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY); + con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); + con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY); + con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX); + con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); + con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); + con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); + con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX); + con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY); + con3[2] = con3[3] = 0; +} +} // Anonymous namespace + +void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], + f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, + f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, + f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { + FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, + inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); + con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + + inputOffsetInPixelsX); + con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + + inputOffsetInPixelsY); +} + +void FsrRcasCon(u32* con, f32 sharpness) { + sharpness = std::exp2f(-sharpness); + f32 hSharp[2]{sharpness, sharpness}; + con[0] = Common::BitCast<u32>(sharpness); + con[1] = AU1_AH2_AF2(hSharp); + con[2] = 0; + con[3] = 0; +} +} // namespace FSR diff --git a/src/video_core/fsr.h b/src/video_core/fsr.h new file mode 100644 index 000000000..db0d4ec6f --- /dev/null +++ b/src/video_core/fsr.h @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/bit_cast.h" +#include "common/common_types.h" + +namespace FSR { +// Reimplementations of the constant generating functions in ffx_fsr1.h +// GCC generated a lot of warnings when using the official header. +void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], + f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, + f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, + f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY); + +void FsrRcasCon(u32* con, f32 sharpness); + +} // namespace FSR diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index f275b2aa9..52cd5bb81 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -3,14 +3,19 @@ set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr) -set(GLSL_INCLUDES - fidelityfx_fsr.comp +set(FIDELITYFX_FILES ${FIDELITYFX_INCLUDE_DIR}/ffx_a.h ${FIDELITYFX_INCLUDE_DIR}/ffx_fsr1.h ) +set(GLSL_INCLUDES + fidelityfx_fsr.comp + ${FIDELITYFX_FILES} +) + set(SHADER_FILES astc_decoder.comp + blit_color_float.frag block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp convert_abgr8_to_d24s8.frag @@ -23,6 +28,9 @@ set(SHADER_FILES fxaa.vert opengl_convert_s8d24.comp opengl_copy_bc4.comp + opengl_fidelityfx_fsr.frag + opengl_fidelityfx_fsr_easu.frag + opengl_fidelityfx_fsr_rcas.frag opengl_present.frag opengl_present.vert opengl_present_scaleforce.frag @@ -36,8 +44,9 @@ set(SHADER_FILES smaa_blending_weight_calculation.frag smaa_neighborhood_blending.vert smaa_neighborhood_blending.frag - vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag + vulkan_color_clear.frag + vulkan_color_clear.vert vulkan_fidelityfx_fsr_easu_fp16.comp vulkan_fidelityfx_fsr_easu_fp32.comp vulkan_fidelityfx_fsr_rcas_fp16.comp @@ -118,6 +127,25 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) endif() endforeach() +foreach(FILEPATH IN ITEMS ${FIDELITYFX_FILES}) + get_filename_component(FILENAME ${FILEPATH} NAME) + string(REPLACE "." "_" HEADER_NAME ${FILENAME}) + set(SOURCE_FILE ${FILEPATH}) + set(SOURCE_HEADER_FILE ${SHADER_DIR}/${HEADER_NAME}.h) + add_custom_command( + OUTPUT + ${SOURCE_HEADER_FILE} + COMMAND + ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} + MAIN_DEPENDENCY + ${SOURCE_FILE} + DEPENDS + ${INPUT_FILE} + # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified + ) + set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) +endforeach() + set(SHADER_SOURCES ${SHADER_FILES}) list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/blit_color_float.frag index c0c832296..c0c832296 100644 --- a/src/video_core/host_shaders/vulkan_blit_color_float.frag +++ b/src/video_core/host_shaders/blit_color_float.frag diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert index 2c976b19f..d16d98995 100644 --- a/src/video_core/host_shaders/full_screen_triangle.vert +++ b/src/video_core/host_shaders/full_screen_triangle.vert @@ -4,13 +4,20 @@ #version 450 #ifdef VULKAN +#define VERTEX_ID gl_VertexIndex #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { #define END_PUSH_CONSTANTS }; #define UNIFORM(n) +#define FLIPY 1 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv +#define VERTEX_ID gl_VertexID #define BEGIN_PUSH_CONSTANTS #define END_PUSH_CONSTANTS +#define FLIPY -1 #define UNIFORM(n) layout (location = n) uniform +out gl_PerVertex { + vec4 gl_Position; +}; #endif BEGIN_PUSH_CONSTANTS @@ -21,8 +28,8 @@ END_PUSH_CONSTANTS layout(location = 0) out vec2 texcoord; void main() { - float x = float((gl_VertexIndex & 1) << 2); - float y = float((gl_VertexIndex & 2) << 1); - gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); + float x = float((VERTEX_ID & 1) << 2); + float y = float((VERTEX_ID & 2) << 1); + gl_Position = vec4(x - 1.0, FLIPY * (y - 1.0), 0.0, 1.0); texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset); } diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag new file mode 100644 index 000000000..16d22f58e --- /dev/null +++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag @@ -0,0 +1,108 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +//!#version 460 core +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +#extension GL_AMD_gpu_shader_half_float : enable +#extension GL_NV_gpu_shader5 : enable + +// FidelityFX Super Resolution Sample +// +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +layout (location = 0) uniform uvec4 constants[4]; + +#define A_GPU 1 +#define A_GLSL 1 + +#ifdef YUZU_USE_FP16 + #define A_HALF +#endif +#include "ffx_a.h" + +#ifndef YUZU_USE_FP16 + layout (binding=0) uniform sampler2D InputTexture; + #if USE_EASU + #define FSR_EASU_F 1 + AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; } + AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; } + AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_F + AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); } + void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} + #endif +#else + layout (binding=0) uniform sampler2D InputTexture; + #if USE_EASU + #define FSR_EASU_H 1 + AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; } + AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; } + AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_H + AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); } + void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){} + #endif +#endif + +#include "ffx_fsr1.h" + +#if USE_RCAS + layout(location = 0) in vec2 frag_texcoord; +#endif +layout (location = 0) out vec4 frag_color; + +void CurrFilter(AU2 pos) +{ +#if USE_EASU + #ifndef YUZU_USE_FP16 + AF3 c; + FsrEasuF(c, pos, constants[0], constants[1], constants[2], constants[3]); + frag_color = AF4(c, 1.0); + #else + AH3 c; + FsrEasuH(c, pos, constants[0], constants[1], constants[2], constants[3]); + frag_color = AH4(c, 1.0); + #endif +#endif +#if USE_RCAS + #ifndef YUZU_USE_FP16 + AF3 c; + FsrRcasF(c.r, c.g, c.b, pos, constants[0]); + frag_color = AF4(c, 1.0); + #else + AH3 c; + FsrRcasH(c.r, c.g, c.b, pos, constants[0]); + frag_color = AH4(c, 1.0); + #endif +#endif +} + +void main() +{ +#if USE_RCAS + CurrFilter(AU2(frag_texcoord * vec2(textureSize(InputTexture, 0)))); +#else + CurrFilter(AU2(gl_FragCoord.xy)); +#endif +} diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag new file mode 100644 index 000000000..d39f80ac1 --- /dev/null +++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag @@ -0,0 +1,9 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_EASU 1 + +#include "opengl_fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag new file mode 100644 index 000000000..cfa78ddc7 --- /dev/null +++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag @@ -0,0 +1,9 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_RCAS 1 + +#include "opengl_fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_color_clear.frag b/src/video_core/host_shaders/vulkan_color_clear.frag new file mode 100644 index 000000000..617bf01e1 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_color_clear.frag @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core + +layout (push_constant) uniform PushConstants { + vec4 clear_color; +}; + +layout(location = 0) out vec4 color; + +void main() { + color = clear_color; +} diff --git a/src/video_core/host_shaders/vulkan_color_clear.vert b/src/video_core/host_shaders/vulkan_color_clear.vert new file mode 100644 index 000000000..d85883141 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_color_clear.vert @@ -0,0 +1,10 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core + +void main() { + float x = float((gl_VertexIndex & 1) << 2); + float y = float((gl_VertexIndex & 2) << 1); + gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); +} diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 3bcae3503..83924475b 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -6,7 +6,6 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" -#include "common/settings.h" #include "core/core.h" #include "core/device_memory.h" #include "core/hle/kernel/k_page_table.h" @@ -46,11 +45,6 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_table_cpu.resize(big_page_table_size); big_page_continous.resize(big_page_table_size / continous_bits, 0); entries.resize(page_table_size / 32, 0); - if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) { - fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); - } else { - fastmem_arena = nullptr; - } } MemoryManager::~MemoryManager() = default; @@ -360,7 +354,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si } } -template <bool is_safe, bool use_fastmem> +template <bool is_safe> void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, [[maybe_unused]] VideoCommon::CacheType which) const { auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, @@ -374,12 +368,8 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: if constexpr (is_safe) { rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } - if constexpr (use_fastmem) { - std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); - } else { - u8* physical = memory.GetPointer(cpu_addr_base); - std::memcpy(dest_buffer, physical, copy_amount); - } + u8* physical = memory.GetPointer(cpu_addr_base); + std::memcpy(dest_buffer, physical, copy_amount); dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -388,15 +378,11 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: if constexpr (is_safe) { rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } - if constexpr (use_fastmem) { - std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); + if (!IsBigPageContinous(page_index)) [[unlikely]] { + memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); } else { - if (!IsBigPageContinous(page_index)) [[unlikely]] { - memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); - } else { - u8* physical = memory.GetPointer(cpu_addr_base); - std::memcpy(dest_buffer, physical, copy_amount); - } + u8* physical = memory.GetPointer(cpu_addr_base); + std::memcpy(dest_buffer, physical, copy_amount); } dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; }; @@ -410,20 +396,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, VideoCommon::CacheType which) const { - if (fastmem_arena) [[likely]] { - ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which); - return; - } - ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which); + ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which); } void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, const std::size_t size) const { - if (fastmem_arena) [[likely]] { - ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); - return; - } - ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); + ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); } template <bool is_safe> diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 2936364f0..9ebfb6179 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -141,7 +141,7 @@ private: inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const; - template <bool is_safe, bool use_fastmem> + template <bool is_safe> void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, VideoCommon::CacheType which) const; @@ -215,7 +215,6 @@ private: std::vector<u64> big_page_continous; std::vector<std::pair<VAddr, std::size_t>> page_stash{}; - u8* fastmem_arena{}; constexpr static size_t continous_bits = 64; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 1735b6164..33e2610bc 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -47,6 +47,9 @@ public: /// Dispatches an indirect draw invocation virtual void DrawIndirect() {} + /// Dispatches an draw texture invocation + virtual void DrawTexture() = 0; + /// Clear the current framebuffer virtual void Clear(u32 layer_count) = 0; diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 2c11345d7..2b5c7defa 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -21,6 +21,7 @@ RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gp RasterizerNull::~RasterizerNull() = default; void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} +void RasterizerNull::DrawTexture() {} void RasterizerNull::Clear(u32 layer_count) {} void RasterizerNull::DispatchCompute() {} void RasterizerNull::ResetCounter(VideoCore::QueryType type) {} diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 2112aa70e..51f896e43 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -31,6 +31,7 @@ public: ~RasterizerNull() override; void Draw(bool is_indexed, u32 instance_count) override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; diff --git a/src/video_core/renderer_opengl/blit_image.cpp b/src/video_core/renderer_opengl/blit_image.cpp new file mode 100644 index 000000000..9a560a73b --- /dev/null +++ b/src/video_core/renderer_opengl/blit_image.cpp @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <algorithm> + +#include "video_core/host_shaders/blit_color_float_frag.h" +#include "video_core/host_shaders/full_screen_triangle_vert.h" +#include "video_core/renderer_opengl/blit_image.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" + +namespace OpenGL { + +BlitImageHelper::BlitImageHelper(ProgramManager& program_manager_) + : program_manager(program_manager_), + full_screen_vert(CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER)), + blit_color_to_color_frag( + CreateProgram(HostShaders::BLIT_COLOR_FLOAT_FRAG, GL_FRAGMENT_SHADER)) {} + +BlitImageHelper::~BlitImageHelper() = default; + +void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size) { + glEnable(GL_CULL_FACE); + glDisable(GL_COLOR_LOGIC_OP); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_RASTERIZER_DISCARD); + glDisable(GL_ALPHA_TEST); + glDisablei(GL_BLEND, 0); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glCullFace(GL_BACK); + glFrontFace(GL_CW); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDepthRangeIndexed(0, 0.0, 0.0); + + program_manager.BindPresentPrograms(full_screen_vert.handle, blit_color_to_color_frag.handle); + glProgramUniform2f(full_screen_vert.handle, 0, + static_cast<float>(src_region.end.x - src_region.start.x) / + static_cast<float>(src_size.width), + static_cast<float>(src_region.end.y - src_region.start.y) / + static_cast<float>(src_size.height)); + glProgramUniform2f(full_screen_vert.handle, 1, + static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width), + static_cast<float>(src_region.start.y) / + static_cast<float>(src_size.height)); + glViewport(std::min(dst_region.start.x, dst_region.end.x), + std::min(dst_region.start.y, dst_region.end.y), + std::abs(dst_region.end.x - dst_region.start.x), + std::abs(dst_region.end.y - dst_region.start.y)); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer); + glBindSampler(0, src_sampler); + glBindTextureUnit(0, src_image_view); + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLES, 0, 3); +} +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/blit_image.h b/src/video_core/renderer_opengl/blit_image.h new file mode 100644 index 000000000..5a2b12d16 --- /dev/null +++ b/src/video_core/renderer_opengl/blit_image.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <glad/glad.h> + +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/texture_cache/types.h" + +namespace OpenGL { + +using VideoCommon::Extent3D; +using VideoCommon::Offset2D; +using VideoCommon::Region2D; + +class ProgramManager; +class Framebuffer; +class ImageView; + +class BlitImageHelper { +public: + explicit BlitImageHelper(ProgramManager& program_manager); + ~BlitImageHelper(); + + void BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size); + +private: + ProgramManager& program_manager; + + OGLProgram full_screen_vert; + OGLProgram blit_color_to_color_frag; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 26d066004..1a0cea9b7 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -30,7 +30,7 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, ProgramManager& program_manager_, const Shader::Info& info_, std::string code, - std::vector<u32> code_v) + std::vector<u32> code_v, bool force_context_flush) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, info{info_} { switch (device.GetShaderBackend()) { @@ -63,6 +63,15 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac writes_global_memory = !use_storage_buffers && std::ranges::any_of(info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + if (force_context_flush) { + std::scoped_lock lock{built_mutex}; + built_fence.Create(); + // Flush this context to ensure compilation commands and fence are in the GPU pipe. + glFlush(); + built_condvar.notify_one(); + } else { + is_built = true; + } } void ComputePipeline::Configure() { @@ -142,6 +151,9 @@ void ComputePipeline::Configure() { } texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); + if (!is_built) { + WaitForBuild(); + } if (assembly_program.handle != 0) { program_manager.BindComputeAssemblyProgram(assembly_program.handle); } else { @@ -223,4 +235,13 @@ void ComputePipeline::Configure() { } } +void ComputePipeline::WaitForBuild() { + if (built_fence.handle == 0) { + std::unique_lock lock{built_mutex}; + built_condvar.wait(lock, [this] { return built_fence.handle != 0; }); + } + ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED); + is_built = true; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index 6534dec32..9bcc72b59 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -50,7 +50,8 @@ class ComputePipeline { public: explicit ComputePipeline(const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, ProgramManager& program_manager_, - const Shader::Info& info_, std::string code, std::vector<u32> code_v); + const Shader::Info& info_, std::string code, std::vector<u32> code_v, + bool force_context_flush = false); void Configure(); @@ -65,6 +66,8 @@ public: } private: + void WaitForBuild(); + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager* gpu_memory; @@ -81,6 +84,11 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; + + std::mutex built_mutex; + std::condition_variable built_condvar; + OGLSync built_fence{}; + bool is_built{false}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index cee5c3247..22ed16ebf 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -166,6 +166,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; + has_draw_texture = GLAD_GL_NV_draw_texture; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; need_fastmath_off = is_nvidia; can_report_memory = GLAD_GL_NVX_gpu_memory_info; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2a72d84be..3ff8cad83 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -4,6 +4,8 @@ #pragma once #include <cstddef> +#include <string> + #include "common/common_types.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/stage.h" @@ -146,6 +148,10 @@ public: return has_sparse_texture_2; } + bool HasDrawTexture() const { + return has_draw_texture; + } + bool IsWarpSizePotentiallyLargerThanGuest() const { return warp_size_potentially_larger_than_guest; } @@ -216,6 +222,7 @@ private: bool has_shader_int64{}; bool has_amd_shader_half_float{}; bool has_sparse_texture_2{}; + bool has_draw_texture{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; bool has_cbuf_ftou_bug{}; diff --git a/src/video_core/renderer_opengl/gl_fsr.cpp b/src/video_core/renderer_opengl/gl_fsr.cpp new file mode 100644 index 000000000..77262dcf1 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_fsr.cpp @@ -0,0 +1,101 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/settings.h" +#include "video_core/fsr.h" +#include "video_core/renderer_opengl/gl_fsr.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" + +namespace OpenGL { +using namespace FSR; + +using FsrConstants = std::array<u32, 4 * 4>; + +FSR::FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source, + std::string_view fsr_rcas_source) + : fsr_vertex{CreateProgram(fsr_vertex_source, GL_VERTEX_SHADER)}, + fsr_easu_frag{CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER)}, + fsr_rcas_frag{CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER)} { + glProgramUniform2f(fsr_vertex.handle, 0, 1.0f, 1.0f); + glProgramUniform2f(fsr_vertex.handle, 1, 0.0f, 0.0f); +} + +FSR::~FSR() = default; + +void FSR::Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen, + u32 input_image_width, u32 input_image_height, + const Common::Rectangle<int>& crop_rect) { + + const auto output_image_width = screen.GetWidth(); + const auto output_image_height = screen.GetHeight(); + + if (fsr_intermediate_tex.handle) { + GLint fsr_tex_width, fsr_tex_height; + glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_WIDTH, + &fsr_tex_width); + glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_HEIGHT, + &fsr_tex_height); + if (static_cast<u32>(fsr_tex_width) != output_image_width || + static_cast<u32>(fsr_tex_height) != output_image_height) { + fsr_intermediate_tex.Release(); + } + } + if (!fsr_intermediate_tex.handle) { + fsr_intermediate_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(fsr_intermediate_tex.handle, 1, GL_RGB16F, output_image_width, + output_image_height); + glNamedFramebufferTexture(fsr_framebuffer.handle, GL_COLOR_ATTACHMENT0, + fsr_intermediate_tex.handle, 0); + } + + GLint old_draw_fb; + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); + + glFrontFace(GL_CW); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fsr_framebuffer.handle); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(output_image_width), + static_cast<GLfloat>(output_image_height)); + + FsrConstants constants; + FsrEasuConOffset( + constants.data() + 0, constants.data() + 4, constants.data() + 8, constants.data() + 12, + + static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()), + static_cast<f32>(input_image_width), static_cast<f32>(input_image_height), + static_cast<f32>(output_image_width), static_cast<f32>(output_image_height), + static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top)); + + glProgramUniform4uiv(fsr_easu_frag.handle, 0, sizeof(constants), std::data(constants)); + + program_manager.BindPresentPrograms(fsr_vertex.handle, fsr_easu_frag.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); + glBindTextureUnit(0, fsr_intermediate_tex.handle); + + const float sharpening = + static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f; + + FsrRcasCon(constants.data(), sharpening); + glProgramUniform4uiv(fsr_rcas_frag.handle, 0, sizeof(constants), std::data(constants)); +} + +void FSR::InitBuffers() { + fsr_framebuffer.Create(); +} + +void FSR::ReleaseBuffers() { + fsr_framebuffer.Release(); + fsr_intermediate_tex.Release(); +} + +const OGLProgram& FSR::GetPresentFragmentProgram() const noexcept { + return fsr_rcas_frag; +} + +bool FSR::AreBuffersInitialized() const noexcept { + return fsr_framebuffer.handle; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_fsr.h b/src/video_core/renderer_opengl/gl_fsr.h new file mode 100644 index 000000000..1f6ae3115 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_fsr.h @@ -0,0 +1,43 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <string_view> + +#include "common/common_types.h" +#include "common/math_util.h" +#include "video_core/fsr.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +class ProgramManager; + +class FSR { +public: + explicit FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source, + std::string_view fsr_rcas_source); + ~FSR(); + + void Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen, + u32 input_image_width, u32 input_image_height, + const Common::Rectangle<int>& crop_rect); + + void InitBuffers(); + + void ReleaseBuffers(); + + [[nodiscard]] const OGLProgram& GetPresentFragmentProgram() const noexcept; + + [[nodiscard]] bool AreBuffersInitialized() const noexcept; + +private: + OGLFramebuffer fsr_framebuffer; + OGLProgram fsr_vertex; + OGLProgram fsr_easu_frag; + OGLProgram fsr_rcas_frag; + OGLTexture fsr_intermediate_tex; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index c115dabe1..29491e762 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -176,7 +176,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c std::array<std::string, 5> sources, std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos, - const GraphicsPipelineKey& key_) + const GraphicsPipelineKey& key_, bool force_context_flush) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, key{key_} { if (shader_notify) { @@ -231,7 +231,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c const bool in_parallel = thread_worker != nullptr; const auto backend = device.GetShaderBackend(); auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv), - shader_notify, backend, in_parallel](ShaderContext::Context*) mutable { + shader_notify, backend, in_parallel, + force_context_flush](ShaderContext::Context*) mutable { for (size_t stage = 0; stage < 5; ++stage) { switch (backend) { case Settings::ShaderBackend::GLSL: @@ -251,7 +252,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c break; } } - if (in_parallel) { + if (force_context_flush || in_parallel) { std::scoped_lock lock{built_mutex}; built_fence.Create(); // Flush this context to ensure compilation commands and fence are in the GPU pipe. diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 1c06b3655..7bab3be0a 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -78,7 +78,7 @@ public: std::array<std::string, 5> sources, std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos, - const GraphicsPipelineKey& key_); + const GraphicsPipelineKey& key_, bool force_context_flush = false); void Configure(bool is_indexed) { configure_func(this, is_indexed); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 181857d9c..7bced675c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -64,7 +64,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), query_cache(*this), accelerate_dma(buffer_cache), - fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), + blit_image(program_manager_) {} RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -320,6 +321,47 @@ void RasterizerOpenGL::DrawIndirect() { buffer_cache.SetDrawIndirect(nullptr); } +void RasterizerOpenGL::DrawTexture() { + MICROPROFILE_SCOPE(OpenGL_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + query_cache.UpdateCounters(); + + texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.UpdateRenderTargets(false); + + SyncState(); + + const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); + const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); + + if (device.HasDrawTexture()) { + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + + glDrawTextureNV(texture.DefaultHandle(), sampler->Handle(), draw_texture_state.dst_x0, + draw_texture_state.dst_y0, draw_texture_state.dst_x1, + draw_texture_state.dst_y1, 0, + draw_texture_state.src_x0 / static_cast<float>(texture.size.width), + draw_texture_state.src_y0 / static_cast<float>(texture.size.height), + draw_texture_state.src_x1 / static_cast<float>(texture.size.width), + draw_texture_state.src_y1 / static_cast<float>(texture.size.height)); + } else { + Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0), + .y = static_cast<s32>(draw_texture_state.dst_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1), + .y = static_cast<s32>(draw_texture_state.dst_y1)}}; + Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0), + .y = static_cast<s32>(draw_texture_state.src_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1), + .y = static_cast<s32>(draw_texture_state.src_y1)}}; + blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(), + sampler->Handle(), dst_region, src_region, texture.size); + } + + ++num_queued_commands; +} + void RasterizerOpenGL::DispatchCompute() { gpu_memory->FlushCaching(); ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index be4f76c18..0c45832ae 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -16,6 +16,7 @@ #include "video_core/engines/maxwell_dma.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/blit_image.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_fence_manager.h" @@ -70,6 +71,7 @@ public: void Draw(bool is_indexed, u32 instance_count) override; void DrawIndirect() override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; @@ -224,6 +226,8 @@ private: AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; + BlitImageHelper blit_image; + boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7dd854e0f..626ea7dcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -286,7 +286,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, file.read(reinterpret_cast<char*>(&key), sizeof(key)); queue_work([this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { ctx->pools.ReleaseContents(); - auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; + auto pipeline{CreateComputePipeline(ctx->pools, key, env, true)}; std::scoped_lock lock{state.mutex}; if (pipeline) { compute_cache.emplace(key, std::move(pipeline)); @@ -307,7 +307,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, env_ptrs.push_back(&env); } ctx->pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false, true)}; std::scoped_lock lock{state.mutex}; if (pipeline) { graphics_cache.emplace(key, std::move(pipeline)); @@ -439,7 +439,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() { std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, - std::span<Shader::Environment* const> envs, bool build_in_parallel) try { + std::span<Shader::Environment* const> envs, bool use_shader_workers, + bool force_context_flush) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{}; u32 total_storage_buffers{}; @@ -531,10 +532,10 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( } previous_program = &program; } - auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; + auto* const thread_worker{use_shader_workers ? workers.get() : nullptr}; return std::make_unique<GraphicsPipeline>(device, texture_cache, buffer_cache, program_manager, state_tracker, thread_worker, &shader_notify, sources, - sources_spirv, infos, key); + sources_spirv, infos, key, force_context_flush); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -559,8 +560,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( } std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( - ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, - Shader::Environment& env) try { + ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env, + bool force_context_flush) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -589,7 +590,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( } return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, program_manager, - program.info, code, code_spirv); + program.info, code, code_spirv, force_context_flush); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); return nullptr; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index f82420592..6b9732fca 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -50,14 +50,16 @@ private: std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, - std::span<Shader::Environment* const> envs, bool build_in_parallel); + std::span<Shader::Environment* const> envs, bool use_shader_workers, + bool force_context_flush = false); std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader); std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, - Shader::Environment& env); + Shader::Environment& env, + bool force_context_flush = false); std::unique_ptr<ShaderWorker> CreateWorkers() const; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index d9c29d8b7..98841ae65 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,2 +1,123 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later + +#include <glad/glad.h> + +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, +}; + +ProgramManager::ProgramManager(const Device& device) { + glCreateProgramPipelines(1, &pipeline.handle); + if (device.UseAssemblyShaders()) { + glEnable(GL_COMPUTE_PROGRAM_NV); + } +} + +void ProgramManager::BindComputeProgram(GLuint program) { + glUseProgram(program); + is_compute_bound = true; +} + +void ProgramManager::BindComputeAssemblyProgram(GLuint program) { + if (current_assembly_compute_program != program) { + current_assembly_compute_program = program; + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); + } + UnbindPipeline(); +} + +void ProgramManager::BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { + static constexpr std::array<GLenum, 5> stage_enums{ + GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, + GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, + }; + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); + } + } + BindPipeline(); +} + +void ProgramManager::BindPresentPrograms(GLuint vertex, GLuint fragment) { + if (current_programs[0] != vertex) { + current_programs[0] = vertex; + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); + } + if (current_programs[4] != fragment) { + current_programs[4] = fragment; + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); + } + glUseProgramStages( + pipeline.handle, + GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); + current_programs[1] = 0; + current_programs[2] = 0; + current_programs[3] = 0; + + if (current_stage_mask != 0) { + current_stage_mask = 0; + for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { + glDisable(program_type); + } + } + BindPipeline(); +} + +void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs, + u32 stage_mask) { + const u32 changed_mask = current_stage_mask ^ stage_mask; + current_stage_mask = stage_mask; + + if (changed_mask != 0) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (((changed_mask >> stage) & 1) != 0) { + if (((stage_mask >> stage) & 1) != 0) { + glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } else { + glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } + } + } + } + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); + } + } + UnbindPipeline(); +} + +void ProgramManager::RestoreGuestCompute() {} + +void ProgramManager::BindPipeline() { + if (!is_pipeline_bound) { + is_pipeline_bound = true; + glBindProgramPipeline(pipeline.handle); + } + UnbindCompute(); +} + +void ProgramManager::UnbindPipeline() { + if (is_pipeline_bound) { + is_pipeline_bound = false; + glBindProgramPipeline(0); + } + UnbindCompute(); +} + +void ProgramManager::UnbindCompute() { + if (is_compute_bound) { + is_compute_bound = false; + glUseProgram(0); + } +} +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index a84f5aeb3..07ffab77f 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -6,8 +6,6 @@ #include <array> #include <span> -#include <glad/glad.h> - #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -16,121 +14,28 @@ namespace OpenGL { class ProgramManager { static constexpr size_t NUM_STAGES = 5; - static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ - GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, - GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, - }; - public: - explicit ProgramManager(const Device& device) { - glCreateProgramPipelines(1, &pipeline.handle); - if (device.UseAssemblyShaders()) { - glEnable(GL_COMPUTE_PROGRAM_NV); - } - } - - void BindComputeProgram(GLuint program) { - glUseProgram(program); - is_compute_bound = true; - } - - void BindComputeAssemblyProgram(GLuint program) { - if (current_assembly_compute_program != program) { - current_assembly_compute_program = program; - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); - } - UnbindPipeline(); - } - - void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { - static constexpr std::array<GLenum, 5> stage_enums{ - GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, - GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, - }; - for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (current_programs[stage] != programs[stage].handle) { - current_programs[stage] = programs[stage].handle; - glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); - } - } - BindPipeline(); - } - - void BindPresentPrograms(GLuint vertex, GLuint fragment) { - if (current_programs[0] != vertex) { - current_programs[0] = vertex; - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); - } - if (current_programs[4] != fragment) { - current_programs[4] = fragment; - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); - } - glUseProgramStages( - pipeline.handle, - GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); - current_programs[1] = 0; - current_programs[2] = 0; - current_programs[3] = 0; - - if (current_stage_mask != 0) { - current_stage_mask = 0; - for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { - glDisable(program_type); - } - } - BindPipeline(); - } + explicit ProgramManager(const Device& device); + + void BindComputeProgram(GLuint program); + + void BindComputeAssemblyProgram(GLuint program); + + void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs); + + void BindPresentPrograms(GLuint vertex, GLuint fragment); void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs, - u32 stage_mask) { - const u32 changed_mask = current_stage_mask ^ stage_mask; - current_stage_mask = stage_mask; - - if (changed_mask != 0) { - for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (((changed_mask >> stage) & 1) != 0) { - if (((stage_mask >> stage) & 1) != 0) { - glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); - } else { - glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); - } - } - } - } - for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (current_programs[stage] != programs[stage].handle) { - current_programs[stage] = programs[stage].handle; - glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); - } - } - UnbindPipeline(); - } - - void RestoreGuestCompute() {} + u32 stage_mask); + + void RestoreGuestCompute(); private: - void BindPipeline() { - if (!is_pipeline_bound) { - is_pipeline_bound = true; - glBindProgramPipeline(pipeline.handle); - } - UnbindCompute(); - } - - void UnbindPipeline() { - if (is_pipeline_bound) { - is_pipeline_bound = false; - glBindProgramPipeline(0); - } - UnbindCompute(); - } - - void UnbindCompute() { - if (is_compute_bound) { - is_compute_bound = false; - glUseProgram(0); - } - } + void BindPipeline(); + + void UnbindPipeline(); + + void UnbindCompute(); OGLPipeline pipeline; bool is_pipeline_bound{}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index de95f2634..2a74c1d05 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -17,8 +17,14 @@ #include "core/frontend/emu_window.h" #include "core/memory.h" #include "core/telemetry_session.h" +#include "video_core/host_shaders/ffx_a_h.h" +#include "video_core/host_shaders/ffx_fsr1_h.h" +#include "video_core/host_shaders/full_screen_triangle_vert.h" #include "video_core/host_shaders/fxaa_frag.h" #include "video_core/host_shaders/fxaa_vert.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h" #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_scaleforce_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" @@ -31,6 +37,7 @@ #include "video_core/host_shaders/smaa_edge_detection_vert.h" #include "video_core/host_shaders/smaa_neighborhood_blending_frag.h" #include "video_core/host_shaders/smaa_neighborhood_blending_vert.h" +#include "video_core/renderer_opengl/gl_fsr.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -268,12 +275,17 @@ void RendererOpenGL::InitOpenGLObjects() { fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER); fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); - const auto SmaaShader = [](std::string_view specialized_source, GLenum stage) { - std::string shader_source{specialized_source}; - constexpr std::string_view include_string = "#include \"opengl_smaa.glsl\""; + const auto replace_include = [](std::string& shader_source, std::string_view include_name, + std::string_view include_content) { + const std::string include_string = fmt::format("#include \"{}\"", include_name); const std::size_t pos = shader_source.find(include_string); ASSERT(pos != std::string::npos); - shader_source.replace(pos, include_string.size(), HostShaders::OPENGL_SMAA_GLSL); + shader_source.replace(pos, include_string.size(), include_content); + }; + + const auto SmaaShader = [&](std::string_view specialized_source, GLenum stage) { + std::string shader_source{specialized_source}; + replace_include(shader_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL); return CreateProgram(shader_source, stage); }; @@ -298,14 +310,32 @@ void RendererOpenGL::InitOpenGLObjects() { CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), GL_FRAGMENT_SHADER); + std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG}; + replace_include(fsr_source, "ffx_a.h", HostShaders::FFX_A_H); + replace_include(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H); + + std::string fsr_easu_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG}; + std::string fsr_rcas_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG}; + replace_include(fsr_easu_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); + replace_include(fsr_rcas_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); + + fsr = std::make_unique<FSR>(HostShaders::FULL_SCREEN_TRIANGLE_VERT, fsr_easu_frag_source, + fsr_rcas_frag_source); + // Generate presentation sampler present_sampler.Create(); glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); present_sampler_nn.Create(); glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); // Generate VBO handle for drawing vertex_buffer.Create(); @@ -525,6 +555,31 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glBindTextureUnit(0, aa_texture.handle); } + glDisablei(GL_SCISSOR_TEST, 0); + + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + if (!fsr->AreBuffersInitialized()) { + fsr->InitBuffers(); + } + + auto crop_rect = framebuffer_crop_rect; + if (crop_rect.GetWidth() == 0) { + crop_rect.right = framebuffer_width; + } + if (crop_rect.GetHeight() == 0) { + crop_rect.bottom = framebuffer_height; + } + crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); + const auto fsr_input_width = Settings::values.resolution_info.ScaleUp(framebuffer_width); + const auto fsr_input_height = Settings::values.resolution_info.ScaleUp(framebuffer_height); + glBindSampler(0, present_sampler.handle); + fsr->Draw(program_manager, layout.screen, fsr_input_width, fsr_input_height, crop_rect); + } else { + if (fsr->AreBuffersInitialized()) { + fsr->ReleaseBuffers(); + } + } + const std::array ortho_matrix = MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); @@ -540,10 +595,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { case Settings::ScalingFilter::ScaleForce: return present_scaleforce_fragment.handle; case Settings::ScalingFilter::Fsr: - LOG_WARNING( - Render_OpenGL, - "FidelityFX Super Resolution is not supported in OpenGL, changing to ScaleForce"); - return present_scaleforce_fragment.handle; + return fsr->GetPresentFragmentProgram().handle; default: return present_bilinear_fragment.handle; } @@ -578,15 +630,18 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width); f32 scale_v = static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height); - // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering - // (e.g. handheld mode) on a 1920x1080 framebuffer. - if (framebuffer_crop_rect.GetWidth() > 0) { - scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / - static_cast<f32>(screen_info.texture.width); - } - if (framebuffer_crop_rect.GetHeight() > 0) { - scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / - static_cast<f32>(screen_info.texture.height); + + if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::Fsr) { + // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering + // (e.g. handheld mode) on a 1920x1080 framebuffer. + if (framebuffer_crop_rect.GetWidth() > 0) { + scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / + static_cast<f32>(screen_info.texture.width); + } + if (framebuffer_crop_rect.GetHeight() > 0) { + scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / + static_cast<f32>(screen_info.texture.height); + } } if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa && !screen_info.was_accelerated) { @@ -612,7 +667,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { } else { glDisable(GL_FRAMEBUFFER_SRGB); } - glDisablei(GL_SCISSOR_TEST, 0); glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), static_cast<GLfloat>(layout.height)); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index cc97d7b26..f1d5fd954 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -10,6 +10,7 @@ #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_fsr.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" @@ -141,6 +142,8 @@ private: OGLTexture smaa_edges_tex; OGLTexture smaa_blend_tex; + std::unique_ptr<FSR> fsr; + /// OpenGL framebuffer data std::vector<u8> gl_framebuffer_data; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 3f2b139e0..cf2964a3f 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -4,14 +4,16 @@ #include <algorithm> #include "common/settings.h" +#include "video_core/host_shaders/blit_color_float_frag_spv.h" #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" #include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" -#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" +#include "video_core/host_shaders/vulkan_color_clear_frag_spv.h" +#include "video_core/host_shaders/vulkan_color_clear_vert_spv.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -69,10 +71,11 @@ constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CRE .bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()), .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(), }; -constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, +template <VkShaderStageFlags stageFlags, size_t size> +inline constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ + .stageFlags = stageFlags, .offset = 0, - .size = sizeof(PushConstants), + .size = static_cast<u32>(size), }; constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -125,10 +128,8 @@ constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE .alphaToCoverageEnable = VK_FALSE, .alphaToOneEnable = VK_FALSE, }; -constexpr std::array DYNAMIC_STATES{ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, -}; +constexpr std::array DYNAMIC_STATES{VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_BLEND_CONSTANTS}; constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .pNext = nullptr, @@ -205,15 +206,15 @@ inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{ }; constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo( - const VkDescriptorSetLayout* set_layout) { + const VkDescriptorSetLayout* set_layout, vk::Span<VkPushConstantRange> push_constants) { return VkPipelineLayoutCreateInfo{ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, - .setLayoutCount = 1, + .setLayoutCount = (set_layout != nullptr ? 1u : 0u), .pSetLayouts = set_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &PUSH_CONSTANT_RANGE, + .pushConstantRangeCount = push_constants.size(), + .pPushConstantRanges = push_constants.data(), }; } @@ -302,8 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr); } -void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, - const Region2D& src_region) { +void BindBlitState(vk::CommandBuffer cmdbuf, const Region2D& dst_region) { const VkOffset2D offset{ .x = std::min(dst_region.start.x, dst_region.end.x), .y = std::min(dst_region.start.y, dst_region.end.y), @@ -325,15 +325,23 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi .offset = offset, .extent = extent, }; - const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x); - const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); +} + +void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, + const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) { + BindBlitState(cmdbuf, dst_region); + const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) / + static_cast<float>(src_size.width); + const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) / + static_cast<float>(src_size.height); const PushConstants push_constants{ .tex_scale = {scale_x, scale_y}, - .tex_offset = {static_cast<float>(src_region.start.x), - static_cast<float>(src_region.start.y)}, + .tex_offset = {static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width), + static_cast<float>(src_region.start.y) / + static_cast<float>(src_size.height)}, }; - cmdbuf.SetViewport(0, viewport); - cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } @@ -347,6 +355,51 @@ VkExtent2D GetConversionExtent(const ImageView& src_image_view) { .height = is_rescaled ? resolution.ScaleUp(height) : height, }; } + +void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, + VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL) { + constexpr VkFlags flags{VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT}; + const VkImageMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = flags, + .dstAccessMask = flags, + .oldLayout = source_layout, + .newLayout = target_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, barrier); +} + +void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) { + const VkRenderPass render_pass = framebuffer->RenderPass(); + const VkFramebuffer framebuffer_handle = framebuffer->Handle(); + const VkExtent2D render_area = framebuffer->RenderArea(); + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = render_pass, + .framebuffer = framebuffer_handle, + .renderArea{ + .offset{}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; + cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); +} } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, @@ -360,13 +413,20 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)}, two_textures_descriptor_allocator{ descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)}, - one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( - PipelineLayoutCreateInfo(one_texture_set_layout.address()))), - two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( - PipelineLayoutCreateInfo(two_textures_set_layout.address()))), + one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo( + one_texture_set_layout.address(), + PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstants)>))), + two_textures_pipeline_layout( + device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo( + two_textures_set_layout.address(), + PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstants)>))), + clear_color_pipeline_layout(device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo( + nullptr, PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(float) * 4>))), full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), - blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), + blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)), blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)), + clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)), + clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)), convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), @@ -404,6 +464,32 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView scheduler.InvalidateState(); } +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, + VkImage src_image, VkSampler src_sampler, + const Region2D& dst_region, const Region2D& src_region, + const Extent3D& src_size) { + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = Tegra::Engines::Fermi2D::Operation::SrcCopy, + }; + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([this, dst_framebuffer, src_image_view, src_image, src_sampler, dst_region, + src_region, src_size, pipeline, layout](vk::CommandBuffer cmdbuf) { + TransitionImageLayout(cmdbuf, src_image, VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL); + BeginRenderPass(cmdbuf, dst_framebuffer); + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + BindBlitState(cmdbuf, layout, dst_region, src_region, src_size); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + }); +} + void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, VkImageView src_stencil_view, const Region2D& dst_region, const Region2D& src_region, @@ -479,6 +565,30 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view); } +void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask, + const std::array<f32, 4>& clear_color, + const Region2D& dst_region) { + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = Tegra::Engines::Fermi2D::Operation::BlendPremult, + }; + const VkPipeline pipeline = FindOrEmplaceClearColorPipeline(key); + const VkPipelineLayout layout = *clear_color_pipeline_layout; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record( + [pipeline, layout, color_mask, clear_color, dst_region](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + const std::array blend_color = { + (color_mask & 0x1) ? 1.0f : 0.0f, (color_mask & 0x2) ? 1.0f : 0.0f, + (color_mask & 0x4) ? 1.0f : 0.0f, (color_mask & 0x8) ? 1.0f : 0.0f}; + cmdbuf.SetBlendConstants(blend_color.data()); + BindBlitState(cmdbuf, dst_region); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_color); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; @@ -654,6 +764,58 @@ VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePip return *blit_depth_stencil_pipelines.back(); } +VkPipeline BlitImageHelper::FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key) { + const auto it = std::ranges::find(clear_color_keys, key); + if (it != clear_color_keys.end()) { + return *clear_color_pipelines[std::distance(clear_color_keys.begin(), it)]; + } + clear_color_keys.push_back(key); + const std::array stages = MakeStages(*clear_color_vert, *clear_color_frag); + const VkPipelineColorBlendAttachmentState color_blend_attachment_state{ + .blendEnable = VK_TRUE, + .srcColorBlendFactor = VK_BLEND_FACTOR_CONSTANT_COLOR, + .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_CONSTANT_ALPHA, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + const VkPipelineColorBlendStateCreateInfo color_blend_state_generic_create_info{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 1, + .pAttachments = &color_blend_attachment_state, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, + }; + clear_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &color_blend_state_generic_create_info, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *clear_color_pipeline_layout, + .renderPass = key.renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + })); + return *clear_color_pipelines.back(); +} + void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth) { if (pipeline) { diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 5df679fb4..2976a7d91 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -10,6 +10,8 @@ namespace Vulkan { +using VideoCommon::Extent3D; +using VideoCommon::Offset2D; using VideoCommon::Region2D; class Device; @@ -36,6 +38,10 @@ public: Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); + void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, + VkImage src_image, VkSampler src_sampler, const Region2D& dst_region, + const Region2D& src_region, const Extent3D& src_size); + void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, VkImageView src_stencil_view, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, @@ -55,6 +61,9 @@ public: void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); + void ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask, + const std::array<f32, 4>& clear_color, const Region2D& dst_region); + private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view); @@ -66,6 +75,8 @@ private: [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); + [[nodiscard]] VkPipeline FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key); + void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth); void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); @@ -91,9 +102,12 @@ private: DescriptorAllocator two_textures_descriptor_allocator; vk::PipelineLayout one_texture_pipeline_layout; vk::PipelineLayout two_textures_pipeline_layout; + vk::PipelineLayout clear_color_pipeline_layout; vk::ShaderModule full_screen_vert; vk::ShaderModule blit_color_to_color_frag; vk::ShaderModule blit_depth_stencil_frag; + vk::ShaderModule clear_color_vert; + vk::ShaderModule clear_color_frag; vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; @@ -106,6 +120,8 @@ private: std::vector<vk::Pipeline> blit_color_pipelines; std::vector<BlitImagePipelineKey> blit_depth_stencil_keys; std::vector<vk::Pipeline> blit_depth_stencil_pipelines; + std::vector<BlitImagePipelineKey> clear_color_keys; + std::vector<vk::Pipeline> clear_color_pipelines; vk::Pipeline convert_d32_to_r32_pipeline; vk::Pipeline convert_r32_to_d32_pipeline; vk::Pipeline convert_d16_to_r16_pipeline; diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 33daa8c1c..df972cd54 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -1,12 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include <cmath> -#include "common/bit_cast.h" #include "common/common_types.h" #include "common/div_ceil.h" #include "common/settings.h" +#include "video_core/fsr.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" @@ -17,146 +16,7 @@ #include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { -namespace { -// Reimplementations of the constant generating functions in ffx_fsr1.h -// GCC generated a lot of warnings when using the official header. -u32 AU1_AH1_AF1(f32 f) { - static constexpr u32 base[512]{ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, - 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, - 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, - 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, - 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, - 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, - 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, - 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - }; - static constexpr s8 shift[512]{ - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, - 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, - 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, - }; - const u32 u = Common::BitCast<u32>(f); - const u32 i = u >> 23; - return base[i] + ((u & 0x7fffff) >> shift[i]); -} - -u32 AU1_AH2_AF2(f32 a[2]) { - return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16); -} - -void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX, - f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, - f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) { - con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX); - con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY); - con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); - con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); - con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); - con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY); - con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); - con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY); - con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX); - con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); - con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); - con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); - con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX); - con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY); - con3[2] = con3[3] = 0; -} - -void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], - f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, - f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, - f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { - FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, - inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); - con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + - inputOffsetInPixelsX); - con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + - inputOffsetInPixelsY); -} - -void FsrRcasCon(u32* con, f32 sharpness) { - sharpness = std::exp2f(-sharpness); - f32 hSharp[2]{sharpness, sharpness}; - con[0] = Common::BitCast<u32>(sharpness); - con[1] = AU1_AH2_AF2(hSharp); - con[2] = 0; - con[3] = 0; -} -} // Anonymous namespace +using namespace FSR; FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, VkExtent2D output_size_) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b75b8eec6..719edbcfb 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -266,6 +266,35 @@ void RasterizerVulkan::DrawIndirect() { buffer_cache.SetDrawIndirect(nullptr); } +void RasterizerVulkan::DrawTexture() { + MICROPROFILE_SCOPE(Vulkan_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + FlushWork(); + + query_cache.UpdateCounters(); + + texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.UpdateRenderTargets(false); + + UpdateDynamicStates(); + + const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); + const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); + Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0), + .y = static_cast<s32>(draw_texture_state.dst_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1), + .y = static_cast<s32>(draw_texture_state.dst_y1)}}; + Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0), + .y = static_cast<s32>(draw_texture_state.src_y0)}, + Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1), + .y = static_cast<s32>(draw_texture_state.src_y1)}}; + blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(), + texture.ImageHandle(), sampler->Handle(), dst_region, src_region, + texture.size); +} + void RasterizerVulkan::Clear(u32 layer_count) { MICROPROFILE_SCOPE(Vulkan_Clearing); @@ -365,7 +394,15 @@ void RasterizerVulkan::Clear(u32 layer_count) { cmdbuf.ClearAttachments(attachment, clear_rect); }); } else { - UNIMPLEMENTED_MSG("Unimplemented Clear only the specified channel"); + u8 color_mask = static_cast<u8>(regs.clear_surface.R | regs.clear_surface.G << 1 | + regs.clear_surface.B << 2 | regs.clear_surface.A << 3); + Region2D dst_region = { + Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, + Offset2D{.x = clear_rect.rect.offset.x + + static_cast<s32>(clear_rect.rect.extent.width), + .y = clear_rect.rect.offset.y + + static_cast<s32>(clear_rect.rect.extent.height)}}; + blit_image.ClearColor(framebuffer, color_mask, regs.clear_color, dst_region); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 472cc64d9..a0508b57c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -66,6 +66,7 @@ public: void Draw(bool is_indexed, u32 instance_count) override; void DrawIndirect() override; + void DrawTexture() override; void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h index ee4240288..1bad83fb4 100644 --- a/src/video_core/texture_cache/descriptor_table.h +++ b/src/video_core/texture_cache/descriptor_table.h @@ -19,9 +19,7 @@ public: explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {} [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) { - [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { - return false; - } + [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { return false; } Refresh(gpu_addr, limit); return true; } diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 852ec2519..e9100091e 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -100,6 +100,10 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { ASSERT_MSG(false, "Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); break; } + if (num_samples > 1) { + size.width *= NumSamplesX(config.msaa_mode); + size.height *= NumSamplesY(config.msaa_mode); + } if (type != ImageType::Linear) { // FIXME: Call this without passing *this layer_stride = CalculateLayerStride(*this); diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h index d552bccf0..203ac1b11 100644 --- a/src/video_core/texture_cache/samples_helper.h +++ b/src/video_core/texture_cache/samples_helper.h @@ -51,4 +51,48 @@ namespace VideoCommon { return 1; } +[[nodiscard]] inline int NumSamplesX(Tegra::Texture::MsaaMode msaa_mode) { + using Tegra::Texture::MsaaMode; + switch (msaa_mode) { + case MsaaMode::Msaa1x1: + return 1; + case MsaaMode::Msaa2x1: + case MsaaMode::Msaa2x1_D3D: + case MsaaMode::Msaa2x2: + case MsaaMode::Msaa2x2_VC4: + case MsaaMode::Msaa2x2_VC12: + return 2; + case MsaaMode::Msaa4x2: + case MsaaMode::Msaa4x2_D3D: + case MsaaMode::Msaa4x2_VC8: + case MsaaMode::Msaa4x2_VC24: + case MsaaMode::Msaa4x4: + return 4; + } + ASSERT_MSG(false, "Invalid MSAA mode={}", static_cast<int>(msaa_mode)); + return 1; +} + +[[nodiscard]] inline int NumSamplesY(Tegra::Texture::MsaaMode msaa_mode) { + using Tegra::Texture::MsaaMode; + switch (msaa_mode) { + case MsaaMode::Msaa1x1: + case MsaaMode::Msaa2x1: + case MsaaMode::Msaa2x1_D3D: + return 1; + case MsaaMode::Msaa2x2: + case MsaaMode::Msaa2x2_VC4: + case MsaaMode::Msaa2x2_VC12: + case MsaaMode::Msaa4x2: + case MsaaMode::Msaa4x2_D3D: + case MsaaMode::Msaa4x2_VC8: + case MsaaMode::Msaa4x2_VC24: + return 2; + case MsaaMode::Msaa4x4: + return 4; + } + ASSERT_MSG(false, "Invalid MSAA mode={}", static_cast<int>(msaa_mode)); + return 1; +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index 1e2aad76a..9df6a2903 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -29,7 +29,7 @@ struct SlotId { }; template <class T> -requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T> + requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T> class SlotVector { public: class Iterator { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 87152c8e9..1b01990a4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -149,6 +149,13 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { } template <class P> +typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept { + const auto image_view_id = VisitImageView(channel_state->graphics_image_table, + channel_state->graphics_image_view_ids, index); + return slot_image_views[image_view_id]; +} + +template <class P> void TextureCache<P>::MarkModification(ImageId id) noexcept { MarkModification(slot_images[id]); } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4eea1f609..485eaabaa 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -129,6 +129,9 @@ public: /// Return a reference to the given image view id [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + /// Get the imageview from the graphics descriptor table in the specified index + [[nodiscard]] ImageView& GetImageView(u32 index) noexcept; + /// Mark an image as modified from the GPU void MarkModification(ImageId id) noexcept; diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 85f1d13e0..5fa0d9620 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -57,7 +57,7 @@ NsightAftermathTracker::NsightAftermathTracker() { if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps( GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan, GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback, - ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) { + ShaderDebugInfoCallback, CrashDumpDescriptionCallback, nullptr, this))) { LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed"); return; } @@ -83,7 +83,7 @@ void NsightAftermathTracker::SaveShader(std::span<const u32> spirv) const { std::scoped_lock lock{mutex}; - GFSDK_Aftermath_ShaderHash hash; + GFSDK_Aftermath_ShaderBinaryHash hash; if (!GFSDK_Aftermath_SUCCEED( GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) { LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module"); @@ -121,8 +121,8 @@ void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump, u32 json_size = 0; if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON( decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO, - GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr, - this, &json_size))) { + GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, this, + &json_size))) { LOG_ERROR(Render_Vulkan, "Failed to generate JSON"); return; } |