diff options
Diffstat (limited to '')
29 files changed, 807 insertions, 235 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index b474eb363..4742bcbe9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -52,6 +52,8 @@ add_library(video_core STATIC engines/puller.cpp engines/puller.h framebuffer_config.h + fsr.cpp + fsr.h host1x/codecs/codec.cpp host1x/codecs/codec.h host1x/codecs/h264.cpp @@ -110,6 +112,8 @@ add_library(video_core STATIC renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h + renderer_opengl/gl_fsr.cpp + renderer_opengl/gl_fsr.h renderer_opengl/gl_graphics_pipeline.cpp renderer_opengl/gl_graphics_pipeline.h renderer_opengl/gl_rasterizer.cpp diff --git a/src/video_core/fsr.cpp b/src/video_core/fsr.cpp new file mode 100644 index 000000000..5653c64fc --- /dev/null +++ b/src/video_core/fsr.cpp @@ -0,0 +1,148 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <cmath> +#include "video_core/fsr.h" + +namespace FSR { +namespace { +// Reimplementations of the constant generating functions in ffx_fsr1.h +// GCC generated a lot of warnings when using the official header. +u32 AU1_AH1_AF1(f32 f) { + static constexpr u32 base[512]{ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, + 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, + 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, + 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, + 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, + 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, + 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, + 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + }; + static constexpr s8 shift[512]{ + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, + 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, + 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, + }; + const u32 u = Common::BitCast<u32>(f); + const u32 i = u >> 23; + return base[i] + ((u & 0x7fffff) >> shift[i]); +} + +u32 AU1_AH2_AF2(f32 a[2]) { + return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16); +} + +void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX, + f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, + f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) { + con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX); + con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY); + con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); + con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); + con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); + con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY); + con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); + con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY); + con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX); + con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); + con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); + con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); + con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX); + con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY); + con3[2] = con3[3] = 0; +} +} // Anonymous namespace + +void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], + f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, + f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, + f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { + FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, + inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); + con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + + inputOffsetInPixelsX); + con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + + inputOffsetInPixelsY); +} + +void FsrRcasCon(u32* con, f32 sharpness) { + sharpness = std::exp2f(-sharpness); + f32 hSharp[2]{sharpness, sharpness}; + con[0] = Common::BitCast<u32>(sharpness); + con[1] = AU1_AH2_AF2(hSharp); + con[2] = 0; + con[3] = 0; +} +} // namespace FSR diff --git a/src/video_core/fsr.h b/src/video_core/fsr.h new file mode 100644 index 000000000..db0d4ec6f --- /dev/null +++ b/src/video_core/fsr.h @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/bit_cast.h" +#include "common/common_types.h" + +namespace FSR { +// Reimplementations of the constant generating functions in ffx_fsr1.h +// GCC generated a lot of warnings when using the official header. +void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], + f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, + f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, + f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY); + +void FsrRcasCon(u32* con, f32 sharpness); + +} // namespace FSR diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index e968ae220..52cd5bb81 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -3,12 +3,16 @@ set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr) -set(GLSL_INCLUDES - fidelityfx_fsr.comp +set(FIDELITYFX_FILES ${FIDELITYFX_INCLUDE_DIR}/ffx_a.h ${FIDELITYFX_INCLUDE_DIR}/ffx_fsr1.h ) +set(GLSL_INCLUDES + fidelityfx_fsr.comp + ${FIDELITYFX_FILES} +) + set(SHADER_FILES astc_decoder.comp blit_color_float.frag @@ -24,6 +28,9 @@ set(SHADER_FILES fxaa.vert opengl_convert_s8d24.comp opengl_copy_bc4.comp + opengl_fidelityfx_fsr.frag + opengl_fidelityfx_fsr_easu.frag + opengl_fidelityfx_fsr_rcas.frag opengl_present.frag opengl_present.vert opengl_present_scaleforce.frag @@ -38,6 +45,8 @@ set(SHADER_FILES smaa_neighborhood_blending.vert smaa_neighborhood_blending.frag vulkan_blit_depth_stencil.frag + vulkan_color_clear.frag + vulkan_color_clear.vert vulkan_fidelityfx_fsr_easu_fp16.comp vulkan_fidelityfx_fsr_easu_fp32.comp vulkan_fidelityfx_fsr_rcas_fp16.comp @@ -118,6 +127,25 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) endif() endforeach() +foreach(FILEPATH IN ITEMS ${FIDELITYFX_FILES}) + get_filename_component(FILENAME ${FILEPATH} NAME) + string(REPLACE "." "_" HEADER_NAME ${FILENAME}) + set(SOURCE_FILE ${FILEPATH}) + set(SOURCE_HEADER_FILE ${SHADER_DIR}/${HEADER_NAME}.h) + add_custom_command( + OUTPUT + ${SOURCE_HEADER_FILE} + COMMAND + ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} + MAIN_DEPENDENCY + ${SOURCE_FILE} + DEPENDS + ${INPUT_FILE} + # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified + ) + set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) +endforeach() + set(SHADER_SOURCES ${SHADER_FILES}) list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag new file mode 100644 index 000000000..16d22f58e --- /dev/null +++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag @@ -0,0 +1,108 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +//!#version 460 core +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +#extension GL_AMD_gpu_shader_half_float : enable +#extension GL_NV_gpu_shader5 : enable + +// FidelityFX Super Resolution Sample +// +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +layout (location = 0) uniform uvec4 constants[4]; + +#define A_GPU 1 +#define A_GLSL 1 + +#ifdef YUZU_USE_FP16 + #define A_HALF +#endif +#include "ffx_a.h" + +#ifndef YUZU_USE_FP16 + layout (binding=0) uniform sampler2D InputTexture; + #if USE_EASU + #define FSR_EASU_F 1 + AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; } + AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; } + AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_F + AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); } + void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} + #endif +#else + layout (binding=0) uniform sampler2D InputTexture; + #if USE_EASU + #define FSR_EASU_H 1 + AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; } + AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; } + AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_H + AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); } + void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){} + #endif +#endif + +#include "ffx_fsr1.h" + +#if USE_RCAS + layout(location = 0) in vec2 frag_texcoord; +#endif +layout (location = 0) out vec4 frag_color; + +void CurrFilter(AU2 pos) +{ +#if USE_EASU + #ifndef YUZU_USE_FP16 + AF3 c; + FsrEasuF(c, pos, constants[0], constants[1], constants[2], constants[3]); + frag_color = AF4(c, 1.0); + #else + AH3 c; + FsrEasuH(c, pos, constants[0], constants[1], constants[2], constants[3]); + frag_color = AH4(c, 1.0); + #endif +#endif +#if USE_RCAS + #ifndef YUZU_USE_FP16 + AF3 c; + FsrRcasF(c.r, c.g, c.b, pos, constants[0]); + frag_color = AF4(c, 1.0); + #else + AH3 c; + FsrRcasH(c.r, c.g, c.b, pos, constants[0]); + frag_color = AH4(c, 1.0); + #endif +#endif +} + +void main() +{ +#if USE_RCAS + CurrFilter(AU2(frag_texcoord * vec2(textureSize(InputTexture, 0)))); +#else + CurrFilter(AU2(gl_FragCoord.xy)); +#endif +} diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag new file mode 100644 index 000000000..d39f80ac1 --- /dev/null +++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag @@ -0,0 +1,9 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_EASU 1 + +#include "opengl_fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag new file mode 100644 index 000000000..cfa78ddc7 --- /dev/null +++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag @@ -0,0 +1,9 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_RCAS 1 + +#include "opengl_fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_color_clear.frag b/src/video_core/host_shaders/vulkan_color_clear.frag new file mode 100644 index 000000000..617bf01e1 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_color_clear.frag @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core + +layout (push_constant) uniform PushConstants { + vec4 clear_color; +}; + +layout(location = 0) out vec4 color; + +void main() { + color = clear_color; +} diff --git a/src/video_core/host_shaders/vulkan_color_clear.vert b/src/video_core/host_shaders/vulkan_color_clear.vert new file mode 100644 index 000000000..d85883141 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_color_clear.vert @@ -0,0 +1,10 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core + +void main() { + float x = float((gl_VertexIndex & 1) << 2); + float y = float((gl_VertexIndex & 2) << 1); + gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); +} diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 3bcae3503..83924475b 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -6,7 +6,6 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" -#include "common/settings.h" #include "core/core.h" #include "core/device_memory.h" #include "core/hle/kernel/k_page_table.h" @@ -46,11 +45,6 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_table_cpu.resize(big_page_table_size); big_page_continous.resize(big_page_table_size / continous_bits, 0); entries.resize(page_table_size / 32, 0); - if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) { - fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); - } else { - fastmem_arena = nullptr; - } } MemoryManager::~MemoryManager() = default; @@ -360,7 +354,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si } } -template <bool is_safe, bool use_fastmem> +template <bool is_safe> void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, [[maybe_unused]] VideoCommon::CacheType which) const { auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, @@ -374,12 +368,8 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: if constexpr (is_safe) { rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } - if constexpr (use_fastmem) { - std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); - } else { - u8* physical = memory.GetPointer(cpu_addr_base); - std::memcpy(dest_buffer, physical, copy_amount); - } + u8* physical = memory.GetPointer(cpu_addr_base); + std::memcpy(dest_buffer, physical, copy_amount); dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -388,15 +378,11 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: if constexpr (is_safe) { rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } - if constexpr (use_fastmem) { - std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); + if (!IsBigPageContinous(page_index)) [[unlikely]] { + memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); } else { - if (!IsBigPageContinous(page_index)) [[unlikely]] { - memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); - } else { - u8* physical = memory.GetPointer(cpu_addr_base); - std::memcpy(dest_buffer, physical, copy_amount); - } + u8* physical = memory.GetPointer(cpu_addr_base); + std::memcpy(dest_buffer, physical, copy_amount); } dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; }; @@ -410,20 +396,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, VideoCommon::CacheType which) const { - if (fastmem_arena) [[likely]] { - ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which); - return; - } - ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which); + ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which); } void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, const std::size_t size) const { - if (fastmem_arena) [[likely]] { - ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); - return; - } - ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); + ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); } template <bool is_safe> diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 2936364f0..9ebfb6179 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -141,7 +141,7 @@ private: inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const; - template <bool is_safe, bool use_fastmem> + template <bool is_safe> void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, VideoCommon::CacheType which) const; @@ -215,7 +215,6 @@ private: std::vector<u64> big_page_continous; std::vector<std::pair<VAddr, std::size_t>> page_stash{}; - u8* fastmem_arena{}; constexpr static size_t continous_bits = 64; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 26d066004..1a0cea9b7 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -30,7 +30,7 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, ProgramManager& program_manager_, const Shader::Info& info_, std::string code, - std::vector<u32> code_v) + std::vector<u32> code_v, bool force_context_flush) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, info{info_} { switch (device.GetShaderBackend()) { @@ -63,6 +63,15 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac writes_global_memory = !use_storage_buffers && std::ranges::any_of(info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + if (force_context_flush) { + std::scoped_lock lock{built_mutex}; + built_fence.Create(); + // Flush this context to ensure compilation commands and fence are in the GPU pipe. + glFlush(); + built_condvar.notify_one(); + } else { + is_built = true; + } } void ComputePipeline::Configure() { @@ -142,6 +151,9 @@ void ComputePipeline::Configure() { } texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); + if (!is_built) { + WaitForBuild(); + } if (assembly_program.handle != 0) { program_manager.BindComputeAssemblyProgram(assembly_program.handle); } else { @@ -223,4 +235,13 @@ void ComputePipeline::Configure() { } } +void ComputePipeline::WaitForBuild() { + if (built_fence.handle == 0) { + std::unique_lock lock{built_mutex}; + built_condvar.wait(lock, [this] { return built_fence.handle != 0; }); + } + ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED); + is_built = true; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index 6534dec32..9bcc72b59 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -50,7 +50,8 @@ class ComputePipeline { public: explicit ComputePipeline(const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, ProgramManager& program_manager_, - const Shader::Info& info_, std::string code, std::vector<u32> code_v); + const Shader::Info& info_, std::string code, std::vector<u32> code_v, + bool force_context_flush = false); void Configure(); @@ -65,6 +66,8 @@ public: } private: + void WaitForBuild(); + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager* gpu_memory; @@ -81,6 +84,11 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; + + std::mutex built_mutex; + std::condition_variable built_condvar; + OGLSync built_fence{}; + bool is_built{false}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_fsr.cpp b/src/video_core/renderer_opengl/gl_fsr.cpp new file mode 100644 index 000000000..77262dcf1 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_fsr.cpp @@ -0,0 +1,101 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/settings.h" +#include "video_core/fsr.h" +#include "video_core/renderer_opengl/gl_fsr.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" + +namespace OpenGL { +using namespace FSR; + +using FsrConstants = std::array<u32, 4 * 4>; + +FSR::FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source, + std::string_view fsr_rcas_source) + : fsr_vertex{CreateProgram(fsr_vertex_source, GL_VERTEX_SHADER)}, + fsr_easu_frag{CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER)}, + fsr_rcas_frag{CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER)} { + glProgramUniform2f(fsr_vertex.handle, 0, 1.0f, 1.0f); + glProgramUniform2f(fsr_vertex.handle, 1, 0.0f, 0.0f); +} + +FSR::~FSR() = default; + +void FSR::Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen, + u32 input_image_width, u32 input_image_height, + const Common::Rectangle<int>& crop_rect) { + + const auto output_image_width = screen.GetWidth(); + const auto output_image_height = screen.GetHeight(); + + if (fsr_intermediate_tex.handle) { + GLint fsr_tex_width, fsr_tex_height; + glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_WIDTH, + &fsr_tex_width); + glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_HEIGHT, + &fsr_tex_height); + if (static_cast<u32>(fsr_tex_width) != output_image_width || + static_cast<u32>(fsr_tex_height) != output_image_height) { + fsr_intermediate_tex.Release(); + } + } + if (!fsr_intermediate_tex.handle) { + fsr_intermediate_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(fsr_intermediate_tex.handle, 1, GL_RGB16F, output_image_width, + output_image_height); + glNamedFramebufferTexture(fsr_framebuffer.handle, GL_COLOR_ATTACHMENT0, + fsr_intermediate_tex.handle, 0); + } + + GLint old_draw_fb; + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); + + glFrontFace(GL_CW); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fsr_framebuffer.handle); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(output_image_width), + static_cast<GLfloat>(output_image_height)); + + FsrConstants constants; + FsrEasuConOffset( + constants.data() + 0, constants.data() + 4, constants.data() + 8, constants.data() + 12, + + static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()), + static_cast<f32>(input_image_width), static_cast<f32>(input_image_height), + static_cast<f32>(output_image_width), static_cast<f32>(output_image_height), + static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top)); + + glProgramUniform4uiv(fsr_easu_frag.handle, 0, sizeof(constants), std::data(constants)); + + program_manager.BindPresentPrograms(fsr_vertex.handle, fsr_easu_frag.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); + glBindTextureUnit(0, fsr_intermediate_tex.handle); + + const float sharpening = + static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f; + + FsrRcasCon(constants.data(), sharpening); + glProgramUniform4uiv(fsr_rcas_frag.handle, 0, sizeof(constants), std::data(constants)); +} + +void FSR::InitBuffers() { + fsr_framebuffer.Create(); +} + +void FSR::ReleaseBuffers() { + fsr_framebuffer.Release(); + fsr_intermediate_tex.Release(); +} + +const OGLProgram& FSR::GetPresentFragmentProgram() const noexcept { + return fsr_rcas_frag; +} + +bool FSR::AreBuffersInitialized() const noexcept { + return fsr_framebuffer.handle; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_fsr.h b/src/video_core/renderer_opengl/gl_fsr.h new file mode 100644 index 000000000..1f6ae3115 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_fsr.h @@ -0,0 +1,43 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <string_view> + +#include "common/common_types.h" +#include "common/math_util.h" +#include "video_core/fsr.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +class ProgramManager; + +class FSR { +public: + explicit FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source, + std::string_view fsr_rcas_source); + ~FSR(); + + void Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen, + u32 input_image_width, u32 input_image_height, + const Common::Rectangle<int>& crop_rect); + + void InitBuffers(); + + void ReleaseBuffers(); + + [[nodiscard]] const OGLProgram& GetPresentFragmentProgram() const noexcept; + + [[nodiscard]] bool AreBuffersInitialized() const noexcept; + +private: + OGLFramebuffer fsr_framebuffer; + OGLProgram fsr_vertex; + OGLProgram fsr_easu_frag; + OGLProgram fsr_rcas_frag; + OGLTexture fsr_intermediate_tex; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index c115dabe1..29491e762 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -176,7 +176,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c std::array<std::string, 5> sources, std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos, - const GraphicsPipelineKey& key_) + const GraphicsPipelineKey& key_, bool force_context_flush) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, key{key_} { if (shader_notify) { @@ -231,7 +231,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c const bool in_parallel = thread_worker != nullptr; const auto backend = device.GetShaderBackend(); auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv), - shader_notify, backend, in_parallel](ShaderContext::Context*) mutable { + shader_notify, backend, in_parallel, + force_context_flush](ShaderContext::Context*) mutable { for (size_t stage = 0; stage < 5; ++stage) { switch (backend) { case Settings::ShaderBackend::GLSL: @@ -251,7 +252,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c break; } } - if (in_parallel) { + if (force_context_flush || in_parallel) { std::scoped_lock lock{built_mutex}; built_fence.Create(); // Flush this context to ensure compilation commands and fence are in the GPU pipe. diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 1c06b3655..7bab3be0a 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -78,7 +78,7 @@ public: std::array<std::string, 5> sources, std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos, - const GraphicsPipelineKey& key_); + const GraphicsPipelineKey& key_, bool force_context_flush = false); void Configure(bool is_indexed) { configure_func(this, is_indexed); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7dd854e0f..626ea7dcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -286,7 +286,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, file.read(reinterpret_cast<char*>(&key), sizeof(key)); queue_work([this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { ctx->pools.ReleaseContents(); - auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; + auto pipeline{CreateComputePipeline(ctx->pools, key, env, true)}; std::scoped_lock lock{state.mutex}; if (pipeline) { compute_cache.emplace(key, std::move(pipeline)); @@ -307,7 +307,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, env_ptrs.push_back(&env); } ctx->pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false, true)}; std::scoped_lock lock{state.mutex}; if (pipeline) { graphics_cache.emplace(key, std::move(pipeline)); @@ -439,7 +439,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() { std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, - std::span<Shader::Environment* const> envs, bool build_in_parallel) try { + std::span<Shader::Environment* const> envs, bool use_shader_workers, + bool force_context_flush) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{}; u32 total_storage_buffers{}; @@ -531,10 +532,10 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( } previous_program = &program; } - auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; + auto* const thread_worker{use_shader_workers ? workers.get() : nullptr}; return std::make_unique<GraphicsPipeline>(device, texture_cache, buffer_cache, program_manager, state_tracker, thread_worker, &shader_notify, sources, - sources_spirv, infos, key); + sources_spirv, infos, key, force_context_flush); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -559,8 +560,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( } std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( - ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, - Shader::Environment& env) try { + ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env, + bool force_context_flush) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -589,7 +590,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( } return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, program_manager, - program.info, code, code_spirv); + program.info, code, code_spirv, force_context_flush); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); return nullptr; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index f82420592..6b9732fca 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -50,14 +50,16 @@ private: std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, - std::span<Shader::Environment* const> envs, bool build_in_parallel); + std::span<Shader::Environment* const> envs, bool use_shader_workers, + bool force_context_flush = false); std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader); std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, - Shader::Environment& env); + Shader::Environment& env, + bool force_context_flush = false); std::unique_ptr<ShaderWorker> CreateWorkers() const; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index de95f2634..2a74c1d05 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -17,8 +17,14 @@ #include "core/frontend/emu_window.h" #include "core/memory.h" #include "core/telemetry_session.h" +#include "video_core/host_shaders/ffx_a_h.h" +#include "video_core/host_shaders/ffx_fsr1_h.h" +#include "video_core/host_shaders/full_screen_triangle_vert.h" #include "video_core/host_shaders/fxaa_frag.h" #include "video_core/host_shaders/fxaa_vert.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h" #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_scaleforce_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" @@ -31,6 +37,7 @@ #include "video_core/host_shaders/smaa_edge_detection_vert.h" #include "video_core/host_shaders/smaa_neighborhood_blending_frag.h" #include "video_core/host_shaders/smaa_neighborhood_blending_vert.h" +#include "video_core/renderer_opengl/gl_fsr.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -268,12 +275,17 @@ void RendererOpenGL::InitOpenGLObjects() { fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER); fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); - const auto SmaaShader = [](std::string_view specialized_source, GLenum stage) { - std::string shader_source{specialized_source}; - constexpr std::string_view include_string = "#include \"opengl_smaa.glsl\""; + const auto replace_include = [](std::string& shader_source, std::string_view include_name, + std::string_view include_content) { + const std::string include_string = fmt::format("#include \"{}\"", include_name); const std::size_t pos = shader_source.find(include_string); ASSERT(pos != std::string::npos); - shader_source.replace(pos, include_string.size(), HostShaders::OPENGL_SMAA_GLSL); + shader_source.replace(pos, include_string.size(), include_content); + }; + + const auto SmaaShader = [&](std::string_view specialized_source, GLenum stage) { + std::string shader_source{specialized_source}; + replace_include(shader_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL); return CreateProgram(shader_source, stage); }; @@ -298,14 +310,32 @@ void RendererOpenGL::InitOpenGLObjects() { CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), GL_FRAGMENT_SHADER); + std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG}; + replace_include(fsr_source, "ffx_a.h", HostShaders::FFX_A_H); + replace_include(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H); + + std::string fsr_easu_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG}; + std::string fsr_rcas_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG}; + replace_include(fsr_easu_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); + replace_include(fsr_rcas_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); + + fsr = std::make_unique<FSR>(HostShaders::FULL_SCREEN_TRIANGLE_VERT, fsr_easu_frag_source, + fsr_rcas_frag_source); + // Generate presentation sampler present_sampler.Create(); glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); present_sampler_nn.Create(); glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); // Generate VBO handle for drawing vertex_buffer.Create(); @@ -525,6 +555,31 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glBindTextureUnit(0, aa_texture.handle); } + glDisablei(GL_SCISSOR_TEST, 0); + + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + if (!fsr->AreBuffersInitialized()) { + fsr->InitBuffers(); + } + + auto crop_rect = framebuffer_crop_rect; + if (crop_rect.GetWidth() == 0) { + crop_rect.right = framebuffer_width; + } + if (crop_rect.GetHeight() == 0) { + crop_rect.bottom = framebuffer_height; + } + crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); + const auto fsr_input_width = Settings::values.resolution_info.ScaleUp(framebuffer_width); + const auto fsr_input_height = Settings::values.resolution_info.ScaleUp(framebuffer_height); + glBindSampler(0, present_sampler.handle); + fsr->Draw(program_manager, layout.screen, fsr_input_width, fsr_input_height, crop_rect); + } else { + if (fsr->AreBuffersInitialized()) { + fsr->ReleaseBuffers(); + } + } + const std::array ortho_matrix = MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); @@ -540,10 +595,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { case Settings::ScalingFilter::ScaleForce: return present_scaleforce_fragment.handle; case Settings::ScalingFilter::Fsr: - LOG_WARNING( - Render_OpenGL, - "FidelityFX Super Resolution is not supported in OpenGL, changing to ScaleForce"); - return present_scaleforce_fragment.handle; + return fsr->GetPresentFragmentProgram().handle; default: return present_bilinear_fragment.handle; } @@ -578,15 +630,18 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width); f32 scale_v = static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height); - // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering - // (e.g. handheld mode) on a 1920x1080 framebuffer. - if (framebuffer_crop_rect.GetWidth() > 0) { - scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / - static_cast<f32>(screen_info.texture.width); - } - if (framebuffer_crop_rect.GetHeight() > 0) { - scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / - static_cast<f32>(screen_info.texture.height); + + if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::Fsr) { + // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering + // (e.g. handheld mode) on a 1920x1080 framebuffer. + if (framebuffer_crop_rect.GetWidth() > 0) { + scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / + static_cast<f32>(screen_info.texture.width); + } + if (framebuffer_crop_rect.GetHeight() > 0) { + scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / + static_cast<f32>(screen_info.texture.height); + } } if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa && !screen_info.was_accelerated) { @@ -612,7 +667,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { } else { glDisable(GL_FRAMEBUFFER_SRGB); } - glDisablei(GL_SCISSOR_TEST, 0); glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), static_cast<GLfloat>(layout.height)); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index cc97d7b26..f1d5fd954 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -10,6 +10,7 @@ #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_fsr.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" @@ -141,6 +142,8 @@ private: OGLTexture smaa_edges_tex; OGLTexture smaa_blend_tex; + std::unique_ptr<FSR> fsr; + /// OpenGL framebuffer data std::vector<u8> gl_framebuffer_data; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index dd00d3edf..cf2964a3f 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -12,6 +12,8 @@ #include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" +#include "video_core/host_shaders/vulkan_color_clear_frag_spv.h" +#include "video_core/host_shaders/vulkan_color_clear_vert_spv.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -69,10 +71,11 @@ constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CRE .bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()), .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(), }; -constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, +template <VkShaderStageFlags stageFlags, size_t size> +inline constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{ + .stageFlags = stageFlags, .offset = 0, - .size = sizeof(PushConstants), + .size = static_cast<u32>(size), }; constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -125,10 +128,8 @@ constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE .alphaToCoverageEnable = VK_FALSE, .alphaToOneEnable = VK_FALSE, }; -constexpr std::array DYNAMIC_STATES{ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, -}; +constexpr std::array DYNAMIC_STATES{VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_BLEND_CONSTANTS}; constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .pNext = nullptr, @@ -205,15 +206,15 @@ inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{ }; constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo( - const VkDescriptorSetLayout* set_layout) { + const VkDescriptorSetLayout* set_layout, vk::Span<VkPushConstantRange> push_constants) { return VkPipelineLayoutCreateInfo{ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, - .setLayoutCount = 1, + .setLayoutCount = (set_layout != nullptr ? 1u : 0u), .pSetLayouts = set_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = &PUSH_CONSTANT_RANGE, + .pushConstantRangeCount = push_constants.size(), + .pPushConstantRanges = push_constants.data(), }; } @@ -302,8 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr); } -void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, - const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) { +void BindBlitState(vk::CommandBuffer cmdbuf, const Region2D& dst_region) { const VkOffset2D offset{ .x = std::min(dst_region.start.x, dst_region.end.x), .y = std::min(dst_region.start.y, dst_region.end.y), @@ -325,6 +325,13 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi .offset = offset, .extent = extent, }; + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); +} + +void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, + const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) { + BindBlitState(cmdbuf, dst_region); const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) / static_cast<float>(src_size.width); const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) / @@ -335,8 +342,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi static_cast<float>(src_region.start.y) / static_cast<float>(src_size.height)}, }; - cmdbuf.SetViewport(0, viewport); - cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } @@ -408,13 +413,20 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)}, two_textures_descriptor_allocator{ descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)}, - one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( - PipelineLayoutCreateInfo(one_texture_set_layout.address()))), - two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( - PipelineLayoutCreateInfo(two_textures_set_layout.address()))), + one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo( + one_texture_set_layout.address(), + PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstants)>))), + two_textures_pipeline_layout( + device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo( + two_textures_set_layout.address(), + PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstants)>))), + clear_color_pipeline_layout(device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo( + nullptr, PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(float) * 4>))), full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)), blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)), + clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)), + clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)), convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), @@ -553,6 +565,30 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view); } +void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask, + const std::array<f32, 4>& clear_color, + const Region2D& dst_region) { + const BlitImagePipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .operation = Tegra::Engines::Fermi2D::Operation::BlendPremult, + }; + const VkPipeline pipeline = FindOrEmplaceClearColorPipeline(key); + const VkPipelineLayout layout = *clear_color_pipeline_layout; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record( + [pipeline, layout, color_mask, clear_color, dst_region](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + const std::array blend_color = { + (color_mask & 0x1) ? 1.0f : 0.0f, (color_mask & 0x2) ? 1.0f : 0.0f, + (color_mask & 0x4) ? 1.0f : 0.0f, (color_mask & 0x8) ? 1.0f : 0.0f}; + cmdbuf.SetBlendConstants(blend_color.data()); + BindBlitState(cmdbuf, dst_region); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_color); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; @@ -728,6 +764,58 @@ VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePip return *blit_depth_stencil_pipelines.back(); } +VkPipeline BlitImageHelper::FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key) { + const auto it = std::ranges::find(clear_color_keys, key); + if (it != clear_color_keys.end()) { + return *clear_color_pipelines[std::distance(clear_color_keys.begin(), it)]; + } + clear_color_keys.push_back(key); + const std::array stages = MakeStages(*clear_color_vert, *clear_color_frag); + const VkPipelineColorBlendAttachmentState color_blend_attachment_state{ + .blendEnable = VK_TRUE, + .srcColorBlendFactor = VK_BLEND_FACTOR_CONSTANT_COLOR, + .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_CONSTANT_ALPHA, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + const VkPipelineColorBlendStateCreateInfo color_blend_state_generic_create_info{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_CLEAR, + .attachmentCount = 1, + .pAttachments = &color_blend_attachment_state, + .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, + }; + clear_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &color_blend_state_generic_create_info, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *clear_color_pipeline_layout, + .renderPass = key.renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + })); + return *clear_color_pipelines.back(); +} + void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth) { if (pipeline) { diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index be8a9a2f6..2976a7d91 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -61,6 +61,9 @@ public: void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); + void ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask, + const std::array<f32, 4>& clear_color, const Region2D& dst_region); + private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view); @@ -72,6 +75,8 @@ private: [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); + [[nodiscard]] VkPipeline FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key); + void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth); void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); @@ -97,9 +102,12 @@ private: DescriptorAllocator two_textures_descriptor_allocator; vk::PipelineLayout one_texture_pipeline_layout; vk::PipelineLayout two_textures_pipeline_layout; + vk::PipelineLayout clear_color_pipeline_layout; vk::ShaderModule full_screen_vert; vk::ShaderModule blit_color_to_color_frag; vk::ShaderModule blit_depth_stencil_frag; + vk::ShaderModule clear_color_vert; + vk::ShaderModule clear_color_frag; vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; @@ -112,6 +120,8 @@ private: std::vector<vk::Pipeline> blit_color_pipelines; std::vector<BlitImagePipelineKey> blit_depth_stencil_keys; std::vector<vk::Pipeline> blit_depth_stencil_pipelines; + std::vector<BlitImagePipelineKey> clear_color_keys; + std::vector<vk::Pipeline> clear_color_pipelines; vk::Pipeline convert_d32_to_r32_pipeline; vk::Pipeline convert_r32_to_d32_pipeline; vk::Pipeline convert_d16_to_r16_pipeline; diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 33daa8c1c..df972cd54 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -1,12 +1,11 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include <cmath> -#include "common/bit_cast.h" #include "common/common_types.h" #include "common/div_ceil.h" #include "common/settings.h" +#include "video_core/fsr.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h" #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" @@ -17,146 +16,7 @@ #include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { -namespace { -// Reimplementations of the constant generating functions in ffx_fsr1.h -// GCC generated a lot of warnings when using the official header. -u32 AU1_AH1_AF1(f32 f) { - static constexpr u32 base[512]{ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, - 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, - 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, - 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, - 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, - 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, - 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, - 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - }; - static constexpr s8 shift[512]{ - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, - 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, - 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, - }; - const u32 u = Common::BitCast<u32>(f); - const u32 i = u >> 23; - return base[i] + ((u & 0x7fffff) >> shift[i]); -} - -u32 AU1_AH2_AF2(f32 a[2]) { - return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16); -} - -void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX, - f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, - f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) { - con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX); - con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY); - con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); - con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); - con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); - con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY); - con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); - con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY); - con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX); - con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); - con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); - con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); - con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX); - con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY); - con3[2] = con3[3] = 0; -} - -void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], - f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, - f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, - f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { - FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, - inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); - con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + - inputOffsetInPixelsX); - con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + - inputOffsetInPixelsY); -} - -void FsrRcasCon(u32* con, f32 sharpness) { - sharpness = std::exp2f(-sharpness); - f32 hSharp[2]{sharpness, sharpness}; - con[0] = Common::BitCast<u32>(sharpness); - con[1] = AU1_AH2_AF2(hSharp); - con[2] = 0; - con[3] = 0; -} -} // Anonymous namespace +using namespace FSR; FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, VkExtent2D output_size_) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 86ef0daeb..719edbcfb 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -394,7 +394,15 @@ void RasterizerVulkan::Clear(u32 layer_count) { cmdbuf.ClearAttachments(attachment, clear_rect); }); } else { - UNIMPLEMENTED_MSG("Unimplemented Clear only the specified channel"); + u8 color_mask = static_cast<u8>(regs.clear_surface.R | regs.clear_surface.G << 1 | + regs.clear_surface.B << 2 | regs.clear_surface.A << 3); + Region2D dst_region = { + Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, + Offset2D{.x = clear_rect.rect.offset.x + + static_cast<s32>(clear_rect.rect.extent.width), + .y = clear_rect.rect.offset.y + + static_cast<s32>(clear_rect.rect.extent.height)}}; + blit_image.ClearColor(framebuffer, color_mask, regs.clear_color, dst_region); } } diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h index ee4240288..1bad83fb4 100644 --- a/src/video_core/texture_cache/descriptor_table.h +++ b/src/video_core/texture_cache/descriptor_table.h @@ -19,9 +19,7 @@ public: explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {} [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) { - [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { - return false; - } + [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { return false; } Refresh(gpu_addr, limit); return true; } diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 852ec2519..e9100091e 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -100,6 +100,10 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { ASSERT_MSG(false, "Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); break; } + if (num_samples > 1) { + size.width *= NumSamplesX(config.msaa_mode); + size.height *= NumSamplesY(config.msaa_mode); + } if (type != ImageType::Linear) { // FIXME: Call this without passing *this layer_stride = CalculateLayerStride(*this); diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h index d552bccf0..203ac1b11 100644 --- a/src/video_core/texture_cache/samples_helper.h +++ b/src/video_core/texture_cache/samples_helper.h @@ -51,4 +51,48 @@ namespace VideoCommon { return 1; } +[[nodiscard]] inline int NumSamplesX(Tegra::Texture::MsaaMode msaa_mode) { + using Tegra::Texture::MsaaMode; + switch (msaa_mode) { + case MsaaMode::Msaa1x1: + return 1; + case MsaaMode::Msaa2x1: + case MsaaMode::Msaa2x1_D3D: + case MsaaMode::Msaa2x2: + case MsaaMode::Msaa2x2_VC4: + case MsaaMode::Msaa2x2_VC12: + return 2; + case MsaaMode::Msaa4x2: + case MsaaMode::Msaa4x2_D3D: + case MsaaMode::Msaa4x2_VC8: + case MsaaMode::Msaa4x2_VC24: + case MsaaMode::Msaa4x4: + return 4; + } + ASSERT_MSG(false, "Invalid MSAA mode={}", static_cast<int>(msaa_mode)); + return 1; +} + +[[nodiscard]] inline int NumSamplesY(Tegra::Texture::MsaaMode msaa_mode) { + using Tegra::Texture::MsaaMode; + switch (msaa_mode) { + case MsaaMode::Msaa1x1: + case MsaaMode::Msaa2x1: + case MsaaMode::Msaa2x1_D3D: + return 1; + case MsaaMode::Msaa2x2: + case MsaaMode::Msaa2x2_VC4: + case MsaaMode::Msaa2x2_VC12: + case MsaaMode::Msaa4x2: + case MsaaMode::Msaa4x2_D3D: + case MsaaMode::Msaa4x2_VC8: + case MsaaMode::Msaa4x2_VC24: + return 2; + case MsaaMode::Msaa4x4: + return 4; + } + ASSERT_MSG(false, "Invalid MSAA mode={}", static_cast<int>(msaa_mode)); + return 1; +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index 1e2aad76a..9df6a2903 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -29,7 +29,7 @@ struct SlotId { }; template <class T> -requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T> + requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T> class SlotVector { public: class Iterator { |