summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/engines/draw_manager.cpp31
-rw-r--r--src/video_core/engines/draw_manager.h20
-rw-r--r--src/video_core/engines/maxwell_3d.cpp1
-rw-r--r--src/video_core/engines/maxwell_3d.h16
-rw-r--r--src/video_core/fsr.cpp148
-rw-r--r--src/video_core/fsr.h19
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt34
-rw-r--r--src/video_core/host_shaders/blit_color_float.frag (renamed from src/video_core/host_shaders/vulkan_blit_color_float.frag)0
-rw-r--r--src/video_core/host_shaders/full_screen_triangle.vert13
-rw-r--r--src/video_core/host_shaders/opengl_fidelityfx_fsr.frag108
-rw-r--r--src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag9
-rw-r--r--src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag9
-rw-r--r--src/video_core/host_shaders/vulkan_color_clear.frag14
-rw-r--r--src/video_core/host_shaders/vulkan_color_clear.vert10
-rw-r--r--src/video_core/memory_manager.cpp40
-rw-r--r--src/video_core/memory_manager.h3
-rw-r--r--src/video_core/rasterizer_interface.h3
-rw-r--r--src/video_core/renderer_null/null_rasterizer.cpp1
-rw-r--r--src/video_core/renderer_null/null_rasterizer.h1
-rw-r--r--src/video_core/renderer_opengl/blit_image.cpp59
-rw-r--r--src/video_core/renderer_opengl/blit_image.h38
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.h10
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_device.h7
-rw-r--r--src/video_core/renderer_opengl/gl_fsr.cpp101
-rw-r--r--src/video_core/renderer_opengl/gl_fsr.h43
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp44
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp121
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h129
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp90
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h3
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp212
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h16
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.cpp144
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp39
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h1
-rw-r--r--src/video_core/texture_cache/descriptor_table.h4
-rw-r--r--src/video_core/texture_cache/image_info.cpp4
-rw-r--r--src/video_core/texture_cache/samples_helper.h44
-rw-r--r--src/video_core/texture_cache/slot_vector.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h7
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h3
-rw-r--r--src/video_core/vulkan_common/nsight_aftermath_tracker.cpp8
50 files changed, 1312 insertions, 363 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f617665de..4742bcbe9 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -52,6 +52,8 @@ add_library(video_core STATIC
engines/puller.cpp
engines/puller.h
framebuffer_config.h
+ fsr.cpp
+ fsr.h
host1x/codecs/codec.cpp
host1x/codecs/codec.h
host1x/codecs/h264.cpp
@@ -100,6 +102,8 @@ add_library(video_core STATIC
renderer_null/null_rasterizer.h
renderer_null/renderer_null.cpp
renderer_null/renderer_null.h
+ renderer_opengl/blit_image.cpp
+ renderer_opengl/blit_image.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_compute_pipeline.cpp
@@ -108,6 +112,8 @@ add_library(video_core STATIC
renderer_opengl/gl_device.h
renderer_opengl/gl_fence_manager.cpp
renderer_opengl/gl_fence_manager.h
+ renderer_opengl/gl_fsr.cpp
+ renderer_opengl/gl_fsr.h
renderer_opengl/gl_graphics_pipeline.cpp
renderer_opengl/gl_graphics_pipeline.h
renderer_opengl/gl_rasterizer.cpp
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index 2437121ce..1d22d25f1 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -51,6 +51,10 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
LOG_WARNING(HW_GPU, "(STUBBED) called");
break;
}
+ case MAXWELL3D_REG_INDEX(draw_texture.src_y0): {
+ DrawTexture();
+ break;
+ }
default:
break;
}
@@ -179,6 +183,33 @@ void DrawManager::DrawIndexSmall(u32 argument) {
ProcessDraw(true, 1);
}
+void DrawManager::DrawTexture() {
+ const auto& regs{maxwell3d->regs};
+ draw_texture_state.dst_x0 = static_cast<float>(regs.draw_texture.dst_x0) / 4096.f;
+ draw_texture_state.dst_y0 = static_cast<float>(regs.draw_texture.dst_y0) / 4096.f;
+ const auto dst_width = static_cast<float>(regs.draw_texture.dst_width) / 4096.f;
+ const auto dst_height = static_cast<float>(regs.draw_texture.dst_height) / 4096.f;
+ const bool lower_left{regs.window_origin.mode !=
+ Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft};
+ if (lower_left) {
+ draw_texture_state.dst_y0 -= dst_height;
+ }
+ draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width;
+ draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height;
+ draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f;
+ draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f;
+ draw_texture_state.src_x1 =
+ (static_cast<float>(regs.draw_texture.dx_du) / 4294967296.f) * dst_width +
+ draw_texture_state.src_x0;
+ draw_texture_state.src_y1 =
+ (static_cast<float>(regs.draw_texture.dy_dv) / 4294967296.f) * dst_height +
+ draw_texture_state.src_y0;
+ draw_texture_state.src_sampler = regs.draw_texture.src_sampler;
+ draw_texture_state.src_texture = regs.draw_texture.src_texture;
+
+ maxwell3d->rasterizer->DrawTexture();
+}
+
void DrawManager::UpdateTopology() {
const auto& regs{maxwell3d->regs};
switch (regs.primitive_topology_control) {
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h
index 58d1b2d59..7c22c49f1 100644
--- a/src/video_core/engines/draw_manager.h
+++ b/src/video_core/engines/draw_manager.h
@@ -32,6 +32,19 @@ public:
std::vector<u8> inline_index_draw_indexes;
};
+ struct DrawTextureState {
+ f32 dst_x0;
+ f32 dst_y0;
+ f32 dst_x1;
+ f32 dst_y1;
+ f32 src_x0;
+ f32 src_y0;
+ f32 src_x1;
+ f32 src_y1;
+ u32 src_sampler;
+ u32 src_texture;
+ };
+
struct IndirectParams {
bool is_indexed;
bool include_count;
@@ -64,6 +77,10 @@ public:
return draw_state;
}
+ const DrawTextureState& GetDrawTextureState() const {
+ return draw_texture_state;
+ }
+
IndirectParams& GetIndirectParams() {
return indirect_state;
}
@@ -81,6 +98,8 @@ private:
void DrawIndexSmall(u32 argument);
+ void DrawTexture();
+
void UpdateTopology();
void ProcessDraw(bool draw_indexed, u32 instance_count);
@@ -89,6 +108,7 @@ private:
Maxwell3D* maxwell3d{};
State draw_state{};
+ DrawTextureState draw_texture_state{};
IndirectParams indirect_state{};
};
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 68ceda519..ae9da6290 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -149,6 +149,7 @@ bool Maxwell3D::IsMethodExecutable(u32 method) {
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent):
+ case MAXWELL3D_REG_INDEX(draw_texture.src_y0):
case MAXWELL3D_REG_INDEX(wait_for_idle):
case MAXWELL3D_REG_INDEX(shadow_ram_control):
case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0b2fd2928..c89969bb4 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1599,6 +1599,20 @@ public:
};
static_assert(sizeof(TIRModulationCoeff) == 0x4);
+ struct DrawTexture {
+ s32 dst_x0;
+ s32 dst_y0;
+ s32 dst_width;
+ s32 dst_height;
+ s64 dx_du;
+ s64 dy_dv;
+ u32 src_sampler;
+ u32 src_texture;
+ s32 src_x0;
+ s32 src_y0;
+ };
+ static_assert(sizeof(DrawTexture) == 0x30);
+
struct ReduceColorThreshold {
union {
BitField<0, 8, u32> all_hit_once;
@@ -2751,7 +2765,7 @@ public:
u32 reserved_sw_method2; ///< 0x102C
std::array<TIRModulationCoeff, 5> tir_modulation_coeff; ///< 0x1030
std::array<u32, 15> spare_nop; ///< 0x1044
- INSERT_PADDING_BYTES_NOINIT(0x30);
+ DrawTexture draw_texture; ///< 0x1080
std::array<u32, 7> reserved_sw_method3_to_7; ///< 0x10B0
ReduceColorThreshold reduce_color_thresholds_unorm8; ///< 0x10CC
std::array<u32, 4> reserved_sw_method10_to_13; ///< 0x10D0
diff --git a/src/video_core/fsr.cpp b/src/video_core/fsr.cpp
new file mode 100644
index 000000000..5653c64fc
--- /dev/null
+++ b/src/video_core/fsr.cpp
@@ -0,0 +1,148 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <cmath>
+#include "video_core/fsr.h"
+
+namespace FSR {
+namespace {
+// Reimplementations of the constant generating functions in ffx_fsr1.h
+// GCC generated a lot of warnings when using the official header.
+u32 AU1_AH1_AF1(f32 f) {
+ static constexpr u32 base[512]{
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040,
+ 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
+ 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
+ 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008,
+ 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
+ 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
+ 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
+ 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ };
+ static constexpr s8 shift[512]{
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16,
+ 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
+ 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18,
+ };
+ const u32 u = Common::BitCast<u32>(f);
+ const u32 i = u >> 23;
+ return base[i] + ((u & 0x7fffff) >> shift[i]);
+}
+
+u32 AU1_AH2_AF2(f32 a[2]) {
+ return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16);
+}
+
+void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX,
+ f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY,
+ f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) {
+ con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX);
+ con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY);
+ con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f);
+ con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f);
+ con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
+ con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY);
+ con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
+ con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY);
+ con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX);
+ con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY);
+ con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
+ con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY);
+ con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX);
+ con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY);
+ con3[2] = con3[3] = 0;
+}
+} // Anonymous namespace
+
+void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
+ f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
+ f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX,
+ f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) {
+ FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY,
+ inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY);
+ con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f +
+ inputOffsetInPixelsX);
+ con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f +
+ inputOffsetInPixelsY);
+}
+
+void FsrRcasCon(u32* con, f32 sharpness) {
+ sharpness = std::exp2f(-sharpness);
+ f32 hSharp[2]{sharpness, sharpness};
+ con[0] = Common::BitCast<u32>(sharpness);
+ con[1] = AU1_AH2_AF2(hSharp);
+ con[2] = 0;
+ con[3] = 0;
+}
+} // namespace FSR
diff --git a/src/video_core/fsr.h b/src/video_core/fsr.h
new file mode 100644
index 000000000..db0d4ec6f
--- /dev/null
+++ b/src/video_core/fsr.h
@@ -0,0 +1,19 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/bit_cast.h"
+#include "common/common_types.h"
+
+namespace FSR {
+// Reimplementations of the constant generating functions in ffx_fsr1.h
+// GCC generated a lot of warnings when using the official header.
+void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
+ f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
+ f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX,
+ f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY);
+
+void FsrRcasCon(u32* con, f32 sharpness);
+
+} // namespace FSR
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index f275b2aa9..52cd5bb81 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -3,14 +3,19 @@
set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr)
-set(GLSL_INCLUDES
- fidelityfx_fsr.comp
+set(FIDELITYFX_FILES
${FIDELITYFX_INCLUDE_DIR}/ffx_a.h
${FIDELITYFX_INCLUDE_DIR}/ffx_fsr1.h
)
+set(GLSL_INCLUDES
+ fidelityfx_fsr.comp
+ ${FIDELITYFX_FILES}
+)
+
set(SHADER_FILES
astc_decoder.comp
+ blit_color_float.frag
block_linear_unswizzle_2d.comp
block_linear_unswizzle_3d.comp
convert_abgr8_to_d24s8.frag
@@ -23,6 +28,9 @@ set(SHADER_FILES
fxaa.vert
opengl_convert_s8d24.comp
opengl_copy_bc4.comp
+ opengl_fidelityfx_fsr.frag
+ opengl_fidelityfx_fsr_easu.frag
+ opengl_fidelityfx_fsr_rcas.frag
opengl_present.frag
opengl_present.vert
opengl_present_scaleforce.frag
@@ -36,8 +44,9 @@ set(SHADER_FILES
smaa_blending_weight_calculation.frag
smaa_neighborhood_blending.vert
smaa_neighborhood_blending.frag
- vulkan_blit_color_float.frag
vulkan_blit_depth_stencil.frag
+ vulkan_color_clear.frag
+ vulkan_color_clear.vert
vulkan_fidelityfx_fsr_easu_fp16.comp
vulkan_fidelityfx_fsr_easu_fp32.comp
vulkan_fidelityfx_fsr_rcas_fp16.comp
@@ -118,6 +127,25 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES})
endif()
endforeach()
+foreach(FILEPATH IN ITEMS ${FIDELITYFX_FILES})
+ get_filename_component(FILENAME ${FILEPATH} NAME)
+ string(REPLACE "." "_" HEADER_NAME ${FILENAME})
+ set(SOURCE_FILE ${FILEPATH})
+ set(SOURCE_HEADER_FILE ${SHADER_DIR}/${HEADER_NAME}.h)
+ add_custom_command(
+ OUTPUT
+ ${SOURCE_HEADER_FILE}
+ COMMAND
+ ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE}
+ MAIN_DEPENDENCY
+ ${SOURCE_FILE}
+ DEPENDS
+ ${INPUT_FILE}
+ # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified
+ )
+ set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE})
+endforeach()
+
set(SHADER_SOURCES ${SHADER_FILES})
list(APPEND SHADER_SOURCES ${GLSL_INCLUDES})
diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/blit_color_float.frag
index c0c832296..c0c832296 100644
--- a/src/video_core/host_shaders/vulkan_blit_color_float.frag
+++ b/src/video_core/host_shaders/blit_color_float.frag
diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert
index 2c976b19f..d16d98995 100644
--- a/src/video_core/host_shaders/full_screen_triangle.vert
+++ b/src/video_core/host_shaders/full_screen_triangle.vert
@@ -4,13 +4,20 @@
#version 450
#ifdef VULKAN
+#define VERTEX_ID gl_VertexIndex
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
#define END_PUSH_CONSTANTS };
#define UNIFORM(n)
+#define FLIPY 1
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+#define VERTEX_ID gl_VertexID
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
+#define FLIPY -1
#define UNIFORM(n) layout (location = n) uniform
+out gl_PerVertex {
+ vec4 gl_Position;
+};
#endif
BEGIN_PUSH_CONSTANTS
@@ -21,8 +28,8 @@ END_PUSH_CONSTANTS
layout(location = 0) out vec2 texcoord;
void main() {
- float x = float((gl_VertexIndex & 1) << 2);
- float y = float((gl_VertexIndex & 2) << 1);
- gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
+ float x = float((VERTEX_ID & 1) << 2);
+ float y = float((VERTEX_ID & 2) << 1);
+ gl_Position = vec4(x - 1.0, FLIPY * (y - 1.0), 0.0, 1.0);
texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);
}
diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag
new file mode 100644
index 000000000..16d22f58e
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag
@@ -0,0 +1,108 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+//!#version 460 core
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shading_language_420pack : enable
+
+#extension GL_AMD_gpu_shader_half_float : enable
+#extension GL_NV_gpu_shader5 : enable
+
+// FidelityFX Super Resolution Sample
+//
+// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+layout (location = 0) uniform uvec4 constants[4];
+
+#define A_GPU 1
+#define A_GLSL 1
+
+#ifdef YUZU_USE_FP16
+ #define A_HALF
+#endif
+#include "ffx_a.h"
+
+#ifndef YUZU_USE_FP16
+ layout (binding=0) uniform sampler2D InputTexture;
+ #if USE_EASU
+ #define FSR_EASU_F 1
+ AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; }
+ AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; }
+ AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; }
+ #endif
+ #if USE_RCAS
+ #define FSR_RCAS_F
+ AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); }
+ void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {}
+ #endif
+#else
+ layout (binding=0) uniform sampler2D InputTexture;
+ #if USE_EASU
+ #define FSR_EASU_H 1
+ AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; }
+ AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; }
+ AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; }
+ #endif
+ #if USE_RCAS
+ #define FSR_RCAS_H
+ AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); }
+ void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){}
+ #endif
+#endif
+
+#include "ffx_fsr1.h"
+
+#if USE_RCAS
+ layout(location = 0) in vec2 frag_texcoord;
+#endif
+layout (location = 0) out vec4 frag_color;
+
+void CurrFilter(AU2 pos)
+{
+#if USE_EASU
+ #ifndef YUZU_USE_FP16
+ AF3 c;
+ FsrEasuF(c, pos, constants[0], constants[1], constants[2], constants[3]);
+ frag_color = AF4(c, 1.0);
+ #else
+ AH3 c;
+ FsrEasuH(c, pos, constants[0], constants[1], constants[2], constants[3]);
+ frag_color = AH4(c, 1.0);
+ #endif
+#endif
+#if USE_RCAS
+ #ifndef YUZU_USE_FP16
+ AF3 c;
+ FsrRcasF(c.r, c.g, c.b, pos, constants[0]);
+ frag_color = AF4(c, 1.0);
+ #else
+ AH3 c;
+ FsrRcasH(c.r, c.g, c.b, pos, constants[0]);
+ frag_color = AH4(c, 1.0);
+ #endif
+#endif
+}
+
+void main()
+{
+#if USE_RCAS
+ CurrFilter(AU2(frag_texcoord * vec2(textureSize(InputTexture, 0))));
+#else
+ CurrFilter(AU2(gl_FragCoord.xy));
+#endif
+}
diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag
new file mode 100644
index 000000000..d39f80ac1
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag
@@ -0,0 +1,9 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#version 460 core
+#extension GL_GOOGLE_include_directive : enable
+
+#define USE_EASU 1
+
+#include "opengl_fidelityfx_fsr.frag"
diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag
new file mode 100644
index 000000000..cfa78ddc7
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag
@@ -0,0 +1,9 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#version 460 core
+#extension GL_GOOGLE_include_directive : enable
+
+#define USE_RCAS 1
+
+#include "opengl_fidelityfx_fsr.frag"
diff --git a/src/video_core/host_shaders/vulkan_color_clear.frag b/src/video_core/host_shaders/vulkan_color_clear.frag
new file mode 100644
index 000000000..617bf01e1
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_color_clear.frag
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#version 460 core
+
+layout (push_constant) uniform PushConstants {
+ vec4 clear_color;
+};
+
+layout(location = 0) out vec4 color;
+
+void main() {
+ color = clear_color;
+}
diff --git a/src/video_core/host_shaders/vulkan_color_clear.vert b/src/video_core/host_shaders/vulkan_color_clear.vert
new file mode 100644
index 000000000..d85883141
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_color_clear.vert
@@ -0,0 +1,10 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#version 460 core
+
+void main() {
+ float x = float((gl_VertexIndex & 1) << 2);
+ float y = float((gl_VertexIndex & 2) << 1);
+ gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
+}
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 3bcae3503..83924475b 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -6,7 +6,6 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
-#include "common/settings.h"
#include "core/core.h"
#include "core/device_memory.h"
#include "core/hle/kernel/k_page_table.h"
@@ -46,11 +45,6 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
big_page_table_cpu.resize(big_page_table_size);
big_page_continous.resize(big_page_table_size / continous_bits, 0);
entries.resize(page_table_size / 32, 0);
- if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
- fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
- } else {
- fastmem_arena = nullptr;
- }
}
MemoryManager::~MemoryManager() = default;
@@ -360,7 +354,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si
}
}
-template <bool is_safe, bool use_fastmem>
+template <bool is_safe>
void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
[[maybe_unused]] VideoCommon::CacheType which) const {
auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
@@ -374,12 +368,8 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
if constexpr (is_safe) {
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
}
- if constexpr (use_fastmem) {
- std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
- } else {
- u8* physical = memory.GetPointer(cpu_addr_base);
- std::memcpy(dest_buffer, physical, copy_amount);
- }
+ u8* physical = memory.GetPointer(cpu_addr_base);
+ std::memcpy(dest_buffer, physical, copy_amount);
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
};
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
@@ -388,15 +378,11 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
if constexpr (is_safe) {
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
}
- if constexpr (use_fastmem) {
- std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
+ if (!IsBigPageContinous(page_index)) [[unlikely]] {
+ memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
} else {
- if (!IsBigPageContinous(page_index)) [[unlikely]] {
- memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
- } else {
- u8* physical = memory.GetPointer(cpu_addr_base);
- std::memcpy(dest_buffer, physical, copy_amount);
- }
+ u8* physical = memory.GetPointer(cpu_addr_base);
+ std::memcpy(dest_buffer, physical, copy_amount);
}
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
};
@@ -410,20 +396,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
VideoCommon::CacheType which) const {
- if (fastmem_arena) [[likely]] {
- ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which);
- return;
- }
- ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which);
+ ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which);
}
void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
const std::size_t size) const {
- if (fastmem_arena) [[likely]] {
- ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
- return;
- }
- ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
+ ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
}
template <bool is_safe>
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 2936364f0..9ebfb6179 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -141,7 +141,7 @@ private:
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
- template <bool is_safe, bool use_fastmem>
+ template <bool is_safe>
void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
VideoCommon::CacheType which) const;
@@ -215,7 +215,6 @@ private:
std::vector<u64> big_page_continous;
std::vector<std::pair<VAddr, std::size_t>> page_stash{};
- u8* fastmem_arena{};
constexpr static size_t continous_bits = 64;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 1735b6164..33e2610bc 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -47,6 +47,9 @@ public:
/// Dispatches an indirect draw invocation
virtual void DrawIndirect() {}
+ /// Dispatches an draw texture invocation
+ virtual void DrawTexture() = 0;
+
/// Clear the current framebuffer
virtual void Clear(u32 layer_count) = 0;
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp
index 2c11345d7..2b5c7defa 100644
--- a/src/video_core/renderer_null/null_rasterizer.cpp
+++ b/src/video_core/renderer_null/null_rasterizer.cpp
@@ -21,6 +21,7 @@ RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gp
RasterizerNull::~RasterizerNull() = default;
void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {}
+void RasterizerNull::DrawTexture() {}
void RasterizerNull::Clear(u32 layer_count) {}
void RasterizerNull::DispatchCompute() {}
void RasterizerNull::ResetCounter(VideoCore::QueryType type) {}
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
index 2112aa70e..51f896e43 100644
--- a/src/video_core/renderer_null/null_rasterizer.h
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -31,6 +31,7 @@ public:
~RasterizerNull() override;
void Draw(bool is_indexed, u32 instance_count) override;
+ void DrawTexture() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
diff --git a/src/video_core/renderer_opengl/blit_image.cpp b/src/video_core/renderer_opengl/blit_image.cpp
new file mode 100644
index 000000000..9a560a73b
--- /dev/null
+++ b/src/video_core/renderer_opengl/blit_image.cpp
@@ -0,0 +1,59 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <algorithm>
+
+#include "video_core/host_shaders/blit_color_float_frag.h"
+#include "video_core/host_shaders/full_screen_triangle_vert.h"
+#include "video_core/renderer_opengl/blit_image.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+
+namespace OpenGL {
+
+BlitImageHelper::BlitImageHelper(ProgramManager& program_manager_)
+ : program_manager(program_manager_),
+ full_screen_vert(CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER)),
+ blit_color_to_color_frag(
+ CreateProgram(HostShaders::BLIT_COLOR_FLOAT_FRAG, GL_FRAGMENT_SHADER)) {}
+
+BlitImageHelper::~BlitImageHelper() = default;
+
+void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler,
+ const Region2D& dst_region, const Region2D& src_region,
+ const Extent3D& src_size) {
+ glEnable(GL_CULL_FACE);
+ glDisable(GL_COLOR_LOGIC_OP);
+ glDisable(GL_DEPTH_TEST);
+ glDisable(GL_STENCIL_TEST);
+ glDisable(GL_POLYGON_OFFSET_FILL);
+ glDisable(GL_RASTERIZER_DISCARD);
+ glDisable(GL_ALPHA_TEST);
+ glDisablei(GL_BLEND, 0);
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+ glCullFace(GL_BACK);
+ glFrontFace(GL_CW);
+ glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+ glDepthRangeIndexed(0, 0.0, 0.0);
+
+ program_manager.BindPresentPrograms(full_screen_vert.handle, blit_color_to_color_frag.handle);
+ glProgramUniform2f(full_screen_vert.handle, 0,
+ static_cast<float>(src_region.end.x - src_region.start.x) /
+ static_cast<float>(src_size.width),
+ static_cast<float>(src_region.end.y - src_region.start.y) /
+ static_cast<float>(src_size.height));
+ glProgramUniform2f(full_screen_vert.handle, 1,
+ static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width),
+ static_cast<float>(src_region.start.y) /
+ static_cast<float>(src_size.height));
+ glViewport(std::min(dst_region.start.x, dst_region.end.x),
+ std::min(dst_region.start.y, dst_region.end.y),
+ std::abs(dst_region.end.x - dst_region.start.x),
+ std::abs(dst_region.end.y - dst_region.start.y));
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer);
+ glBindSampler(0, src_sampler);
+ glBindTextureUnit(0, src_image_view);
+ glClear(GL_COLOR_BUFFER_BIT);
+ glDrawArrays(GL_TRIANGLES, 0, 3);
+}
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/blit_image.h b/src/video_core/renderer_opengl/blit_image.h
new file mode 100644
index 000000000..5a2b12d16
--- /dev/null
+++ b/src/video_core/renderer_opengl/blit_image.h
@@ -0,0 +1,38 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <glad/glad.h>
+
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/texture_cache/types.h"
+
+namespace OpenGL {
+
+using VideoCommon::Extent3D;
+using VideoCommon::Offset2D;
+using VideoCommon::Region2D;
+
+class ProgramManager;
+class Framebuffer;
+class ImageView;
+
+class BlitImageHelper {
+public:
+ explicit BlitImageHelper(ProgramManager& program_manager);
+ ~BlitImageHelper();
+
+ void BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler,
+ const Region2D& dst_region, const Region2D& src_region,
+ const Extent3D& src_size);
+
+private:
+ ProgramManager& program_manager;
+
+ OGLProgram full_screen_vert;
+ OGLProgram blit_color_to_color_frag;
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 26d066004..1a0cea9b7 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -30,7 +30,7 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep
ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
const Shader::Info& info_, std::string code,
- std::vector<u32> code_v)
+ std::vector<u32> code_v, bool force_context_flush)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
program_manager{program_manager_}, info{info_} {
switch (device.GetShaderBackend()) {
@@ -63,6 +63,15 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac
writes_global_memory = !use_storage_buffers &&
std::ranges::any_of(info.storage_buffers_descriptors,
[](const auto& desc) { return desc.is_written; });
+ if (force_context_flush) {
+ std::scoped_lock lock{built_mutex};
+ built_fence.Create();
+ // Flush this context to ensure compilation commands and fence are in the GPU pipe.
+ glFlush();
+ built_condvar.notify_one();
+ } else {
+ is_built = true;
+ }
}
void ComputePipeline::Configure() {
@@ -142,6 +151,9 @@ void ComputePipeline::Configure() {
}
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
+ if (!is_built) {
+ WaitForBuild();
+ }
if (assembly_program.handle != 0) {
program_manager.BindComputeAssemblyProgram(assembly_program.handle);
} else {
@@ -223,4 +235,13 @@ void ComputePipeline::Configure() {
}
}
+void ComputePipeline::WaitForBuild() {
+ if (built_fence.handle == 0) {
+ std::unique_lock lock{built_mutex};
+ built_condvar.wait(lock, [this] { return built_fence.handle != 0; });
+ }
+ ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED);
+ is_built = true;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
index 6534dec32..9bcc72b59 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -50,7 +50,8 @@ class ComputePipeline {
public:
explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
BufferCache& buffer_cache_, ProgramManager& program_manager_,
- const Shader::Info& info_, std::string code, std::vector<u32> code_v);
+ const Shader::Info& info_, std::string code, std::vector<u32> code_v,
+ bool force_context_flush = false);
void Configure();
@@ -65,6 +66,8 @@ public:
}
private:
+ void WaitForBuild();
+
TextureCache& texture_cache;
BufferCache& buffer_cache;
Tegra::MemoryManager* gpu_memory;
@@ -81,6 +84,11 @@ private:
bool use_storage_buffers{};
bool writes_global_memory{};
+
+ std::mutex built_mutex;
+ std::condition_variable built_condvar;
+ OGLSync built_fence{};
+ bool is_built{false};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index cee5c3247..22ed16ebf 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -166,6 +166,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
+ has_draw_texture = GLAD_GL_NV_draw_texture;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
need_fastmath_off = is_nvidia;
can_report_memory = GLAD_GL_NVX_gpu_memory_info;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 2a72d84be..3ff8cad83 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -4,6 +4,8 @@
#pragma once
#include <cstddef>
+#include <string>
+
#include "common/common_types.h"
#include "core/frontend/emu_window.h"
#include "shader_recompiler/stage.h"
@@ -146,6 +148,10 @@ public:
return has_sparse_texture_2;
}
+ bool HasDrawTexture() const {
+ return has_draw_texture;
+ }
+
bool IsWarpSizePotentiallyLargerThanGuest() const {
return warp_size_potentially_larger_than_guest;
}
@@ -216,6 +222,7 @@ private:
bool has_shader_int64{};
bool has_amd_shader_half_float{};
bool has_sparse_texture_2{};
+ bool has_draw_texture{};
bool warp_size_potentially_larger_than_guest{};
bool need_fastmath_off{};
bool has_cbuf_ftou_bug{};
diff --git a/src/video_core/renderer_opengl/gl_fsr.cpp b/src/video_core/renderer_opengl/gl_fsr.cpp
new file mode 100644
index 000000000..77262dcf1
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fsr.cpp
@@ -0,0 +1,101 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/settings.h"
+#include "video_core/fsr.h"
+#include "video_core/renderer_opengl/gl_fsr.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+
+namespace OpenGL {
+using namespace FSR;
+
+using FsrConstants = std::array<u32, 4 * 4>;
+
+FSR::FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source,
+ std::string_view fsr_rcas_source)
+ : fsr_vertex{CreateProgram(fsr_vertex_source, GL_VERTEX_SHADER)},
+ fsr_easu_frag{CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER)},
+ fsr_rcas_frag{CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER)} {
+ glProgramUniform2f(fsr_vertex.handle, 0, 1.0f, 1.0f);
+ glProgramUniform2f(fsr_vertex.handle, 1, 0.0f, 0.0f);
+}
+
+FSR::~FSR() = default;
+
+void FSR::Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen,
+ u32 input_image_width, u32 input_image_height,
+ const Common::Rectangle<int>& crop_rect) {
+
+ const auto output_image_width = screen.GetWidth();
+ const auto output_image_height = screen.GetHeight();
+
+ if (fsr_intermediate_tex.handle) {
+ GLint fsr_tex_width, fsr_tex_height;
+ glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_WIDTH,
+ &fsr_tex_width);
+ glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_HEIGHT,
+ &fsr_tex_height);
+ if (static_cast<u32>(fsr_tex_width) != output_image_width ||
+ static_cast<u32>(fsr_tex_height) != output_image_height) {
+ fsr_intermediate_tex.Release();
+ }
+ }
+ if (!fsr_intermediate_tex.handle) {
+ fsr_intermediate_tex.Create(GL_TEXTURE_2D);
+ glTextureStorage2D(fsr_intermediate_tex.handle, 1, GL_RGB16F, output_image_width,
+ output_image_height);
+ glNamedFramebufferTexture(fsr_framebuffer.handle, GL_COLOR_ATTACHMENT0,
+ fsr_intermediate_tex.handle, 0);
+ }
+
+ GLint old_draw_fb;
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
+
+ glFrontFace(GL_CW);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fsr_framebuffer.handle);
+ glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(output_image_width),
+ static_cast<GLfloat>(output_image_height));
+
+ FsrConstants constants;
+ FsrEasuConOffset(
+ constants.data() + 0, constants.data() + 4, constants.data() + 8, constants.data() + 12,
+
+ static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
+ static_cast<f32>(input_image_width), static_cast<f32>(input_image_height),
+ static_cast<f32>(output_image_width), static_cast<f32>(output_image_height),
+ static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
+
+ glProgramUniform4uiv(fsr_easu_frag.handle, 0, sizeof(constants), std::data(constants));
+
+ program_manager.BindPresentPrograms(fsr_vertex.handle, fsr_easu_frag.handle);
+ glDrawArrays(GL_TRIANGLES, 0, 3);
+
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
+ glBindTextureUnit(0, fsr_intermediate_tex.handle);
+
+ const float sharpening =
+ static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f;
+
+ FsrRcasCon(constants.data(), sharpening);
+ glProgramUniform4uiv(fsr_rcas_frag.handle, 0, sizeof(constants), std::data(constants));
+}
+
+void FSR::InitBuffers() {
+ fsr_framebuffer.Create();
+}
+
+void FSR::ReleaseBuffers() {
+ fsr_framebuffer.Release();
+ fsr_intermediate_tex.Release();
+}
+
+const OGLProgram& FSR::GetPresentFragmentProgram() const noexcept {
+ return fsr_rcas_frag;
+}
+
+bool FSR::AreBuffersInitialized() const noexcept {
+ return fsr_framebuffer.handle;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fsr.h b/src/video_core/renderer_opengl/gl_fsr.h
new file mode 100644
index 000000000..1f6ae3115
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fsr.h
@@ -0,0 +1,43 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <string_view>
+
+#include "common/common_types.h"
+#include "common/math_util.h"
+#include "video_core/fsr.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+
+namespace OpenGL {
+
+class ProgramManager;
+
+class FSR {
+public:
+ explicit FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source,
+ std::string_view fsr_rcas_source);
+ ~FSR();
+
+ void Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen,
+ u32 input_image_width, u32 input_image_height,
+ const Common::Rectangle<int>& crop_rect);
+
+ void InitBuffers();
+
+ void ReleaseBuffers();
+
+ [[nodiscard]] const OGLProgram& GetPresentFragmentProgram() const noexcept;
+
+ [[nodiscard]] bool AreBuffersInitialized() const noexcept;
+
+private:
+ OGLFramebuffer fsr_framebuffer;
+ OGLProgram fsr_vertex;
+ OGLProgram fsr_easu_frag;
+ OGLProgram fsr_rcas_frag;
+ OGLTexture fsr_intermediate_tex;
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index c115dabe1..29491e762 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -176,7 +176,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
std::array<std::string, 5> sources,
std::array<std::vector<u32>, 5> sources_spirv,
const std::array<const Shader::Info*, 5>& infos,
- const GraphicsPipelineKey& key_)
+ const GraphicsPipelineKey& key_, bool force_context_flush)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
state_tracker{state_tracker_}, key{key_} {
if (shader_notify) {
@@ -231,7 +231,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
const bool in_parallel = thread_worker != nullptr;
const auto backend = device.GetShaderBackend();
auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv),
- shader_notify, backend, in_parallel](ShaderContext::Context*) mutable {
+ shader_notify, backend, in_parallel,
+ force_context_flush](ShaderContext::Context*) mutable {
for (size_t stage = 0; stage < 5; ++stage) {
switch (backend) {
case Settings::ShaderBackend::GLSL:
@@ -251,7 +252,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
break;
}
}
- if (in_parallel) {
+ if (force_context_flush || in_parallel) {
std::scoped_lock lock{built_mutex};
built_fence.Create();
// Flush this context to ensure compilation commands and fence are in the GPU pipe.
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 1c06b3655..7bab3be0a 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -78,7 +78,7 @@ public:
std::array<std::string, 5> sources,
std::array<std::vector<u32>, 5> sources_spirv,
const std::array<const Shader::Info*, 5>& infos,
- const GraphicsPipelineKey& key_);
+ const GraphicsPipelineKey& key_, bool force_context_flush = false);
void Configure(bool is_indexed) {
configure_func(this, is_indexed);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 181857d9c..7bced675c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -64,7 +64,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()),
query_cache(*this), accelerate_dma(buffer_cache),
- fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
+ blit_image(program_manager_) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
@@ -320,6 +321,47 @@ void RasterizerOpenGL::DrawIndirect() {
buffer_cache.SetDrawIndirect(nullptr);
}
+void RasterizerOpenGL::DrawTexture() {
+ MICROPROFILE_SCOPE(OpenGL_Drawing);
+
+ SCOPE_EXIT({ gpu.TickWork(); });
+ query_cache.UpdateCounters();
+
+ texture_cache.SynchronizeGraphicsDescriptors();
+ texture_cache.UpdateRenderTargets(false);
+
+ SyncState();
+
+ const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
+ const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
+ const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
+
+ if (device.HasDrawTexture()) {
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
+
+ glDrawTextureNV(texture.DefaultHandle(), sampler->Handle(), draw_texture_state.dst_x0,
+ draw_texture_state.dst_y0, draw_texture_state.dst_x1,
+ draw_texture_state.dst_y1, 0,
+ draw_texture_state.src_x0 / static_cast<float>(texture.size.width),
+ draw_texture_state.src_y0 / static_cast<float>(texture.size.height),
+ draw_texture_state.src_x1 / static_cast<float>(texture.size.width),
+ draw_texture_state.src_y1 / static_cast<float>(texture.size.height));
+ } else {
+ Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0),
+ .y = static_cast<s32>(draw_texture_state.dst_y0)},
+ Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1),
+ .y = static_cast<s32>(draw_texture_state.dst_y1)}};
+ Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0),
+ .y = static_cast<s32>(draw_texture_state.src_y0)},
+ Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
+ .y = static_cast<s32>(draw_texture_state.src_y1)}};
+ blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(),
+ sampler->Handle(), dst_region, src_region, texture.size);
+ }
+
+ ++num_queued_commands;
+}
+
void RasterizerOpenGL::DispatchCompute() {
gpu_memory->FlushCaching();
ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index be4f76c18..0c45832ae 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -16,6 +16,7 @@
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/blit_image.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
@@ -70,6 +71,7 @@ public:
void Draw(bool is_indexed, u32 instance_count) override;
void DrawIndirect() override;
+ void DrawTexture() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
@@ -224,6 +226,8 @@ private:
AccelerateDMA accelerate_dma;
FenceManagerOpenGL fence_manager;
+ BlitImageHelper blit_image;
+
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 7dd854e0f..626ea7dcb 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -286,7 +286,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
file.read(reinterpret_cast<char*>(&key), sizeof(key));
queue_work([this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
ctx->pools.ReleaseContents();
- auto pipeline{CreateComputePipeline(ctx->pools, key, env)};
+ auto pipeline{CreateComputePipeline(ctx->pools, key, env, true)};
std::scoped_lock lock{state.mutex};
if (pipeline) {
compute_cache.emplace(key, std::move(pipeline));
@@ -307,7 +307,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
env_ptrs.push_back(&env);
}
ctx->pools.ReleaseContents();
- auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
+ auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false, true)};
std::scoped_lock lock{state.mutex};
if (pipeline) {
graphics_cache.emplace(key, std::move(pipeline));
@@ -439,7 +439,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() {
std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
- std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
+ std::span<Shader::Environment* const> envs, bool use_shader_workers,
+ bool force_context_flush) try {
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
size_t env_index{};
u32 total_storage_buffers{};
@@ -531,10 +532,10 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
}
previous_program = &program;
}
- auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
+ auto* const thread_worker{use_shader_workers ? workers.get() : nullptr};
return std::make_unique<GraphicsPipeline>(device, texture_cache, buffer_cache, program_manager,
state_tracker, thread_worker, &shader_notify, sources,
- sources_spirv, infos, key);
+ sources_spirv, infos, key, force_context_flush);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());
@@ -559,8 +560,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
}
std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
- ShaderContext::ShaderPools& pools, const ComputePipelineKey& key,
- Shader::Environment& env) try {
+ ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env,
+ bool force_context_flush) try {
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
@@ -589,7 +590,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
}
return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, program_manager,
- program.info, code, code_spirv);
+ program.info, code, code_spirv, force_context_flush);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());
return nullptr;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index f82420592..6b9732fca 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -50,14 +50,16 @@ private:
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
- std::span<Shader::Environment* const> envs, bool build_in_parallel);
+ std::span<Shader::Environment* const> envs, bool use_shader_workers,
+ bool force_context_flush = false);
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
const VideoCommon::ShaderInfo* shader);
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
const ComputePipelineKey& key,
- Shader::Environment& env);
+ Shader::Environment& env,
+ bool force_context_flush = false);
std::unique_ptr<ShaderWorker> CreateWorkers() const;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index d9c29d8b7..98841ae65 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -1,2 +1,123 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+
+namespace OpenGL {
+
+static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
+ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
+};
+
+ProgramManager::ProgramManager(const Device& device) {
+ glCreateProgramPipelines(1, &pipeline.handle);
+ if (device.UseAssemblyShaders()) {
+ glEnable(GL_COMPUTE_PROGRAM_NV);
+ }
+}
+
+void ProgramManager::BindComputeProgram(GLuint program) {
+ glUseProgram(program);
+ is_compute_bound = true;
+}
+
+void ProgramManager::BindComputeAssemblyProgram(GLuint program) {
+ if (current_assembly_compute_program != program) {
+ current_assembly_compute_program = program;
+ glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
+ }
+ UnbindPipeline();
+}
+
+void ProgramManager::BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
+ static constexpr std::array<GLenum, 5> stage_enums{
+ GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
+ GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
+ };
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (current_programs[stage] != programs[stage].handle) {
+ current_programs[stage] = programs[stage].handle;
+ glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
+ }
+ }
+ BindPipeline();
+}
+
+void ProgramManager::BindPresentPrograms(GLuint vertex, GLuint fragment) {
+ if (current_programs[0] != vertex) {
+ current_programs[0] = vertex;
+ glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
+ }
+ if (current_programs[4] != fragment) {
+ current_programs[4] = fragment;
+ glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
+ }
+ glUseProgramStages(
+ pipeline.handle,
+ GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
+ current_programs[1] = 0;
+ current_programs[2] = 0;
+ current_programs[3] = 0;
+
+ if (current_stage_mask != 0) {
+ current_stage_mask = 0;
+ for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
+ glDisable(program_type);
+ }
+ }
+ BindPipeline();
+}
+
+void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
+ u32 stage_mask) {
+ const u32 changed_mask = current_stage_mask ^ stage_mask;
+ current_stage_mask = stage_mask;
+
+ if (changed_mask != 0) {
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (((changed_mask >> stage) & 1) != 0) {
+ if (((stage_mask >> stage) & 1) != 0) {
+ glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+ } else {
+ glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+ }
+ }
+ }
+ }
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (current_programs[stage] != programs[stage].handle) {
+ current_programs[stage] = programs[stage].handle;
+ glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
+ }
+ }
+ UnbindPipeline();
+}
+
+void ProgramManager::RestoreGuestCompute() {}
+
+void ProgramManager::BindPipeline() {
+ if (!is_pipeline_bound) {
+ is_pipeline_bound = true;
+ glBindProgramPipeline(pipeline.handle);
+ }
+ UnbindCompute();
+}
+
+void ProgramManager::UnbindPipeline() {
+ if (is_pipeline_bound) {
+ is_pipeline_bound = false;
+ glBindProgramPipeline(0);
+ }
+ UnbindCompute();
+}
+
+void ProgramManager::UnbindCompute() {
+ if (is_compute_bound) {
+ is_compute_bound = false;
+ glUseProgram(0);
+ }
+}
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index a84f5aeb3..07ffab77f 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -6,8 +6,6 @@
#include <array>
#include <span>
-#include <glad/glad.h>
-
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -16,121 +14,28 @@ namespace OpenGL {
class ProgramManager {
static constexpr size_t NUM_STAGES = 5;
- static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
- GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
- GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
- };
-
public:
- explicit ProgramManager(const Device& device) {
- glCreateProgramPipelines(1, &pipeline.handle);
- if (device.UseAssemblyShaders()) {
- glEnable(GL_COMPUTE_PROGRAM_NV);
- }
- }
-
- void BindComputeProgram(GLuint program) {
- glUseProgram(program);
- is_compute_bound = true;
- }
-
- void BindComputeAssemblyProgram(GLuint program) {
- if (current_assembly_compute_program != program) {
- current_assembly_compute_program = program;
- glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
- }
- UnbindPipeline();
- }
-
- void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
- static constexpr std::array<GLenum, 5> stage_enums{
- GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
- GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
- };
- for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
- if (current_programs[stage] != programs[stage].handle) {
- current_programs[stage] = programs[stage].handle;
- glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
- }
- }
- BindPipeline();
- }
-
- void BindPresentPrograms(GLuint vertex, GLuint fragment) {
- if (current_programs[0] != vertex) {
- current_programs[0] = vertex;
- glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
- }
- if (current_programs[4] != fragment) {
- current_programs[4] = fragment;
- glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
- }
- glUseProgramStages(
- pipeline.handle,
- GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
- current_programs[1] = 0;
- current_programs[2] = 0;
- current_programs[3] = 0;
-
- if (current_stage_mask != 0) {
- current_stage_mask = 0;
- for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
- glDisable(program_type);
- }
- }
- BindPipeline();
- }
+ explicit ProgramManager(const Device& device);
+
+ void BindComputeProgram(GLuint program);
+
+ void BindComputeAssemblyProgram(GLuint program);
+
+ void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs);
+
+ void BindPresentPrograms(GLuint vertex, GLuint fragment);
void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
- u32 stage_mask) {
- const u32 changed_mask = current_stage_mask ^ stage_mask;
- current_stage_mask = stage_mask;
-
- if (changed_mask != 0) {
- for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
- if (((changed_mask >> stage) & 1) != 0) {
- if (((stage_mask >> stage) & 1) != 0) {
- glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
- } else {
- glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
- }
- }
- }
- }
- for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
- if (current_programs[stage] != programs[stage].handle) {
- current_programs[stage] = programs[stage].handle;
- glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
- }
- }
- UnbindPipeline();
- }
-
- void RestoreGuestCompute() {}
+ u32 stage_mask);
+
+ void RestoreGuestCompute();
private:
- void BindPipeline() {
- if (!is_pipeline_bound) {
- is_pipeline_bound = true;
- glBindProgramPipeline(pipeline.handle);
- }
- UnbindCompute();
- }
-
- void UnbindPipeline() {
- if (is_pipeline_bound) {
- is_pipeline_bound = false;
- glBindProgramPipeline(0);
- }
- UnbindCompute();
- }
-
- void UnbindCompute() {
- if (is_compute_bound) {
- is_compute_bound = false;
- glUseProgram(0);
- }
- }
+ void BindPipeline();
+
+ void UnbindPipeline();
+
+ void UnbindCompute();
OGLPipeline pipeline;
bool is_pipeline_bound{};
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index de95f2634..2a74c1d05 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -17,8 +17,14 @@
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "core/telemetry_session.h"
+#include "video_core/host_shaders/ffx_a_h.h"
+#include "video_core/host_shaders/ffx_fsr1_h.h"
+#include "video_core/host_shaders/full_screen_triangle_vert.h"
#include "video_core/host_shaders/fxaa_frag.h"
#include "video_core/host_shaders/fxaa_vert.h"
+#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h"
+#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h"
+#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h"
#include "video_core/host_shaders/opengl_present_frag.h"
#include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
#include "video_core/host_shaders/opengl_present_vert.h"
@@ -31,6 +37,7 @@
#include "video_core/host_shaders/smaa_edge_detection_vert.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_frag.h"
#include "video_core/host_shaders/smaa_neighborhood_blending_vert.h"
+#include "video_core/renderer_opengl/gl_fsr.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
@@ -268,12 +275,17 @@ void RendererOpenGL::InitOpenGLObjects() {
fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER);
fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER);
- const auto SmaaShader = [](std::string_view specialized_source, GLenum stage) {
- std::string shader_source{specialized_source};
- constexpr std::string_view include_string = "#include \"opengl_smaa.glsl\"";
+ const auto replace_include = [](std::string& shader_source, std::string_view include_name,
+ std::string_view include_content) {
+ const std::string include_string = fmt::format("#include \"{}\"", include_name);
const std::size_t pos = shader_source.find(include_string);
ASSERT(pos != std::string::npos);
- shader_source.replace(pos, include_string.size(), HostShaders::OPENGL_SMAA_GLSL);
+ shader_source.replace(pos, include_string.size(), include_content);
+ };
+
+ const auto SmaaShader = [&](std::string_view specialized_source, GLenum stage) {
+ std::string shader_source{specialized_source};
+ replace_include(shader_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL);
return CreateProgram(shader_source, stage);
};
@@ -298,14 +310,32 @@ void RendererOpenGL::InitOpenGLObjects() {
CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG),
GL_FRAGMENT_SHADER);
+ std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG};
+ replace_include(fsr_source, "ffx_a.h", HostShaders::FFX_A_H);
+ replace_include(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H);
+
+ std::string fsr_easu_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG};
+ std::string fsr_rcas_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG};
+ replace_include(fsr_easu_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source);
+ replace_include(fsr_rcas_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source);
+
+ fsr = std::make_unique<FSR>(HostShaders::FULL_SCREEN_TRIANGLE_VERT, fsr_easu_frag_source,
+ fsr_rcas_frag_source);
+
// Generate presentation sampler
present_sampler.Create();
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
present_sampler_nn.Create();
glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -525,6 +555,31 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glBindTextureUnit(0, aa_texture.handle);
}
+ glDisablei(GL_SCISSOR_TEST, 0);
+
+ if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
+ if (!fsr->AreBuffersInitialized()) {
+ fsr->InitBuffers();
+ }
+
+ auto crop_rect = framebuffer_crop_rect;
+ if (crop_rect.GetWidth() == 0) {
+ crop_rect.right = framebuffer_width;
+ }
+ if (crop_rect.GetHeight() == 0) {
+ crop_rect.bottom = framebuffer_height;
+ }
+ crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
+ const auto fsr_input_width = Settings::values.resolution_info.ScaleUp(framebuffer_width);
+ const auto fsr_input_height = Settings::values.resolution_info.ScaleUp(framebuffer_height);
+ glBindSampler(0, present_sampler.handle);
+ fsr->Draw(program_manager, layout.screen, fsr_input_width, fsr_input_height, crop_rect);
+ } else {
+ if (fsr->AreBuffersInitialized()) {
+ fsr->ReleaseBuffers();
+ }
+ }
+
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
@@ -540,10 +595,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
case Settings::ScalingFilter::ScaleForce:
return present_scaleforce_fragment.handle;
case Settings::ScalingFilter::Fsr:
- LOG_WARNING(
- Render_OpenGL,
- "FidelityFX Super Resolution is not supported in OpenGL, changing to ScaleForce");
- return present_scaleforce_fragment.handle;
+ return fsr->GetPresentFragmentProgram().handle;
default:
return present_bilinear_fragment.handle;
}
@@ -578,15 +630,18 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width);
f32 scale_v =
static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height);
- // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
- // (e.g. handheld mode) on a 1920x1080 framebuffer.
- if (framebuffer_crop_rect.GetWidth() > 0) {
- scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
- static_cast<f32>(screen_info.texture.width);
- }
- if (framebuffer_crop_rect.GetHeight() > 0) {
- scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
- static_cast<f32>(screen_info.texture.height);
+
+ if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::Fsr) {
+ // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
+ // (e.g. handheld mode) on a 1920x1080 framebuffer.
+ if (framebuffer_crop_rect.GetWidth() > 0) {
+ scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
+ static_cast<f32>(screen_info.texture.width);
+ }
+ if (framebuffer_crop_rect.GetHeight() > 0) {
+ scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
+ static_cast<f32>(screen_info.texture.height);
+ }
}
if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa &&
!screen_info.was_accelerated) {
@@ -612,7 +667,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
}
- glDisablei(GL_SCISSOR_TEST, 0);
glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
static_cast<GLfloat>(layout.height));
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cc97d7b26..f1d5fd954 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,6 +10,7 @@
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_fsr.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -141,6 +142,8 @@ private:
OGLTexture smaa_edges_tex;
OGLTexture smaa_blend_tex;
+ std::unique_ptr<FSR> fsr;
+
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 3f2b139e0..cf2964a3f 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -4,14 +4,16 @@
#include <algorithm>
#include "common/settings.h"
+#include "video_core/host_shaders/blit_color_float_frag_spv.h"
#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
#include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
-#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
+#include "video_core/host_shaders/vulkan_color_clear_frag_spv.h"
+#include "video_core/host_shaders/vulkan_color_clear_vert_spv.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -69,10 +71,11 @@ constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CRE
.bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()),
.pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(),
};
-constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{
- .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
+template <VkShaderStageFlags stageFlags, size_t size>
+inline constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{
+ .stageFlags = stageFlags,
.offset = 0,
- .size = sizeof(PushConstants),
+ .size = static_cast<u32>(size),
};
constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
@@ -125,10 +128,8 @@ constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE
.alphaToCoverageEnable = VK_FALSE,
.alphaToOneEnable = VK_FALSE,
};
-constexpr std::array DYNAMIC_STATES{
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
-};
+constexpr std::array DYNAMIC_STATES{VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS};
constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -205,15 +206,15 @@ inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{
};
constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo(
- const VkDescriptorSetLayout* set_layout) {
+ const VkDescriptorSetLayout* set_layout, vk::Span<VkPushConstantRange> push_constants) {
return VkPipelineLayoutCreateInfo{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .setLayoutCount = 1,
+ .setLayoutCount = (set_layout != nullptr ? 1u : 0u),
.pSetLayouts = set_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &PUSH_CONSTANT_RANGE,
+ .pushConstantRangeCount = push_constants.size(),
+ .pPushConstantRanges = push_constants.data(),
};
}
@@ -302,8 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri
device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr);
}
-void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region,
- const Region2D& src_region) {
+void BindBlitState(vk::CommandBuffer cmdbuf, const Region2D& dst_region) {
const VkOffset2D offset{
.x = std::min(dst_region.start.x, dst_region.end.x),
.y = std::min(dst_region.start.y, dst_region.end.y),
@@ -325,15 +325,23 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
.offset = offset,
.extent = extent,
};
- const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x);
- const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y);
+ cmdbuf.SetViewport(0, viewport);
+ cmdbuf.SetScissor(0, scissor);
+}
+
+void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region,
+ const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) {
+ BindBlitState(cmdbuf, dst_region);
+ const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) /
+ static_cast<float>(src_size.width);
+ const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) /
+ static_cast<float>(src_size.height);
const PushConstants push_constants{
.tex_scale = {scale_x, scale_y},
- .tex_offset = {static_cast<float>(src_region.start.x),
- static_cast<float>(src_region.start.y)},
+ .tex_offset = {static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width),
+ static_cast<float>(src_region.start.y) /
+ static_cast<float>(src_size.height)},
};
- cmdbuf.SetViewport(0, viewport);
- cmdbuf.SetScissor(0, scissor);
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
}
@@ -347,6 +355,51 @@ VkExtent2D GetConversionExtent(const ImageView& src_image_view) {
.height = is_rescaled ? resolution.ScaleUp(height) : height,
};
}
+
+void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout,
+ VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL) {
+ constexpr VkFlags flags{VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT};
+ const VkImageMemoryBarrier barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = flags,
+ .dstAccessMask = flags,
+ .oldLayout = source_layout,
+ .newLayout = target_layout,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ };
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, barrier);
+}
+
+void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) {
+ const VkRenderPass render_pass = framebuffer->RenderPass();
+ const VkFramebuffer framebuffer_handle = framebuffer->Handle();
+ const VkExtent2D render_area = framebuffer->RenderArea();
+ const VkRenderPassBeginInfo renderpass_bi{
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .pNext = nullptr,
+ .renderPass = render_pass,
+ .framebuffer = framebuffer_handle,
+ .renderArea{
+ .offset{},
+ .extent = render_area,
+ },
+ .clearValueCount = 0,
+ .pClearValues = nullptr,
+ };
+ cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
+}
} // Anonymous namespace
BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
@@ -360,13 +413,20 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)},
two_textures_descriptor_allocator{
descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)},
- one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
- PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
- two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
- PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
+ one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo(
+ one_texture_set_layout.address(),
+ PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstants)>))),
+ two_textures_pipeline_layout(
+ device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo(
+ two_textures_set_layout.address(),
+ PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstants)>))),
+ clear_color_pipeline_layout(device.GetLogical().CreatePipelineLayout(PipelineLayoutCreateInfo(
+ nullptr, PUSH_CONSTANT_RANGE<VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(float) * 4>))),
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
- blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
+ blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)),
blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
+ clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)),
+ clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
@@ -404,6 +464,32 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
scheduler.InvalidateState();
}
+void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
+ VkImage src_image, VkSampler src_sampler,
+ const Region2D& dst_region, const Region2D& src_region,
+ const Extent3D& src_size) {
+ const BlitImagePipelineKey key{
+ .renderpass = dst_framebuffer->RenderPass(),
+ .operation = Tegra::Engines::Fermi2D::Operation::SrcCopy,
+ };
+ const VkPipelineLayout layout = *one_texture_pipeline_layout;
+ const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([this, dst_framebuffer, src_image_view, src_image, src_sampler, dst_region,
+ src_region, src_size, pipeline, layout](vk::CommandBuffer cmdbuf) {
+ TransitionImageLayout(cmdbuf, src_image, VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL);
+ BeginRenderPass(cmdbuf, dst_framebuffer);
+ const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
+ UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view);
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
+ nullptr);
+ BindBlitState(cmdbuf, layout, dst_region, src_region, src_size);
+ cmdbuf.Draw(3, 1, 0, 0);
+ cmdbuf.EndRenderPass();
+ });
+}
+
void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
VkImageView src_depth_view, VkImageView src_stencil_view,
const Region2D& dst_region, const Region2D& src_region,
@@ -479,6 +565,30 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view);
}
+void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask,
+ const std::array<f32, 4>& clear_color,
+ const Region2D& dst_region) {
+ const BlitImagePipelineKey key{
+ .renderpass = dst_framebuffer->RenderPass(),
+ .operation = Tegra::Engines::Fermi2D::Operation::BlendPremult,
+ };
+ const VkPipeline pipeline = FindOrEmplaceClearColorPipeline(key);
+ const VkPipelineLayout layout = *clear_color_pipeline_layout;
+ scheduler.RequestRenderpass(dst_framebuffer);
+ scheduler.Record(
+ [pipeline, layout, color_mask, clear_color, dst_region](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ const std::array blend_color = {
+ (color_mask & 0x1) ? 1.0f : 0.0f, (color_mask & 0x2) ? 1.0f : 0.0f,
+ (color_mask & 0x4) ? 1.0f : 0.0f, (color_mask & 0x8) ? 1.0f : 0.0f};
+ cmdbuf.SetBlendConstants(blend_color.data());
+ BindBlitState(cmdbuf, dst_region);
+ cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_color);
+ cmdbuf.Draw(3, 1, 0, 0);
+ });
+ scheduler.InvalidateState();
+}
+
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
@@ -654,6 +764,58 @@ VkPipeline BlitImageHelper::FindOrEmplaceDepthStencilPipeline(const BlitImagePip
return *blit_depth_stencil_pipelines.back();
}
+VkPipeline BlitImageHelper::FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key) {
+ const auto it = std::ranges::find(clear_color_keys, key);
+ if (it != clear_color_keys.end()) {
+ return *clear_color_pipelines[std::distance(clear_color_keys.begin(), it)];
+ }
+ clear_color_keys.push_back(key);
+ const std::array stages = MakeStages(*clear_color_vert, *clear_color_frag);
+ const VkPipelineColorBlendAttachmentState color_blend_attachment_state{
+ .blendEnable = VK_TRUE,
+ .srcColorBlendFactor = VK_BLEND_FACTOR_CONSTANT_COLOR,
+ .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = VK_BLEND_FACTOR_CONSTANT_ALPHA,
+ .dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ };
+ const VkPipelineColorBlendStateCreateInfo color_blend_state_generic_create_info{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_CLEAR,
+ .attachmentCount = 1,
+ .pAttachments = &color_blend_attachment_state,
+ .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
+ };
+ clear_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(stages.size()),
+ .pStages = stages.data(),
+ .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pTessellationState = nullptr,
+ .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .pColorBlendState = &color_blend_state_generic_create_info,
+ .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .layout = *clear_color_pipeline_layout,
+ .renderPass = key.renderpass,
+ .subpass = 0,
+ .basePipelineHandle = VK_NULL_HANDLE,
+ .basePipelineIndex = 0,
+ }));
+ return *clear_color_pipelines.back();
+}
+
void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass,
bool is_target_depth) {
if (pipeline) {
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 5df679fb4..2976a7d91 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -10,6 +10,8 @@
namespace Vulkan {
+using VideoCommon::Extent3D;
+using VideoCommon::Offset2D;
using VideoCommon::Region2D;
class Device;
@@ -36,6 +38,10 @@ public:
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
+ void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
+ VkImage src_image, VkSampler src_sampler, const Region2D& dst_region,
+ const Region2D& src_region, const Extent3D& src_size);
+
void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
VkImageView src_stencil_view, const Region2D& dst_region,
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
@@ -55,6 +61,9 @@ public:
void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
+ void ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask,
+ const std::array<f32, 4>& clear_color, const Region2D& dst_region);
+
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
@@ -66,6 +75,8 @@ private:
[[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
+ [[nodiscard]] VkPipeline FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key);
+
void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth);
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
@@ -91,9 +102,12 @@ private:
DescriptorAllocator two_textures_descriptor_allocator;
vk::PipelineLayout one_texture_pipeline_layout;
vk::PipelineLayout two_textures_pipeline_layout;
+ vk::PipelineLayout clear_color_pipeline_layout;
vk::ShaderModule full_screen_vert;
vk::ShaderModule blit_color_to_color_frag;
vk::ShaderModule blit_depth_stencil_frag;
+ vk::ShaderModule clear_color_vert;
+ vk::ShaderModule clear_color_frag;
vk::ShaderModule convert_depth_to_float_frag;
vk::ShaderModule convert_float_to_depth_frag;
vk::ShaderModule convert_abgr8_to_d24s8_frag;
@@ -106,6 +120,8 @@ private:
std::vector<vk::Pipeline> blit_color_pipelines;
std::vector<BlitImagePipelineKey> blit_depth_stencil_keys;
std::vector<vk::Pipeline> blit_depth_stencil_pipelines;
+ std::vector<BlitImagePipelineKey> clear_color_keys;
+ std::vector<vk::Pipeline> clear_color_pipelines;
vk::Pipeline convert_d32_to_r32_pipeline;
vk::Pipeline convert_r32_to_d32_pipeline;
vk::Pipeline convert_d16_to_r16_pipeline;
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index 33daa8c1c..df972cd54 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -1,12 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
-#include <cmath>
-#include "common/bit_cast.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/settings.h"
+#include "video_core/fsr.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h"
@@ -17,146 +16,7 @@
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
-namespace {
-// Reimplementations of the constant generating functions in ffx_fsr1.h
-// GCC generated a lot of warnings when using the official header.
-u32 AU1_AH1_AF1(f32 f) {
- static constexpr u32 base[512]{
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040,
- 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
- 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
- 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
- 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
- 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008,
- 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
- 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
- 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
- 0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
- };
- static constexpr s8 shift[512]{
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16,
- 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
- 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
- 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
- 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
- 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
- 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18,
- };
- const u32 u = Common::BitCast<u32>(f);
- const u32 i = u >> 23;
- return base[i] + ((u & 0x7fffff) >> shift[i]);
-}
-
-u32 AU1_AH2_AF2(f32 a[2]) {
- return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16);
-}
-
-void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX,
- f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY,
- f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) {
- con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX);
- con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY);
- con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f);
- con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f);
- con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
- con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY);
- con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
- con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY);
- con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX);
- con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY);
- con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX);
- con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY);
- con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX);
- con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY);
- con3[2] = con3[3] = 0;
-}
-
-void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
- f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
- f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX,
- f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) {
- FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY,
- inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY);
- con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f +
- inputOffsetInPixelsX);
- con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f +
- inputOffsetInPixelsY);
-}
-
-void FsrRcasCon(u32* con, f32 sharpness) {
- sharpness = std::exp2f(-sharpness);
- f32 hSharp[2]{sharpness, sharpness};
- con[0] = Common::BitCast<u32>(sharpness);
- con[1] = AU1_AH2_AF2(hSharp);
- con[2] = 0;
- con[3] = 0;
-}
-} // Anonymous namespace
+using namespace FSR;
FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_,
VkExtent2D output_size_)
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index b75b8eec6..719edbcfb 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -266,6 +266,35 @@ void RasterizerVulkan::DrawIndirect() {
buffer_cache.SetDrawIndirect(nullptr);
}
+void RasterizerVulkan::DrawTexture() {
+ MICROPROFILE_SCOPE(Vulkan_Drawing);
+
+ SCOPE_EXIT({ gpu.TickWork(); });
+ FlushWork();
+
+ query_cache.UpdateCounters();
+
+ texture_cache.SynchronizeGraphicsDescriptors();
+ texture_cache.UpdateRenderTargets(false);
+
+ UpdateDynamicStates();
+
+ const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
+ const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
+ const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
+ Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0),
+ .y = static_cast<s32>(draw_texture_state.dst_y0)},
+ Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1),
+ .y = static_cast<s32>(draw_texture_state.dst_y1)}};
+ Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0),
+ .y = static_cast<s32>(draw_texture_state.src_y0)},
+ Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
+ .y = static_cast<s32>(draw_texture_state.src_y1)}};
+ blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(),
+ texture.ImageHandle(), sampler->Handle(), dst_region, src_region,
+ texture.size);
+}
+
void RasterizerVulkan::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(Vulkan_Clearing);
@@ -365,7 +394,15 @@ void RasterizerVulkan::Clear(u32 layer_count) {
cmdbuf.ClearAttachments(attachment, clear_rect);
});
} else {
- UNIMPLEMENTED_MSG("Unimplemented Clear only the specified channel");
+ u8 color_mask = static_cast<u8>(regs.clear_surface.R | regs.clear_surface.G << 1 |
+ regs.clear_surface.B << 2 | regs.clear_surface.A << 3);
+ Region2D dst_region = {
+ Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
+ Offset2D{.x = clear_rect.rect.offset.x +
+ static_cast<s32>(clear_rect.rect.extent.width),
+ .y = clear_rect.rect.offset.y +
+ static_cast<s32>(clear_rect.rect.extent.height)}};
+ blit_image.ClearColor(framebuffer, color_mask, regs.clear_color, dst_region);
}
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 472cc64d9..a0508b57c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -66,6 +66,7 @@ public:
void Draw(bool is_indexed, u32 instance_count) override;
void DrawIndirect() override;
+ void DrawTexture() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h
index ee4240288..1bad83fb4 100644
--- a/src/video_core/texture_cache/descriptor_table.h
+++ b/src/video_core/texture_cache/descriptor_table.h
@@ -19,9 +19,7 @@ public:
explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {}
[[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) {
- [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) {
- return false;
- }
+ [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { return false; }
Refresh(gpu_addr, limit);
return true;
}
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
index 852ec2519..e9100091e 100644
--- a/src/video_core/texture_cache/image_info.cpp
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -100,6 +100,10 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
ASSERT_MSG(false, "Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
break;
}
+ if (num_samples > 1) {
+ size.width *= NumSamplesX(config.msaa_mode);
+ size.height *= NumSamplesY(config.msaa_mode);
+ }
if (type != ImageType::Linear) {
// FIXME: Call this without passing *this
layer_stride = CalculateLayerStride(*this);
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h
index d552bccf0..203ac1b11 100644
--- a/src/video_core/texture_cache/samples_helper.h
+++ b/src/video_core/texture_cache/samples_helper.h
@@ -51,4 +51,48 @@ namespace VideoCommon {
return 1;
}
+[[nodiscard]] inline int NumSamplesX(Tegra::Texture::MsaaMode msaa_mode) {
+ using Tegra::Texture::MsaaMode;
+ switch (msaa_mode) {
+ case MsaaMode::Msaa1x1:
+ return 1;
+ case MsaaMode::Msaa2x1:
+ case MsaaMode::Msaa2x1_D3D:
+ case MsaaMode::Msaa2x2:
+ case MsaaMode::Msaa2x2_VC4:
+ case MsaaMode::Msaa2x2_VC12:
+ return 2;
+ case MsaaMode::Msaa4x2:
+ case MsaaMode::Msaa4x2_D3D:
+ case MsaaMode::Msaa4x2_VC8:
+ case MsaaMode::Msaa4x2_VC24:
+ case MsaaMode::Msaa4x4:
+ return 4;
+ }
+ ASSERT_MSG(false, "Invalid MSAA mode={}", static_cast<int>(msaa_mode));
+ return 1;
+}
+
+[[nodiscard]] inline int NumSamplesY(Tegra::Texture::MsaaMode msaa_mode) {
+ using Tegra::Texture::MsaaMode;
+ switch (msaa_mode) {
+ case MsaaMode::Msaa1x1:
+ case MsaaMode::Msaa2x1:
+ case MsaaMode::Msaa2x1_D3D:
+ return 1;
+ case MsaaMode::Msaa2x2:
+ case MsaaMode::Msaa2x2_VC4:
+ case MsaaMode::Msaa2x2_VC12:
+ case MsaaMode::Msaa4x2:
+ case MsaaMode::Msaa4x2_D3D:
+ case MsaaMode::Msaa4x2_VC8:
+ case MsaaMode::Msaa4x2_VC24:
+ return 2;
+ case MsaaMode::Msaa4x4:
+ return 4;
+ }
+ ASSERT_MSG(false, "Invalid MSAA mode={}", static_cast<int>(msaa_mode));
+ return 1;
+}
+
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index 1e2aad76a..9df6a2903 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -29,7 +29,7 @@ struct SlotId {
};
template <class T>
-requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T>
+ requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T>
class SlotVector {
public:
class Iterator {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 87152c8e9..1b01990a4 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -149,6 +149,13 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
}
template <class P>
+typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept {
+ const auto image_view_id = VisitImageView(channel_state->graphics_image_table,
+ channel_state->graphics_image_view_ids, index);
+ return slot_image_views[image_view_id];
+}
+
+template <class P>
void TextureCache<P>::MarkModification(ImageId id) noexcept {
MarkModification(slot_images[id]);
}
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 4eea1f609..485eaabaa 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -129,6 +129,9 @@ public:
/// Return a reference to the given image view id
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
+ /// Get the imageview from the graphics descriptor table in the specified index
+ [[nodiscard]] ImageView& GetImageView(u32 index) noexcept;
+
/// Mark an image as modified from the GPU
void MarkModification(ImageId id) noexcept;
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
index 85f1d13e0..5fa0d9620 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
@@ -57,7 +57,7 @@ NsightAftermathTracker::NsightAftermathTracker() {
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps(
GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan,
GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback,
- ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) {
+ ShaderDebugInfoCallback, CrashDumpDescriptionCallback, nullptr, this))) {
LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed");
return;
}
@@ -83,7 +83,7 @@ void NsightAftermathTracker::SaveShader(std::span<const u32> spirv) const {
std::scoped_lock lock{mutex};
- GFSDK_Aftermath_ShaderHash hash;
+ GFSDK_Aftermath_ShaderBinaryHash hash;
if (!GFSDK_Aftermath_SUCCEED(
GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) {
LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module");
@@ -121,8 +121,8 @@ void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump,
u32 json_size = 0;
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON(
decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO,
- GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr,
- this, &json_size))) {
+ GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, this,
+ &json_size))) {
LOG_ERROR(Render_Vulkan, "Failed to generate JSON");
return;
}