From a6b88e85bfb14c45345f6443b54d15a61e3975d5 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 17 Aug 2021 00:12:52 +0200 Subject: Renderer: Implement Bicubic and ScaleForce filters. --- src/video_core/host_shaders/CMakeLists.txt | 4 + .../host_shaders/opengl_present_bicubic.frag | 56 +++++++++ .../host_shaders/opengl_present_scaleforce.frag | 135 ++++++++++++++++++++ .../host_shaders/vulkan_present_bicubic.frag | 56 +++++++++ .../host_shaders/vulkan_present_scaleforce.frag | 137 +++++++++++++++++++++ 5 files changed, 388 insertions(+) create mode 100644 src/video_core/host_shaders/opengl_present_bicubic.frag create mode 100644 src/video_core/host_shaders/opengl_present_scaleforce.frag create mode 100644 src/video_core/host_shaders/vulkan_present_bicubic.frag create mode 100644 src/video_core/host_shaders/vulkan_present_scaleforce.frag (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 20d748c12..835b37944 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -6,11 +6,15 @@ set(SHADER_FILES convert_float_to_depth.frag full_screen_triangle.vert opengl_copy_bc4.comp + opengl_present_scaleforce.frag + opengl_present_bicubic.frag opengl_present.frag opengl_present.vert pitch_unswizzle.comp vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag + vulkan_present_bicubic.frag + vulkan_present_scaleforce.frag vulkan_present.frag vulkan_present.vert vulkan_quad_indexed.comp diff --git a/src/video_core/host_shaders/opengl_present_bicubic.frag b/src/video_core/host_shaders/opengl_present_bicubic.frag new file mode 100644 index 000000000..17772095a --- /dev/null +++ b/src/video_core/host_shaders/opengl_present_bicubic.frag @@ -0,0 +1,56 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core + +layout (location = 0) in vec2 frag_tex_coord; + +layout (location = 0) out vec4 color; + +layout (binding = 1) uniform sampler2D color_texture; + +vec4 cubic(float v) { + vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; + vec4 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) * (1.0 / 6.0); +} + +vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { + + vec2 texSize = textureSize(textureSampler, 0); + vec2 invTexSize = 1.0 / texSize; + + texCoords = texCoords * texSize - 0.5; + + vec2 fxy = fract(texCoords); + texCoords -= fxy; + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= invTexSize.xxyy; + + vec4 sample0 = texture(textureSampler, offset.xz); + vec4 sample1 = texture(textureSampler, offset.yz); + vec4 sample2 = texture(textureSampler, offset.xw); + vec4 sample3 = texture(textureSampler, offset.yw); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +} + +void main() { + color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); +} diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag new file mode 100644 index 000000000..0153f62c0 --- /dev/null +++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag @@ -0,0 +1,135 @@ +// from https://github.com/BreadFish64/ScaleFish/tree/master/scale_force + +// MIT License +// +// Copyright (c) 2020 BreadFish64 +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +precision mediump float; + +layout (location = 0) in vec2 tex_coord; + +layout (location = 0) out vec4 frag_color; + +layout (binding = 1) uniform sampler2D input_texture; + +vec2 tex_size; +vec2 inv_tex_size; + +vec4 cubic(float v) { + vec3 n = vec3(1.0, 2.0, 3.0) - v; + vec3 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) / 6.0; +} + +// Bicubic interpolation +vec4 textureBicubic(vec2 tex_coords) { + tex_coords = tex_coords * tex_size - 0.5; + + vec2 fxy = modf(tex_coords, tex_coords); + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= inv_tex_size.xxyy; + + vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); + vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); + vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); + vec4 sample3 = textureLod(input_texture, offset.yw, 0.0); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +} + +mat4x3 center_matrix; +vec4 center_alpha; + +// Finds the distance between four colors and cc in YCbCr space +vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { + // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion + const vec3 K = vec3(0.2627, 0.6780, 0.0593); + const float LUMINANCE_WEIGHT = .6; + const mat3 YCBCR_MATRIX = + mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, + -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); + + mat4x3 colors = mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; + mat4x3 YCbCr = YCBCR_MATRIX * colors; + vec4 color_dist = vec3(1.0) * YCbCr; + color_dist *= color_dist; + vec4 alpha = vec4(A.a, B.a, C.a, D.a); + + return sqrt((color_dist + abs(center_alpha - alpha)) * alpha * center_alpha); +} + +void main() { + vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); + vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); + vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); + vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); + vec4 cc = textureLod(input_texture, tex_coord, 0.0); + vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); + vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); + vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); + vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); + + + tex_size = vec2(textureSize(input_texture, 0)); + inv_tex_size = 1.0 / tex_size; + center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); + center_alpha = cc.aaaa; + + vec4 offset_tl = ColorDist(tl, tc, tr, cr); + vec4 offset_br = ColorDist(br, bc, bl, cl); + + // Calculate how different cc is from the texels around it + float total_dist = dot(offset_tl + offset_br, vec4(1.0)); + + // Add together all the distances with direction taken into account + vec4 tmp = offset_tl - offset_br; + vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); + + if (total_dist == 0.0) { + // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters + // and it doesn't really matter which filter is used when the colors aren't changing. + frag_color = vec4(cc.rgb, 1.0f); + } else { + // When the image has thin points, they tend to split apart. + // This is because the texels all around are different + // and total_offset reaches into clear areas. + // This works pretty well to keep the offset in bounds for these cases. + float clamp_val = length(total_offset) / total_dist; + vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; + + frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); + } +} diff --git a/src/video_core/host_shaders/vulkan_present_bicubic.frag b/src/video_core/host_shaders/vulkan_present_bicubic.frag new file mode 100644 index 000000000..17772095a --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_bicubic.frag @@ -0,0 +1,56 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core + +layout (location = 0) in vec2 frag_tex_coord; + +layout (location = 0) out vec4 color; + +layout (binding = 1) uniform sampler2D color_texture; + +vec4 cubic(float v) { + vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; + vec4 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) * (1.0 / 6.0); +} + +vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { + + vec2 texSize = textureSize(textureSampler, 0); + vec2 invTexSize = 1.0 / texSize; + + texCoords = texCoords * texSize - 0.5; + + vec2 fxy = fract(texCoords); + texCoords -= fxy; + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= invTexSize.xxyy; + + vec4 sample0 = texture(textureSampler, offset.xz); + vec4 sample1 = texture(textureSampler, offset.yz); + vec4 sample2 = texture(textureSampler, offset.xw); + vec4 sample3 = texture(textureSampler, offset.yw); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +} + +void main() { + color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); +} diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce.frag b/src/video_core/host_shaders/vulkan_present_scaleforce.frag new file mode 100644 index 000000000..801c8eae9 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce.frag @@ -0,0 +1,137 @@ +#version 320 es + +// from https://github.com/BreadFish64/ScaleFish/tree/master/scale_force + +// MIT License +// +// Copyright (c) 2020 BreadFish64 +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +precision mediump float; + +layout (location = 0) in vec2 tex_coord; + +layout (location = 0) out vec4 frag_color; + +layout (binding = 1) uniform sampler2D input_texture; + +vec2 tex_size; +vec2 inv_tex_size; + +vec4 cubic(float v) { + vec3 n = vec3(1.0, 2.0, 3.0) - v; + vec3 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) / 6.0; +} + +// Bicubic interpolation +vec4 textureBicubic(vec2 tex_coords) { + tex_coords = tex_coords * tex_size - 0.5; + + vec2 fxy = modf(tex_coords, tex_coords); + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= inv_tex_size.xxyy; + + vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); + vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); + vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); + vec4 sample3 = textureLod(input_texture, offset.yw, 0.0); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +} + +mat4x3 center_matrix; +vec4 center_alpha; + +// Finds the distance between four colors and cc in YCbCr space +vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { + // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion + const vec3 K = vec3(0.2627, 0.6780, 0.0593); + const float LUMINANCE_WEIGHT = .6; + const mat3 YCBCR_MATRIX = + mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, + -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); + + mat4x3 colors = mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; + mat4x3 YCbCr = YCBCR_MATRIX * colors; + vec4 color_dist = vec3(1.0) * YCbCr; + color_dist *= color_dist; + vec4 alpha = vec4(A.a, B.a, C.a, D.a); + + return sqrt((color_dist + abs(center_alpha - alpha)) * alpha * center_alpha); +} + +void main() { + vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); + vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); + vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); + vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); + vec4 cc = textureLod(input_texture, tex_coord, 0.0); + vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); + vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); + vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); + vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); + + + tex_size = vec2(textureSize(input_texture, 0)); + inv_tex_size = 1.0 / tex_size; + center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); + center_alpha = cc.aaaa; + + vec4 offset_tl = ColorDist(tl, tc, tr, cr); + vec4 offset_br = ColorDist(br, bc, bl, cl); + + // Calculate how different cc is from the texels around it + float total_dist = dot(offset_tl + offset_br, vec4(1.0)); + + // Add together all the distances with direction taken into account + vec4 tmp = offset_tl - offset_br; + vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); + + if (total_dist == 0.0) { + // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters + // and it doesn't really matter which filter is used when the colors aren't changing. + frag_color = vec4(cc.rgb, 1.0f); + } else { + // When the image has thin points, they tend to split apart. + // This is because the texels all around are different + // and total_offset reaches into clear areas. + // This works pretty well to keep the offset in bounds for these cases. + float clamp_val = length(total_offset) / total_dist; + vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; + + frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); + } +} -- cgit v1.2.3 From ae8d19d17eb5448207ece99ae07507c542c6ddae Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 9 Sep 2021 23:01:39 -0400 Subject: Renderers: Unify post processing filter shaders --- src/video_core/host_shaders/CMakeLists.txt | 6 +- .../host_shaders/opengl_present_bicubic.frag | 56 -------- .../host_shaders/opengl_present_scaleforce.frag | 135 ------------------- src/video_core/host_shaders/present_bicubic.frag | 67 ++++++++++ .../host_shaders/present_scaleforce.frag | 145 +++++++++++++++++++++ .../host_shaders/vulkan_present_bicubic.frag | 56 -------- .../host_shaders/vulkan_present_scaleforce.frag | 137 ------------------- 7 files changed, 214 insertions(+), 388 deletions(-) delete mode 100644 src/video_core/host_shaders/opengl_present_bicubic.frag delete mode 100644 src/video_core/host_shaders/opengl_present_scaleforce.frag create mode 100644 src/video_core/host_shaders/present_bicubic.frag create mode 100644 src/video_core/host_shaders/present_scaleforce.frag delete mode 100644 src/video_core/host_shaders/vulkan_present_bicubic.frag delete mode 100644 src/video_core/host_shaders/vulkan_present_scaleforce.frag (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 835b37944..664d6ce5d 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -6,15 +6,13 @@ set(SHADER_FILES convert_float_to_depth.frag full_screen_triangle.vert opengl_copy_bc4.comp - opengl_present_scaleforce.frag - opengl_present_bicubic.frag opengl_present.frag opengl_present.vert pitch_unswizzle.comp + present_scaleforce.frag + present_bicubic.frag vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag - vulkan_present_bicubic.frag - vulkan_present_scaleforce.frag vulkan_present.frag vulkan_present.vert vulkan_quad_indexed.comp diff --git a/src/video_core/host_shaders/opengl_present_bicubic.frag b/src/video_core/host_shaders/opengl_present_bicubic.frag deleted file mode 100644 index 17772095a..000000000 --- a/src/video_core/host_shaders/opengl_present_bicubic.frag +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#version 460 core - -layout (location = 0) in vec2 frag_tex_coord; - -layout (location = 0) out vec4 color; - -layout (binding = 1) uniform sampler2D color_texture; - -vec4 cubic(float v) { - vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; - vec4 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = 6.0 - x - y - z; - return vec4(x, y, z, w) * (1.0 / 6.0); -} - -vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { - - vec2 texSize = textureSize(textureSampler, 0); - vec2 invTexSize = 1.0 / texSize; - - texCoords = texCoords * texSize - 0.5; - - vec2 fxy = fract(texCoords); - texCoords -= fxy; - - vec4 xcubic = cubic(fxy.x); - vec4 ycubic = cubic(fxy.y); - - vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy; - - vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); - vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; - - offset *= invTexSize.xxyy; - - vec4 sample0 = texture(textureSampler, offset.xz); - vec4 sample1 = texture(textureSampler, offset.yz); - vec4 sample2 = texture(textureSampler, offset.xw); - vec4 sample3 = texture(textureSampler, offset.yw); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); -} - -void main() { - color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); -} diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag deleted file mode 100644 index 0153f62c0..000000000 --- a/src/video_core/host_shaders/opengl_present_scaleforce.frag +++ /dev/null @@ -1,135 +0,0 @@ -// from https://github.com/BreadFish64/ScaleFish/tree/master/scale_force - -// MIT License -// -// Copyright (c) 2020 BreadFish64 -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -precision mediump float; - -layout (location = 0) in vec2 tex_coord; - -layout (location = 0) out vec4 frag_color; - -layout (binding = 1) uniform sampler2D input_texture; - -vec2 tex_size; -vec2 inv_tex_size; - -vec4 cubic(float v) { - vec3 n = vec3(1.0, 2.0, 3.0) - v; - vec3 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = 6.0 - x - y - z; - return vec4(x, y, z, w) / 6.0; -} - -// Bicubic interpolation -vec4 textureBicubic(vec2 tex_coords) { - tex_coords = tex_coords * tex_size - 0.5; - - vec2 fxy = modf(tex_coords, tex_coords); - - vec4 xcubic = cubic(fxy.x); - vec4 ycubic = cubic(fxy.y); - - vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; - - vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); - vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; - - offset *= inv_tex_size.xxyy; - - vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); - vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); - vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); - vec4 sample3 = textureLod(input_texture, offset.yw, 0.0); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); -} - -mat4x3 center_matrix; -vec4 center_alpha; - -// Finds the distance between four colors and cc in YCbCr space -vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { - // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion - const vec3 K = vec3(0.2627, 0.6780, 0.0593); - const float LUMINANCE_WEIGHT = .6; - const mat3 YCBCR_MATRIX = - mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, - -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); - - mat4x3 colors = mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; - mat4x3 YCbCr = YCBCR_MATRIX * colors; - vec4 color_dist = vec3(1.0) * YCbCr; - color_dist *= color_dist; - vec4 alpha = vec4(A.a, B.a, C.a, D.a); - - return sqrt((color_dist + abs(center_alpha - alpha)) * alpha * center_alpha); -} - -void main() { - vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); - vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); - vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); - vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); - vec4 cc = textureLod(input_texture, tex_coord, 0.0); - vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); - vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); - vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); - vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); - - - tex_size = vec2(textureSize(input_texture, 0)); - inv_tex_size = 1.0 / tex_size; - center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); - center_alpha = cc.aaaa; - - vec4 offset_tl = ColorDist(tl, tc, tr, cr); - vec4 offset_br = ColorDist(br, bc, bl, cl); - - // Calculate how different cc is from the texels around it - float total_dist = dot(offset_tl + offset_br, vec4(1.0)); - - // Add together all the distances with direction taken into account - vec4 tmp = offset_tl - offset_br; - vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); - - if (total_dist == 0.0) { - // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters - // and it doesn't really matter which filter is used when the colors aren't changing. - frag_color = vec4(cc.rgb, 1.0f); - } else { - // When the image has thin points, they tend to split apart. - // This is because the texels all around are different - // and total_offset reaches into clear areas. - // This works pretty well to keep the offset in bounds for these cases. - float clamp_val = length(total_offset) / total_dist; - vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; - - frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); - } -} diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag new file mode 100644 index 000000000..f3e5410e7 --- /dev/null +++ b/src/video_core/host_shaders/present_bicubic.frag @@ -0,0 +1,67 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + + +layout (location = 0) in vec2 frag_tex_coord; + +layout (location = 0) out vec4 color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; + +vec4 cubic(float v) { + vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; + vec4 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) * (1.0 / 6.0); +} + +vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { + + vec2 texSize = textureSize(textureSampler, 0); + vec2 invTexSize = 1.0 / texSize; + + texCoords = texCoords * texSize - 0.5; + + vec2 fxy = fract(texCoords); + texCoords -= fxy; + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= invTexSize.xxyy; + + vec4 sample0 = texture(textureSampler, offset.xz); + vec4 sample1 = texture(textureSampler, offset.yz); + vec4 sample2 = texture(textureSampler, offset.xw); + vec4 sample3 = texture(textureSampler, offset.yw); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +} + +void main() { + color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); +} diff --git a/src/video_core/host_shaders/present_scaleforce.frag b/src/video_core/host_shaders/present_scaleforce.frag new file mode 100644 index 000000000..1829a9be8 --- /dev/null +++ b/src/video_core/host_shaders/present_scaleforce.frag @@ -0,0 +1,145 @@ +// MIT License +// +// Copyright (c) 2020 BreadFish64 +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce + +#version 460 + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout (location = 0) in vec2 tex_coord; + +layout (location = 0) out vec4 frag_color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; + +vec2 tex_size; +vec2 inv_tex_size; + +vec4 cubic(float v) { + vec3 n = vec3(1.0, 2.0, 3.0) - v; + vec3 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return vec4(x, y, z, w) / 6.0; +} + +// Bicubic interpolation +vec4 textureBicubic(vec2 tex_coords) { + tex_coords = tex_coords * tex_size - 0.5; + + vec2 fxy = modf(tex_coords, tex_coords); + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; + + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= inv_tex_size.xxyy; + + vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); + vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); + vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); + vec4 sample3 = textureLod(input_texture, offset.yw, 0.0); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +} + +mat4x3 center_matrix; +vec4 center_alpha; + +// Finds the distance between four colors and cc in YCbCr space +vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { + // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion + const vec3 K = vec3(0.2627, 0.6780, 0.0593); + const float LUMINANCE_WEIGHT = .6; + const mat3 YCBCR_MATRIX = + mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, + -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); + + mat4x3 colors = mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; + mat4x3 YCbCr = YCBCR_MATRIX * colors; + vec4 color_dist = vec3(1.0) * YCbCr; + color_dist *= color_dist; + vec4 alpha = vec4(A.a, B.a, C.a, D.a); + + return sqrt((color_dist + abs(center_alpha - alpha)) * alpha * center_alpha); +} + +void main() { + vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); + vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); + vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); + vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); + vec4 cc = textureLod(input_texture, tex_coord, 0.0); + vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); + vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); + vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); + vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); + + + tex_size = vec2(textureSize(input_texture, 0)); + inv_tex_size = 1.0 / tex_size; + center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); + center_alpha = cc.aaaa; + + vec4 offset_tl = ColorDist(tl, tc, tr, cr); + vec4 offset_br = ColorDist(br, bc, bl, cl); + + // Calculate how different cc is from the texels around it + float total_dist = dot(offset_tl + offset_br, vec4(1.0)); + + // Add together all the distances with direction taken into account + vec4 tmp = offset_tl - offset_br; + vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); + + if (total_dist == 0.0) { + // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters + // and it doesn't really matter which filter is used when the colors aren't changing. + frag_color = vec4(cc.rgb, 1.0f); + } else { + // When the image has thin points, they tend to split apart. + // This is because the texels all around are different + // and total_offset reaches into clear areas. + // This works pretty well to keep the offset in bounds for these cases. + float clamp_val = length(total_offset) / total_dist; + vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; + + frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); + } +} diff --git a/src/video_core/host_shaders/vulkan_present_bicubic.frag b/src/video_core/host_shaders/vulkan_present_bicubic.frag deleted file mode 100644 index 17772095a..000000000 --- a/src/video_core/host_shaders/vulkan_present_bicubic.frag +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#version 460 core - -layout (location = 0) in vec2 frag_tex_coord; - -layout (location = 0) out vec4 color; - -layout (binding = 1) uniform sampler2D color_texture; - -vec4 cubic(float v) { - vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; - vec4 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = 6.0 - x - y - z; - return vec4(x, y, z, w) * (1.0 / 6.0); -} - -vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { - - vec2 texSize = textureSize(textureSampler, 0); - vec2 invTexSize = 1.0 / texSize; - - texCoords = texCoords * texSize - 0.5; - - vec2 fxy = fract(texCoords); - texCoords -= fxy; - - vec4 xcubic = cubic(fxy.x); - vec4 ycubic = cubic(fxy.y); - - vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy; - - vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); - vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; - - offset *= invTexSize.xxyy; - - vec4 sample0 = texture(textureSampler, offset.xz); - vec4 sample1 = texture(textureSampler, offset.yz); - vec4 sample2 = texture(textureSampler, offset.xw); - vec4 sample3 = texture(textureSampler, offset.yw); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); -} - -void main() { - color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); -} diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce.frag b/src/video_core/host_shaders/vulkan_present_scaleforce.frag deleted file mode 100644 index 801c8eae9..000000000 --- a/src/video_core/host_shaders/vulkan_present_scaleforce.frag +++ /dev/null @@ -1,137 +0,0 @@ -#version 320 es - -// from https://github.com/BreadFish64/ScaleFish/tree/master/scale_force - -// MIT License -// -// Copyright (c) 2020 BreadFish64 -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -precision mediump float; - -layout (location = 0) in vec2 tex_coord; - -layout (location = 0) out vec4 frag_color; - -layout (binding = 1) uniform sampler2D input_texture; - -vec2 tex_size; -vec2 inv_tex_size; - -vec4 cubic(float v) { - vec3 n = vec3(1.0, 2.0, 3.0) - v; - vec3 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = 6.0 - x - y - z; - return vec4(x, y, z, w) / 6.0; -} - -// Bicubic interpolation -vec4 textureBicubic(vec2 tex_coords) { - tex_coords = tex_coords * tex_size - 0.5; - - vec2 fxy = modf(tex_coords, tex_coords); - - vec4 xcubic = cubic(fxy.x); - vec4 ycubic = cubic(fxy.y); - - vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; - - vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); - vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; - - offset *= inv_tex_size.xxyy; - - vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); - vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); - vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); - vec4 sample3 = textureLod(input_texture, offset.yw, 0.0); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); -} - -mat4x3 center_matrix; -vec4 center_alpha; - -// Finds the distance between four colors and cc in YCbCr space -vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { - // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion - const vec3 K = vec3(0.2627, 0.6780, 0.0593); - const float LUMINANCE_WEIGHT = .6; - const mat3 YCBCR_MATRIX = - mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, - -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); - - mat4x3 colors = mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; - mat4x3 YCbCr = YCBCR_MATRIX * colors; - vec4 color_dist = vec3(1.0) * YCbCr; - color_dist *= color_dist; - vec4 alpha = vec4(A.a, B.a, C.a, D.a); - - return sqrt((color_dist + abs(center_alpha - alpha)) * alpha * center_alpha); -} - -void main() { - vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); - vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); - vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); - vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); - vec4 cc = textureLod(input_texture, tex_coord, 0.0); - vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); - vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); - vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); - vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); - - - tex_size = vec2(textureSize(input_texture, 0)); - inv_tex_size = 1.0 / tex_size; - center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); - center_alpha = cc.aaaa; - - vec4 offset_tl = ColorDist(tl, tc, tr, cr); - vec4 offset_br = ColorDist(br, bc, bl, cl); - - // Calculate how different cc is from the texels around it - float total_dist = dot(offset_tl + offset_br, vec4(1.0)); - - // Add together all the distances with direction taken into account - vec4 tmp = offset_tl - offset_br; - vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); - - if (total_dist == 0.0) { - // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters - // and it doesn't really matter which filter is used when the colors aren't changing. - frag_color = vec4(cc.rgb, 1.0f); - } else { - // When the image has thin points, they tend to split apart. - // This is because the texels all around are different - // and total_offset reaches into clear areas. - // This works pretty well to keep the offset in bounds for these cases. - float clamp_val = length(total_offset) / total_dist; - vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; - - frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); - } -} -- cgit v1.2.3 From 37cb0377ae30e2139f6fa381d04124e51fcccded Mon Sep 17 00:00:00 2001 From: Marshall Mohror Date: Sat, 16 Oct 2021 20:33:58 -0500 Subject: vulkan: Implement FidelityFX Super Resolution --- src/video_core/host_shaders/CMakeLists.txt | 17 ++- src/video_core/host_shaders/fidelityfx_fsr.comp | 114 +++++++++++++++++++++ .../host_shaders/vulkan_fidelityfx_fsr_easu.comp | 13 +++ .../host_shaders/vulkan_fidelityfx_fsr_rcas.comp | 13 +++ 4 files changed, 155 insertions(+), 2 deletions(-) create mode 100644 src/video_core/host_shaders/fidelityfx_fsr.comp create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 664d6ce5d..32e2ab500 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,3 +1,11 @@ +set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr) + +set(GLSL_INCLUDES + fidelityfx_fsr.comp + ${FIDELITYFX_INCLUDE_DIR}/ffx_a.h + ${FIDELITYFX_INCLUDE_DIR}/ffx_fsr1.h +) + set(SHADER_FILES astc_decoder.comp block_linear_unswizzle_2d.comp @@ -13,6 +21,8 @@ set(SHADER_FILES present_bicubic.frag vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag + vulkan_fidelityfx_fsr_easu.comp + vulkan_fidelityfx_fsr_rcas.comp vulkan_present.frag vulkan_present.vert vulkan_quad_indexed.comp @@ -78,7 +88,7 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) OUTPUT ${SPIRV_HEADER_FILE} COMMAND - ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} + ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} MAIN_DEPENDENCY ${SOURCE_FILE} ) @@ -86,9 +96,12 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) endif() endforeach() +set(SHADER_SOURCES ${SHADER_FILES}) +list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) + add_custom_target(host_shaders DEPENDS ${SHADER_HEADERS} SOURCES - ${SHADER_FILES} + ${SHADER_SOURCES} ) diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp new file mode 100644 index 000000000..cbb601580 --- /dev/null +++ b/src/video_core/host_shaders/fidelityfx_fsr.comp @@ -0,0 +1,114 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +//!#version 460 core +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable +#extension GL_GOOGLE_include_directive : enable +#extension GL_EXT_shader_explicit_arithmetic_types : require + +// FidelityFX Super Resolution Sample +// +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +layout( push_constant ) uniform constants { + u32vec2 input_size; +}; + +uvec4 Const0; +uvec4 Const1; +uvec4 Const2; +uvec4 Const3; + +#define A_GPU 1 +#define A_GLSL 1 +#define A_HALF + +#include "ffx_a.h" + +f16vec4 LinearToSRGB(f16vec4 linear) { + bvec4 selector = greaterThan(linear, f16vec4(0.00313066844250063)); + f16vec4 low = linear * float16_t(12.92); + f16vec4 high = float16_t(1.055) * pow(linear, f16vec4(1 / 2.4)) - float16_t(0.055); + return mix(low, high, selector); +} + +f16vec4 SRGBToLinear(f16vec4 srgb) { + bvec4 selector = greaterThan(srgb, f16vec4(0.0404482362771082)); + f16vec4 low = srgb * float16_t(1.0 / 12.92); + f16vec4 high = pow((srgb + float16_t(0.055)) * float16_t(1.0 / 1.055), f16vec4(2.4)); + return mix(low, high, selector); +} + +#if USE_EASU + #define FSR_EASU_H 1 + f16vec4 FsrEasuRH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 0)); return res; } + f16vec4 FsrEasuGH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 1)); return res; } + f16vec4 FsrEasuBH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 2)); return res; } +#endif +#if USE_RCAS + #define FSR_RCAS_H 1 + f16vec4 FsrRcasLoadH(ASW2 p) { return f16vec4(texelFetch(InputTexture, ASU2(p), 0)); } + void FsrRcasInputH(inout float16_t r, inout float16_t g, inout float16_t b) {} +#endif + +#include "ffx_fsr1.h" + +void CurrFilter(u32vec2 pos) { + // For debugging +#if USE_BILINEAR + vec2 pp = (vec2(pos) * vec2_AU2(Const0.xy) + vec2_AU2(Const0.zw)) * vec2_AU2(Const1.xy) + vec2(0.5, -0.5) * vec2_AU2(Const1.zw); + imageStore(OutputTexture, ivec2(pos), textureLod(InputTexture, pp, 0.0)); +#endif +#if USE_EASU + f16vec3 c; + FsrEasuH(c, pos, Const0, Const1, Const2, Const3); + imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); +#endif +#if USE_RCAS + f16vec3 c; + FsrRcasH(c.r, c.g, c.b, pos, Const0); + imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); +#endif + +} + +layout(local_size_x=64) in; +void main() { + +#if USE_EASU || USE_BILINEAR + vec2 ires = vec2(input_size); + vec2 tres = textureSize(InputTexture, 0); + vec2 ores = imageSize(OutputTexture); + FsrEasuCon(Const0, Const1, Const2, Const3, ires.x, ires.y, tres.x, tres.y, ores.x, ores.y); +#endif +#if USE_RCAS + FsrRcasCon(Const0, 0.25f); +#endif + + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. + AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); + CurrFilter(gxy); + gxy.x += 8u; + CurrFilter(gxy); + gxy.y += 8u; + CurrFilter(gxy); + gxy.x -= 8u; + CurrFilter(gxy); +} diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp new file mode 100644 index 000000000..6525eeeb5 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +layout(set=0,binding=0) uniform sampler2D InputTexture; +layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; + +#define USE_EASU 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp new file mode 100644 index 000000000..9463ed842 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +layout(set=0,binding=0) uniform sampler2D InputTexture; +layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; + +#define USE_RCAS 1 + +#include "fidelityfx_fsr.comp" -- cgit v1.2.3 From 916b882ea8870e695d50e8ca8c8e4c35fb1895d5 Mon Sep 17 00:00:00 2001 From: Marshall Mohror Date: Wed, 20 Oct 2021 13:35:59 -0500 Subject: Update scaleforce to use FP16 --- .../host_shaders/present_scaleforce.frag | 143 ++++++++------------- 1 file changed, 55 insertions(+), 88 deletions(-) (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/present_scaleforce.frag b/src/video_core/host_shaders/present_scaleforce.frag index 1829a9be8..ebc0d9b90 100644 --- a/src/video_core/host_shaders/present_scaleforce.frag +++ b/src/video_core/host_shaders/present_scaleforce.frag @@ -24,6 +24,9 @@ #version 460 +#extension GL_AMD_gpu_shader_half_float : enable +#extension GL_NV_gpu_shader5 : enable + #ifdef VULKAN #define BINDING_COLOR_TEXTURE 1 @@ -40,106 +43,70 @@ layout (location = 0) out vec4 frag_color; layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; -vec2 tex_size; -vec2 inv_tex_size; - -vec4 cubic(float v) { - vec3 n = vec3(1.0, 2.0, 3.0) - v; - vec3 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = 6.0 - x - y - z; - return vec4(x, y, z, w) / 6.0; -} - -// Bicubic interpolation -vec4 textureBicubic(vec2 tex_coords) { - tex_coords = tex_coords * tex_size - 0.5; - - vec2 fxy = modf(tex_coords, tex_coords); - - vec4 xcubic = cubic(fxy.x); - vec4 ycubic = cubic(fxy.y); - - vec4 c = tex_coords.xxyy + vec2(-0.5, +1.5).xyxy; +const bool ignore_alpha = true; - vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); - vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; - - offset *= inv_tex_size.xxyy; - - vec4 sample0 = textureLod(input_texture, offset.xz, 0.0); - vec4 sample1 = textureLod(input_texture, offset.yz, 0.0); - vec4 sample2 = textureLod(input_texture, offset.xw, 0.0); - vec4 sample3 = textureLod(input_texture, offset.yw, 0.0); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); -} - -mat4x3 center_matrix; -vec4 center_alpha; - -// Finds the distance between four colors and cc in YCbCr space -vec4 ColorDist(vec4 A, vec4 B, vec4 C, vec4 D) { +float16_t ColorDist1(f16vec4 a, f16vec4 b) { // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion - const vec3 K = vec3(0.2627, 0.6780, 0.0593); - const float LUMINANCE_WEIGHT = .6; - const mat3 YCBCR_MATRIX = - mat3(K * LUMINANCE_WEIGHT, -.5 * K.r / (1.0 - K.b), -.5 * K.g / (1.0 - K.b), .5, .5, - -.5 * K.g / (1.0 - K.r), -.5 * K.b / (1.0 - K.r)); - - mat4x3 colors = mat4x3(A.rgb, B.rgb, C.rgb, D.rgb) - center_matrix; - mat4x3 YCbCr = YCBCR_MATRIX * colors; - vec4 color_dist = vec3(1.0) * YCbCr; - color_dist *= color_dist; - vec4 alpha = vec4(A.a, B.a, C.a, D.a); - - return sqrt((color_dist + abs(center_alpha - alpha)) * alpha * center_alpha); + const f16vec3 K = f16vec3(0.2627, 0.6780, 0.0593); + const float16_t scaleB = float16_t(0.5) / (float16_t(1.0) - K.b); + const float16_t scaleR = float16_t(0.5) / (float16_t(1.0) - K.r); + f16vec4 diff = a - b; + float16_t Y = dot(diff.rgb, K); + float16_t Cb = scaleB * (diff.b - Y); + float16_t Cr = scaleR * (diff.r - Y); + f16vec3 YCbCr = f16vec3(Y, Cb, Cr); + float16_t d = length(YCbCr); + if (ignore_alpha) { + return d; + } + return sqrt(a.a * b.a * d * d + diff.a * diff.a); } -void main() { - vec4 bl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, -1)); - vec4 bc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, -1)); - vec4 br = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, -1)); - vec4 cl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 0)); - vec4 cc = textureLod(input_texture, tex_coord, 0.0); - vec4 cr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 0)); - vec4 tl = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(-1, 1)); - vec4 tc = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(0, 1)); - vec4 tr = textureLodOffset(input_texture, tex_coord, 0.0, ivec2(1, 1)); - +f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) { + return f16vec4( + ColorDist1(ref, A), + ColorDist1(ref, B), + ColorDist1(ref, C), + ColorDist1(ref, D) + ); +} - tex_size = vec2(textureSize(input_texture, 0)); - inv_tex_size = 1.0 / tex_size; - center_matrix = mat4x3(cc.rgb, cc.rgb, cc.rgb, cc.rgb); - center_alpha = cc.aaaa; +vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { + f16vec4 bl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); + f16vec4 bc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, -1))); + f16vec4 br = f16vec4(textureOffset(tex, tex_coord, ivec2(1, -1))); + f16vec4 cl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); + f16vec4 cc = f16vec4(texture(tex, tex_coord)); + f16vec4 cr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 0))); + f16vec4 tl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); + f16vec4 tc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, 1))); + f16vec4 tr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 1))); - vec4 offset_tl = ColorDist(tl, tc, tr, cr); - vec4 offset_br = ColorDist(br, bc, bl, cl); + f16vec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); + f16vec4 offset_br = ColorDist(cc, br, bc, bl, cl); // Calculate how different cc is from the texels around it - float total_dist = dot(offset_tl + offset_br, vec4(1.0)); - - // Add together all the distances with direction taken into account - vec4 tmp = offset_tl - offset_br; - vec2 total_offset = tmp.wy + tmp.zz + vec2(-tmp.x, tmp.x); + const float16_t plus_weight = float16_t(1.5); + const float16_t cross_weight = float16_t(1.5); + float16_t total_dist = dot(offset_tl + offset_br, f16vec4(cross_weight, plus_weight, cross_weight, plus_weight)); - if (total_dist == 0.0) { - // Doing bicubic filtering just past the edges where the offset is 0 causes black floaters - // and it doesn't really matter which filter is used when the colors aren't changing. - frag_color = vec4(cc.rgb, 1.0f); + if (total_dist == float16_t(0.0)) { + return cc; } else { + // Add together all the distances with direction taken into account + f16vec4 tmp = offset_tl - offset_br; + f16vec2 total_offset = tmp.wy * plus_weight + (tmp.zz + f16vec2(-tmp.x, tmp.x)) * cross_weight; + // When the image has thin points, they tend to split apart. - // This is because the texels all around are different - // and total_offset reaches into clear areas. + // This is because the texels all around are different and total_offset reaches into clear areas. // This works pretty well to keep the offset in bounds for these cases. - float clamp_val = length(total_offset) / total_dist; - vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) * inv_tex_size; + float16_t clamp_val = length(total_offset) / total_dist; + f16vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) / f16vec2(textureSize(tex, 0)); - frag_color = vec4(textureBicubic(tex_coord - final_offset).rgb, 1.0f); + return texture(tex, tex_coord - final_offset); } } + +void main() { + frag_color = Scaleforce(input_texture, tex_coord); +} \ No newline at end of file -- cgit v1.2.3 From 9e065b9c7d3b25ddfe20afa4a945cca6e9767fa9 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 21 Oct 2021 01:27:54 +0200 Subject: VideoCore: Add gaussian filtering. --- src/video_core/host_shaders/CMakeLists.txt | 1 + src/video_core/host_shaders/present_gaussian.frag | 74 +++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 src/video_core/host_shaders/present_gaussian.frag (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 32e2ab500..b0e15773c 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -19,6 +19,7 @@ set(SHADER_FILES pitch_unswizzle.comp present_scaleforce.frag present_bicubic.frag + present_gaussian.frag vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag vulkan_fidelityfx_fsr_easu.comp diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag new file mode 100644 index 000000000..d5e2b1781 --- /dev/null +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -0,0 +1,74 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout (location = 0) in vec2 frag_tex_coord; + +layout (location = 0) out vec4 color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; + +const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308); +const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703); + +vec4 blurVertical(sampler2D textureSampler, vec2 coord, vec2 norm) { + vec4 result = vec4(0.0f); + for (int i=1; i<3; i++) { + result += + texture(textureSampler, vec2(coord) + (vec2(0.0, offset[i]) * norm)) + * weight[i]; + result += + texture(textureSampler, vec2(coord) - (vec2(0.0, offset[i]) * norm)) + * weight[i]; + } + return result; +} + +vec4 blurHorizontal(sampler2D textureSampler, vec2 coord, vec2 norm) { + vec4 result = vec4(0.0f); + for (int i=1; i<3; i++) { + result += + texture(textureSampler, vec2(coord) + (vec2(offset[i], 0.0) * norm)) + * weight[i]; + result += + texture(textureSampler, vec2(coord) - (vec2(offset[i], 0.0) * norm)) + * weight[i]; + } + return result; +} + +vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) { + vec4 result = vec4(0.0f); + for (int i=1; i<3; i++) { + result += + texture(textureSampler, vec2(coord) + (vec2(offset[i], offset[i]) * norm)) + * weight[i]; + result += + texture(textureSampler, vec2(coord) - (vec2(offset[i], offset[i]) * norm)) + * weight[i]; + } + return result; +} + +void main() { + vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0]; + vec2 tex_offset = 1.0f / textureSize(color_texture, 0); + vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb; + vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; + vec3 diagonalA = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; + vec3 diagonalB = blurVertical(color_texture, frag_tex_coord, -tex_offset).rgb; + vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f); + color = vec4(combination + base, 1.0f); +} -- cgit v1.2.3 From 48cf3764626e1ed30450d15e00befb75a4eae329 Mon Sep 17 00:00:00 2001 From: Marshall Mohror Date: Wed, 20 Oct 2021 18:36:06 -0500 Subject: OpenGL: Implement FXAA --- src/video_core/host_shaders/CMakeLists.txt | 2 + src/video_core/host_shaders/fxaa.frag | 72 ++++++++++++++++++++++++++++++ src/video_core/host_shaders/fxaa.vert | 40 +++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 src/video_core/host_shaders/fxaa.frag create mode 100644 src/video_core/host_shaders/fxaa.vert (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index b0e15773c..6b5ea649a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -13,6 +13,8 @@ set(SHADER_FILES convert_depth_to_float.frag convert_float_to_depth.frag full_screen_triangle.vert + fxaa.frag + fxaa.vert opengl_copy_bc4.comp opengl_present.frag opengl_present.vert diff --git a/src/video_core/host_shaders/fxaa.frag b/src/video_core/host_shaders/fxaa.frag new file mode 100644 index 000000000..23f910d4c --- /dev/null +++ b/src/video_core/host_shaders/fxaa.frag @@ -0,0 +1,72 @@ +// Adapted from +// https://www.geeks3d.com/20110405/fxaa-fast-approximate-anti-aliasing-demo-glsl-opengl-test-radeon-geforce/3/ + +#version 460 + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout (location = 0) in vec4 posPos; + +layout (location = 0) out vec4 frag_color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; + +const float FXAA_SPAN_MAX = 8.0; +const float FXAA_REDUCE_MUL = 1.0 / 8.0; +const float FXAA_REDUCE_MIN = 1.0 / 128.0; + +#define FxaaTexLod0(t, p) textureLod(t, p, 0.0) +#define FxaaTexOff(t, p, o) textureLodOffset(t, p, 0.0, o) + +vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) { + + vec3 rgbNW = FxaaTexLod0(tex, posPos.zw).xyz; + vec3 rgbNE = FxaaTexOff(tex, posPos.zw, ivec2(1,0)).xyz; + vec3 rgbSW = FxaaTexOff(tex, posPos.zw, ivec2(0,1)).xyz; + vec3 rgbSE = FxaaTexOff(tex, posPos.zw, ivec2(1,1)).xyz; + vec3 rgbM = FxaaTexLod0(tex, posPos.xy).xyz; +/*---------------------------------------------------------*/ + vec3 luma = vec3(0.299, 0.587, 0.114); + float lumaNW = dot(rgbNW, luma); + float lumaNE = dot(rgbNE, luma); + float lumaSW = dot(rgbSW, luma); + float lumaSE = dot(rgbSE, luma); + float lumaM = dot(rgbM, luma); +/*---------------------------------------------------------*/ + float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE))); + float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE))); +/*---------------------------------------------------------*/ + vec2 dir; + dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE)); + dir.y = ((lumaNW + lumaSW) - (lumaNE + lumaSE)); +/*---------------------------------------------------------*/ + float dirReduce = max( + (lumaNW + lumaNE + lumaSW + lumaSE) * (0.25 * FXAA_REDUCE_MUL), + FXAA_REDUCE_MIN); + float rcpDirMin = 1.0/(min(abs(dir.x), abs(dir.y)) + dirReduce); + dir = min(vec2( FXAA_SPAN_MAX, FXAA_SPAN_MAX), + max(vec2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX), + dir * rcpDirMin)) / textureSize(tex, 0); +/*--------------------------------------------------------*/ + vec3 rgbA = (1.0 / 2.0) * ( + FxaaTexLod0(tex, posPos.xy + dir * (1.0 / 3.0 - 0.5)).xyz + + FxaaTexLod0(tex, posPos.xy + dir * (2.0 / 3.0 - 0.5)).xyz); + vec3 rgbB = rgbA * (1.0 / 2.0) + (1.0 / 4.0) * ( + FxaaTexLod0(tex, posPos.xy + dir * (0.0 / 3.0 - 0.5)).xyz + + FxaaTexLod0(tex, posPos.xy + dir * (3.0 / 3.0 - 0.5)).xyz); + float lumaB = dot(rgbB, luma); + if((lumaB < lumaMin) || (lumaB > lumaMax)) return rgbA; + return rgbB; +} + +void main() { + frag_color = vec4(FxaaPixelShader(posPos, input_texture), 1.0); +} diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert new file mode 100644 index 000000000..715fce462 --- /dev/null +++ b/src/video_core/host_shaders/fxaa.vert @@ -0,0 +1,40 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 + +out gl_PerVertex { + vec4 gl_Position; +}; + +const vec2 vertices[4] = + vec2[4](vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0)); + +layout (location = 0) out vec4 posPos; + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; + +const float FXAA_SUBPIX_SHIFT = 0; + +void main() { +#ifdef VULKAN + vec2 vertex = vertices[gl_VertexIndex]; +#else + vec2 vertex = vertices[gl_VertexID]; +#endif + gl_Position = vec4(vertex, 0.0, 1.0); + vec2 vert_tex_coord = (vertex + 1.0) / 2.0; + posPos.xy = vert_tex_coord; + posPos.zw = vert_tex_coord - (0.5 + FXAA_SUBPIX_SHIFT) / textureSize(input_texture, 0); +} -- cgit v1.2.3 From e6f1ed08fb1f11d86bb4cb7c03d83a9f443d6c12 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 22 Oct 2021 19:22:34 +0200 Subject: Vulkan: Implement FXAA --- src/video_core/host_shaders/fxaa.vert | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert index 715fce462..01d5ff4df 100644 --- a/src/video_core/host_shaders/fxaa.vert +++ b/src/video_core/host_shaders/fxaa.vert @@ -15,7 +15,7 @@ layout (location = 0) out vec4 posPos; #ifdef VULKAN -#define BINDING_COLOR_TEXTURE 1 +#define BINDING_COLOR_TEXTURE 0 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv -- cgit v1.2.3 From 99547d2656ee8e84b684794fa8e013b146f15284 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 23 Oct 2021 00:23:50 +0200 Subject: HostShader: Fix gaussian and add attribution. --- src/video_core/host_shaders/present_gaussian.frag | 42 ++++++++++------------- 1 file changed, 19 insertions(+), 23 deletions(-) (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag index d5e2b1781..a9558548f 100644 --- a/src/video_core/host_shaders/present_gaussian.frag +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -2,6 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +// Code obtained from this 2 sources: +// - https://learnopengl.com/Advanced-Lighting/Bloom +// - https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/ + #version 460 core #ifdef VULKAN @@ -14,50 +18,40 @@ #endif -layout (location = 0) in vec2 frag_tex_coord; +layout(location = 0) in vec2 frag_tex_coord; -layout (location = 0) out vec4 color; +layout(location = 0) out vec4 color; -layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; +layout(binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308); const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703); vec4 blurVertical(sampler2D textureSampler, vec2 coord, vec2 norm) { vec4 result = vec4(0.0f); - for (int i=1; i<3; i++) { - result += - texture(textureSampler, vec2(coord) + (vec2(0.0, offset[i]) * norm)) - * weight[i]; - result += - texture(textureSampler, vec2(coord) - (vec2(0.0, offset[i]) * norm)) - * weight[i]; + for (int i = 1; i < 3; i++) { + result += texture(textureSampler, vec2(coord) + (vec2(0.0, offset[i]) * norm)) * weight[i]; + result += texture(textureSampler, vec2(coord) - (vec2(0.0, offset[i]) * norm)) * weight[i]; } return result; } vec4 blurHorizontal(sampler2D textureSampler, vec2 coord, vec2 norm) { vec4 result = vec4(0.0f); - for (int i=1; i<3; i++) { - result += - texture(textureSampler, vec2(coord) + (vec2(offset[i], 0.0) * norm)) - * weight[i]; - result += - texture(textureSampler, vec2(coord) - (vec2(offset[i], 0.0) * norm)) - * weight[i]; + for (int i = 1; i < 3; i++) { + result += texture(textureSampler, vec2(coord) + (vec2(offset[i], 0.0) * norm)) * weight[i]; + result += texture(textureSampler, vec2(coord) - (vec2(offset[i], 0.0) * norm)) * weight[i]; } return result; } vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) { vec4 result = vec4(0.0f); - for (int i=1; i<3; i++) { + for (int i = 1; i < 3; i++) { result += - texture(textureSampler, vec2(coord) + (vec2(offset[i], offset[i]) * norm)) - * weight[i]; + texture(textureSampler, vec2(coord) + (vec2(offset[i], offset[i]) * norm)) * weight[i]; result += - texture(textureSampler, vec2(coord) - (vec2(offset[i], offset[i]) * norm)) - * weight[i]; + texture(textureSampler, vec2(coord) - (vec2(offset[i], offset[i]) * norm)) * weight[i]; } return result; } @@ -65,10 +59,12 @@ vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) { void main() { vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0]; vec2 tex_offset = 1.0f / textureSize(color_texture, 0); + + // TODO(Blinkhawk): This code can be optimized through shader group instructions. vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb; vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; vec3 diagonalA = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; - vec3 diagonalB = blurVertical(color_texture, frag_tex_coord, -tex_offset).rgb; + vec3 diagonalB = blurVertical(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb; vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f); color = vec4(combination + base, 1.0f); } -- cgit v1.2.3 From dcc5b4f6b005a2c89bb4e77bca4cfe8705734021 Mon Sep 17 00:00:00 2001 From: Marshall Mohror Date: Fri, 22 Oct 2021 23:09:29 -0500 Subject: Presentation: Only use FP16 in scaling shaders on supported devices in Vulkan --- src/video_core/host_shaders/CMakeLists.txt | 10 +- src/video_core/host_shaders/fidelityfx_fsr.comp | 106 ++++++++--------- .../host_shaders/opengl_present_scaleforce.frag | 130 +++++++++++++++++++++ .../host_shaders/present_scaleforce.frag | 112 ------------------ .../host_shaders/vulkan_fidelityfx_fsr_easu.comp | 13 --- .../vulkan_fidelityfx_fsr_easu_fp16.comp | 11 ++ .../vulkan_fidelityfx_fsr_easu_fp32.comp | 10 ++ .../host_shaders/vulkan_fidelityfx_fsr_rcas.comp | 13 --- .../vulkan_fidelityfx_fsr_rcas_fp16.comp | 11 ++ .../vulkan_fidelityfx_fsr_rcas_fp32.comp | 10 ++ .../vulkan_present_scaleforce_fp16.frag | 7 ++ .../vulkan_present_scaleforce_fp32.frag | 5 + 12 files changed, 245 insertions(+), 193 deletions(-) create mode 100644 src/video_core/host_shaders/opengl_present_scaleforce.frag delete mode 100644 src/video_core/host_shaders/present_scaleforce.frag delete mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp delete mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp create mode 100644 src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag create mode 100644 src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 6b5ea649a..d779a967a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -18,16 +18,20 @@ set(SHADER_FILES opengl_copy_bc4.comp opengl_present.frag opengl_present.vert + opengl_present_scaleforce.frag pitch_unswizzle.comp - present_scaleforce.frag present_bicubic.frag present_gaussian.frag vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag - vulkan_fidelityfx_fsr_easu.comp - vulkan_fidelityfx_fsr_rcas.comp + vulkan_fidelityfx_fsr_easu_fp16.comp + vulkan_fidelityfx_fsr_easu_fp32.comp + vulkan_fidelityfx_fsr_rcas_fp16.comp + vulkan_fidelityfx_fsr_rcas_fp32.comp vulkan_present.frag vulkan_present.vert + vulkan_present_scaleforce_fp16.frag + vulkan_present_scaleforce_fp32.frag vulkan_quad_indexed.comp vulkan_uint8.comp ) diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp index cbb601580..6b97f789d 100644 --- a/src/video_core/host_shaders/fidelityfx_fsr.comp +++ b/src/video_core/host_shaders/fidelityfx_fsr.comp @@ -28,80 +28,82 @@ // THE SOFTWARE. layout( push_constant ) uniform constants { - u32vec2 input_size; + uvec4 Const0; + uvec4 Const1; + uvec4 Const2; + uvec4 Const3; }; -uvec4 Const0; -uvec4 Const1; -uvec4 Const2; -uvec4 Const3; +layout(set=0,binding=0) uniform sampler2D InputTexture; +layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; #define A_GPU 1 #define A_GLSL 1 -#define A_HALF -#include "ffx_a.h" +#ifndef YUZU_USE_FP16 + #include "ffx_a.h" -f16vec4 LinearToSRGB(f16vec4 linear) { - bvec4 selector = greaterThan(linear, f16vec4(0.00313066844250063)); - f16vec4 low = linear * float16_t(12.92); - f16vec4 high = float16_t(1.055) * pow(linear, f16vec4(1 / 2.4)) - float16_t(0.055); - return mix(low, high, selector); -} - -f16vec4 SRGBToLinear(f16vec4 srgb) { - bvec4 selector = greaterThan(srgb, f16vec4(0.0404482362771082)); - f16vec4 low = srgb * float16_t(1.0 / 12.92); - f16vec4 high = pow((srgb + float16_t(0.055)) * float16_t(1.0 / 1.055), f16vec4(2.4)); - return mix(low, high, selector); -} + #if USE_EASU + #define FSR_EASU_F 1 + AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; } + AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; } + AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_F 1 + AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); } + void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} + #endif +#else + #define A_HALF + #include "ffx_a.h" -#if USE_EASU - #define FSR_EASU_H 1 - f16vec4 FsrEasuRH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 0)); return res; } - f16vec4 FsrEasuGH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 1)); return res; } - f16vec4 FsrEasuBH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 2)); return res; } -#endif -#if USE_RCAS - #define FSR_RCAS_H 1 - f16vec4 FsrRcasLoadH(ASW2 p) { return f16vec4(texelFetch(InputTexture, ASU2(p), 0)); } - void FsrRcasInputH(inout float16_t r, inout float16_t g, inout float16_t b) {} + #if USE_EASU + #define FSR_EASU_H 1 + AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; } + AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; } + AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_H 1 + AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); } + void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){} + #endif #endif #include "ffx_fsr1.h" -void CurrFilter(u32vec2 pos) { - // For debugging +void CurrFilter(AU2 pos) { #if USE_BILINEAR - vec2 pp = (vec2(pos) * vec2_AU2(Const0.xy) + vec2_AU2(Const0.zw)) * vec2_AU2(Const1.xy) + vec2(0.5, -0.5) * vec2_AU2(Const1.zw); - imageStore(OutputTexture, ivec2(pos), textureLod(InputTexture, pp, 0.0)); + AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw); + imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0)); #endif #if USE_EASU - f16vec3 c; - FsrEasuH(c, pos, Const0, Const1, Const2, Const3); - imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); + #ifndef YUZU_USE_FP16 + AF3 c; + FsrEasuF(c, pos, Const0, Const1, Const2, Const3); + imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + #else + AH3 c; + FsrEasuH(c, pos, Const0, Const1, Const2, Const3); + imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + #endif #endif #if USE_RCAS - f16vec3 c; - FsrRcasH(c.r, c.g, c.b, pos, Const0); - imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); + #ifndef YUZU_USE_FP16 + AF3 c; + FsrRcasF(c.r, c.g, c.b, pos, Const0); + imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + #else + AH3 c; + FsrRcasH(c.r, c.g, c.b, pos, Const0); + imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + #endif #endif - } layout(local_size_x=64) in; void main() { - -#if USE_EASU || USE_BILINEAR - vec2 ires = vec2(input_size); - vec2 tres = textureSize(InputTexture, 0); - vec2 ores = imageSize(OutputTexture); - FsrEasuCon(Const0, Const1, Const2, Const3, ires.x, ires.y, tres.x, tres.y, ores.x, ores.y); -#endif -#if USE_RCAS - FsrRcasCon(Const0, 0.25f); -#endif - // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); CurrFilter(gxy); diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag new file mode 100644 index 000000000..71ff9e1e3 --- /dev/null +++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag @@ -0,0 +1,130 @@ +// MIT License +// +// Copyright (c) 2020 BreadFish64 +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce + +//! #version 460 + +#extension GL_ARB_separate_shader_objects : enable + +#ifdef YUZU_USE_FP16 + +#extension GL_AMD_gpu_shader_half_float : enable +#extension GL_NV_gpu_shader5 : enable + +#define lfloat float16_t +#define lvec2 f16vec2 +#define lvec3 f16vec3 +#define lvec4 f16vec4 + +#else + +#define lfloat float +#define lvec2 vec2 +#define lvec3 vec3 +#define lvec4 vec4 + +#endif + +#ifdef VULKAN + +#define BINDING_COLOR_TEXTURE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#define BINDING_COLOR_TEXTURE 0 + +#endif + +layout (location = 0) in vec2 tex_coord; + +layout (location = 0) out vec4 frag_color; + +layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; + +const bool ignore_alpha = true; + +lfloat ColorDist1(lvec4 a, lvec4 b) { + // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion + const lvec3 K = lvec3(0.2627, 0.6780, 0.0593); + const lfloat scaleB = lfloat(0.5) / (lfloat(1.0) - K.b); + const lfloat scaleR = lfloat(0.5) / (lfloat(1.0) - K.r); + lvec4 diff = a - b; + lfloat Y = dot(diff.rgb, K); + lfloat Cb = scaleB * (diff.b - Y); + lfloat Cr = scaleR * (diff.r - Y); + lvec3 YCbCr = lvec3(Y, Cb, Cr); + lfloat d = length(YCbCr); + if (ignore_alpha) { + return d; + } + return sqrt(a.a * b.a * d * d + diff.a * diff.a); +} + +lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) { + return lvec4( + ColorDist1(ref, A), + ColorDist1(ref, B), + ColorDist1(ref, C), + ColorDist1(ref, D) + ); +} + +vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { + lvec4 bl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); + lvec4 bc = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1))); + lvec4 br = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1))); + lvec4 cl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); + lvec4 cc = lvec4(texture(tex, tex_coord)); + lvec4 cr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0))); + lvec4 tl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); + lvec4 tc = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1))); + lvec4 tr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1))); + + lvec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); + lvec4 offset_br = ColorDist(cc, br, bc, bl, cl); + + // Calculate how different cc is from the texels around it + const lfloat plus_weight = lfloat(1.5); + const lfloat cross_weight = lfloat(1.5); + lfloat total_dist = dot(offset_tl + offset_br, lvec4(cross_weight, plus_weight, cross_weight, plus_weight)); + + if (total_dist == lfloat(0.0)) { + return cc; + } else { + // Add together all the distances with direction taken into account + lvec4 tmp = offset_tl - offset_br; + lvec2 total_offset = tmp.wy * plus_weight + (tmp.zz + lvec2(-tmp.x, tmp.x)) * cross_weight; + + // When the image has thin points, they tend to split apart. + // This is because the texels all around are different and total_offset reaches into clear areas. + // This works pretty well to keep the offset in bounds for these cases. + lfloat clamp_val = length(total_offset) / total_dist; + vec2 final_offset = vec2(clamp(total_offset, -clamp_val, clamp_val)) / textureSize(tex, 0); + + return texture(tex, tex_coord - final_offset); + } +} + +void main() { + frag_color = Scaleforce(input_texture, tex_coord); +} diff --git a/src/video_core/host_shaders/present_scaleforce.frag b/src/video_core/host_shaders/present_scaleforce.frag deleted file mode 100644 index ebc0d9b90..000000000 --- a/src/video_core/host_shaders/present_scaleforce.frag +++ /dev/null @@ -1,112 +0,0 @@ -// MIT License -// -// Copyright (c) 2020 BreadFish64 -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce - -#version 460 - -#extension GL_AMD_gpu_shader_half_float : enable -#extension GL_NV_gpu_shader5 : enable - -#ifdef VULKAN - -#define BINDING_COLOR_TEXTURE 1 - -#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv - -#define BINDING_COLOR_TEXTURE 0 - -#endif - -layout (location = 0) in vec2 tex_coord; - -layout (location = 0) out vec4 frag_color; - -layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; - -const bool ignore_alpha = true; - -float16_t ColorDist1(f16vec4 a, f16vec4 b) { - // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion - const f16vec3 K = f16vec3(0.2627, 0.6780, 0.0593); - const float16_t scaleB = float16_t(0.5) / (float16_t(1.0) - K.b); - const float16_t scaleR = float16_t(0.5) / (float16_t(1.0) - K.r); - f16vec4 diff = a - b; - float16_t Y = dot(diff.rgb, K); - float16_t Cb = scaleB * (diff.b - Y); - float16_t Cr = scaleR * (diff.r - Y); - f16vec3 YCbCr = f16vec3(Y, Cb, Cr); - float16_t d = length(YCbCr); - if (ignore_alpha) { - return d; - } - return sqrt(a.a * b.a * d * d + diff.a * diff.a); -} - -f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) { - return f16vec4( - ColorDist1(ref, A), - ColorDist1(ref, B), - ColorDist1(ref, C), - ColorDist1(ref, D) - ); -} - -vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { - f16vec4 bl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); - f16vec4 bc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, -1))); - f16vec4 br = f16vec4(textureOffset(tex, tex_coord, ivec2(1, -1))); - f16vec4 cl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); - f16vec4 cc = f16vec4(texture(tex, tex_coord)); - f16vec4 cr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 0))); - f16vec4 tl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); - f16vec4 tc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, 1))); - f16vec4 tr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 1))); - - f16vec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); - f16vec4 offset_br = ColorDist(cc, br, bc, bl, cl); - - // Calculate how different cc is from the texels around it - const float16_t plus_weight = float16_t(1.5); - const float16_t cross_weight = float16_t(1.5); - float16_t total_dist = dot(offset_tl + offset_br, f16vec4(cross_weight, plus_weight, cross_weight, plus_weight)); - - if (total_dist == float16_t(0.0)) { - return cc; - } else { - // Add together all the distances with direction taken into account - f16vec4 tmp = offset_tl - offset_br; - f16vec2 total_offset = tmp.wy * plus_weight + (tmp.zz + f16vec2(-tmp.x, tmp.x)) * cross_weight; - - // When the image has thin points, they tend to split apart. - // This is because the texels all around are different and total_offset reaches into clear areas. - // This works pretty well to keep the offset in bounds for these cases. - float16_t clamp_val = length(total_offset) / total_dist; - f16vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) / f16vec2(textureSize(tex, 0)); - - return texture(tex, tex_coord - final_offset); - } -} - -void main() { - frag_color = Scaleforce(input_texture, tex_coord); -} \ No newline at end of file diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp deleted file mode 100644 index 6525eeeb5..000000000 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#version 460 core -#extension GL_GOOGLE_include_directive : enable - -layout(set=0,binding=0) uniform sampler2D InputTexture; -layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; - -#define USE_EASU 1 - -#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp new file mode 100644 index 000000000..1c96a7905 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp @@ -0,0 +1,11 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define YUZU_USE_FP16 +#define USE_EASU 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp new file mode 100644 index 000000000..f4daff739 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_EASU 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp deleted file mode 100644 index 9463ed842..000000000 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#version 460 core -#extension GL_GOOGLE_include_directive : enable - -layout(set=0,binding=0) uniform sampler2D InputTexture; -layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; - -#define USE_RCAS 1 - -#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp new file mode 100644 index 000000000..6b6796dd1 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp @@ -0,0 +1,11 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define YUZU_USE_FP16 +#define USE_RCAS 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp new file mode 100644 index 000000000..f785eebf3 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_RCAS 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag new file mode 100644 index 000000000..924c03060 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag @@ -0,0 +1,7 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : enable + +#define YUZU_USE_FP16 + +#include "opengl_present_scaleforce.frag" diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag new file mode 100644 index 000000000..a594b83ca --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag @@ -0,0 +1,5 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : enable + +#include "opengl_present_scaleforce.frag" -- cgit v1.2.3 From 87abab71fff2189c549ae247eb6c281c8f618acd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 23 Oct 2021 02:40:02 -0400 Subject: host_shaders: Misc copyright/style changes --- src/video_core/host_shaders/fxaa.frag | 6 +++++- src/video_core/host_shaders/fxaa.vert | 10 ++++------ src/video_core/host_shaders/present_bicubic.frag | 2 +- src/video_core/host_shaders/present_gaussian.frag | 4 ++-- 4 files changed, 12 insertions(+), 10 deletions(-) (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/fxaa.frag b/src/video_core/host_shaders/fxaa.frag index 23f910d4c..02f4068d1 100644 --- a/src/video_core/host_shaders/fxaa.frag +++ b/src/video_core/host_shaders/fxaa.frag @@ -1,4 +1,8 @@ -// Adapted from +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// Source code is adapted from // https://www.geeks3d.com/20110405/fxaa-fast-approximate-anti-aliasing-demo-glsl-opengl-test-radeon-geforce/3/ #version 460 diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert index 01d5ff4df..ac20c04e9 100644 --- a/src/video_core/host_shaders/fxaa.vert +++ b/src/video_core/host_shaders/fxaa.vert @@ -1,4 +1,4 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -16,10 +16,12 @@ layout (location = 0) out vec4 posPos; #ifdef VULKAN #define BINDING_COLOR_TEXTURE 0 +#define VERTEX_ID gl_VertexIndex #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv #define BINDING_COLOR_TEXTURE 0 +#define VERTEX_ID gl_VertexID #endif @@ -28,11 +30,7 @@ layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; const float FXAA_SUBPIX_SHIFT = 0; void main() { -#ifdef VULKAN - vec2 vertex = vertices[gl_VertexIndex]; -#else - vec2 vertex = vertices[gl_VertexID]; -#endif + vec2 vertex = vertices[VERTEX_ID]; gl_Position = vec4(vertex, 0.0, 1.0); vec2 vert_tex_coord = (vertex + 1.0) / 2.0; posPos.xy = vert_tex_coord; diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag index f3e5410e7..902b70c2b 100644 --- a/src/video_core/host_shaders/present_bicubic.frag +++ b/src/video_core/host_shaders/present_bicubic.frag @@ -1,4 +1,4 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag index a9558548f..72a300dac 100644 --- a/src/video_core/host_shaders/present_gaussian.frag +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -1,8 +1,8 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -// Code obtained from this 2 sources: +// Code adapted from the following sources: // - https://learnopengl.com/Advanced-Lighting/Bloom // - https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/ -- cgit v1.2.3 From d46a71e786b42ecfe702ae00e01e6dcdf9121875 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 29 Oct 2021 17:45:02 +0200 Subject: HostShader: fix Gaussian filter. --- src/video_core/host_shaders/present_gaussian.frag | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag index 72a300dac..66fed3238 100644 --- a/src/video_core/host_shaders/present_gaussian.frag +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -63,8 +63,8 @@ void main() { // TODO(Blinkhawk): This code can be optimized through shader group instructions. vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb; vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; - vec3 diagonalA = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; - vec3 diagonalB = blurVertical(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb; + vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb; + vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb; vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f); color = vec4(combination + base, 1.0f); } -- cgit v1.2.3