From 12d16248dd61d2998ee271e16d6980d6df963bbe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 01:39:05 -0300 Subject: host_shaders: Add block linear upload compute shaders --- src/video_core/host_shaders/CMakeLists.txt | 2 + .../host_shaders/block_linear_unswizzle_2d.comp | 122 ++++++++++++++++++++ .../host_shaders/block_linear_unswizzle_3d.comp | 125 +++++++++++++++++++++ 3 files changed, 249 insertions(+) create mode 100644 src/video_core/host_shaders/block_linear_unswizzle_2d.comp create mode 100644 src/video_core/host_shaders/block_linear_unswizzle_3d.comp (limited to 'src/video_core/host_shaders') diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index ff20bc93b..7feb6df99 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,4 +1,6 @@ set(SHADER_FILES + block_linear_unswizzle_2d.comp + block_linear_unswizzle_3d.comp opengl_present.frag opengl_present.vert ) diff --git a/src/video_core/host_shaders/block_linear_unswizzle_2d.comp b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp new file mode 100644 index 000000000..a131be79e --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp @@ -0,0 +1,122 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 2 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec3 origin; +UNIFORM(1) ivec3 destination; +UNIFORM(2) uint bytes_per_block_log2; +UNIFORM(3) uint layer_stride; +UNIFORM(4) uint block_size; +UNIFORM(5) uint x_shift; +UNIFORM(6) uint block_height; +UNIFORM(7) uint block_height_mask; +END_PUSH_CONSTANTS + +layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { + uint swizzle_table[]; +}; + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage2DArray output_image; + +layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; + +const uint GOB_SIZE_X = 64; +const uint GOB_SIZE_Y = 8; +const uint GOB_SIZE_Z = 1; +const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; + +const uint GOB_SIZE_X_SHIFT = 6; +const uint GOB_SIZE_Y_SHIFT = 3; +const uint GOB_SIZE_Z_SHIFT = 0; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); + +uint SwizzleOffset(uvec2 pos) { + pos = pos & SWIZZLE_MASK; + return swizzle_table[pos.y * 64 + pos.x]; +} + +uvec4 ReadTexel(uint offset) { + switch (bytes_per_block_log2) { +#if HAS_EXTENDED_TYPES + case 0: + return uvec4(u8data[offset], 0, 0, 0); + case 1: + return uvec4(u16data[offset / 2], 0, 0, 0); +#else + case 0: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); + case 1: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif + case 2: + return uvec4(u32data[offset / 4], 0, 0, 0); + case 3: + return uvec4(u64data[offset / 8], 0, 0); + case 4: + return u128data[offset / 16]; + } + return uvec4(0); +} + +void main() { + uvec3 pos = gl_GlobalInvocationID + origin; + pos.x <<= bytes_per_block_log2; + + // Read as soon as possible due to its latency + const uint swizzle = SwizzleOffset(pos.xy); + + const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; + + uint offset = 0; + offset += pos.z * layer_stride; + offset += (block_y >> block_height) * block_size; + offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; + offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; + offset += swizzle; + + const uvec4 texel = ReadTexel(offset); + const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; + imageStore(output_image, coord, texel); +} diff --git a/src/video_core/host_shaders/block_linear_unswizzle_3d.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp new file mode 100644 index 000000000..bb6872e6b --- /dev/null +++ b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp @@ -0,0 +1,125 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 430 + +#ifdef VULKAN + +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_8bit_storage : require +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 2 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#define BINDING_SWIZZLE_BUFFER 0 +#define BINDING_INPUT_BUFFER 1 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uvec3 origin; +UNIFORM(1) ivec3 destination; +UNIFORM(2) uint bytes_per_block_log2; +UNIFORM(3) uint slice_size; +UNIFORM(4) uint block_size; +UNIFORM(5) uint x_shift; +UNIFORM(6) uint block_height; +UNIFORM(7) uint block_height_mask; +UNIFORM(8) uint block_depth; +UNIFORM(9) uint block_depth_mask; +END_PUSH_CONSTANTS + +layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { + uint swizzle_table[]; +}; + +#if HAS_EXTENDED_TYPES +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; +#endif +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; +layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; + +layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image; + +layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in; + +const uint GOB_SIZE_X = 64; +const uint GOB_SIZE_Y = 8; +const uint GOB_SIZE_Z = 1; +const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; + +const uint GOB_SIZE_X_SHIFT = 6; +const uint GOB_SIZE_Y_SHIFT = 3; +const uint GOB_SIZE_Z_SHIFT = 0; +const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; + +const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); + +uint SwizzleOffset(uvec2 pos) { + pos = pos & SWIZZLE_MASK; + return swizzle_table[pos.y * 64 + pos.x]; +} + +uvec4 ReadTexel(uint offset) { + switch (bytes_per_block_log2) { +#if HAS_EXTENDED_TYPES + case 0: + return uvec4(u8data[offset], 0, 0, 0); + case 1: + return uvec4(u16data[offset / 2], 0, 0, 0); +#else + case 0: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); + case 1: + return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); +#endif + case 2: + return uvec4(u32data[offset / 4], 0, 0, 0); + case 3: + return uvec4(u64data[offset / 8], 0, 0); + case 4: + return u128data[offset / 16]; + } + return uvec4(0); +} + +void main() { + uvec3 pos = gl_GlobalInvocationID + origin; + pos.x <<= bytes_per_block_log2; + + // Read as soon as possible due to its latency + const uint swizzle = SwizzleOffset(pos.xy); + + const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; + + uint offset = 0; + offset += (pos.z >> block_depth) * slice_size; + offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height); + offset += (block_y >> block_height) * block_size; + offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; + offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; + offset += swizzle; + + const uvec4 texel = ReadTexel(offset); + const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; + imageStore(output_image, coord, texel); +} -- cgit v1.2.3