From 5665d055476fa793192523c3cb6fe06369d58674 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 22:48:41 -0400 Subject: astc_decoder: Optimize the use EncodingData This buffer was a list of EncodingData structures sorted by their bit length, with some duplication from the cpu decoder implementation. We can take advantage of its sorted property to optimize its usage in the shader. Thanks to wwylele for the optimization idea. --- src/video_core/renderer_opengl/util_shaders.cpp | 13 ++++--------- src/video_core/renderer_opengl/util_shaders.h | 1 - 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 37a4d1d9d..a2b264700 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -60,19 +60,15 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); swizzle_table_buffer.Create(); - astc_buffer.Create(); glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); - glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES, - 0); } UtilShaders::~UtilShaders() = default; void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, std::span swizzles) { - static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; - static constexpr GLuint BINDING_INPUT_BUFFER = 1; - static constexpr GLuint BINDING_ENC_BUFFER = 2; + static constexpr GLuint BINDING_INPUT_BUFFER = 0; + static constexpr GLuint BINDING_SWIZZLE_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; const Extent2D tile_size{ @@ -81,7 +77,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, }; program_manager.BindComputeProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(1, tile_size.width, tile_size.height); @@ -103,11 +98,11 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, glUniform1ui(6, params.block_height); glUniform1ui(7, params.block_height_mask); - glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, - GL_WRITE_ONLY, GL_RGBA8); // ASTC texture data glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, + GL_WRITE_ONLY, GL_RGBA8); glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); } diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 53d65f368..ef881e35f 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -62,7 +62,6 @@ private: ProgramManager& program_manager; OGLBuffer swizzle_table_buffer; - OGLBuffer astc_buffer; OGLProgram astc_decoder_program; OGLProgram block_linear_unswizzle_2d_program; -- cgit v1.2.3 From 5ab80535118e593ef3add3ce2b5935437e1dc1d3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 31 Jul 2021 22:24:15 -0400 Subject: astc_decoder: Compute offset swizzles in-shader Alleviates the dependency on the swizzle table and a uniform which is constant for all ASTC texture sizes. --- src/video_core/renderer_opengl/util_shaders.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index a2b264700..4e6f7cb00 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -68,7 +68,6 @@ UtilShaders::~UtilShaders() = default; void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, std::span swizzles) { static constexpr GLuint BINDING_INPUT_BUFFER = 0; - static constexpr GLuint BINDING_SWIZZLE_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; const Extent2D tile_size{ @@ -76,10 +75,9 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; program_manager.BindComputeProgram(astc_decoder_program.handle); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); - glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(1, tile_size.width, tile_size.height); + // Ensure buffer data is valid before dispatching glFlush(); for (const SwizzleParameters& swizzle : swizzles) { @@ -90,13 +88,13 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); ASSERT(params.destination == (std::array{0, 0, 0})); + ASSERT(params.bytes_per_block_log2 == 4); - glUniform1ui(2, params.bytes_per_block_log2); - glUniform1ui(3, params.layer_stride); - glUniform1ui(4, params.block_size); - glUniform1ui(5, params.x_shift); - glUniform1ui(6, params.block_height); - glUniform1ui(7, params.block_height_mask); + glUniform1ui(2, params.layer_stride); + glUniform1ui(3, params.block_size); + glUniform1ui(4, params.x_shift); + glUniform1ui(5, params.block_height); + glUniform1ui(6, params.block_height_mask); // ASTC texture data glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, -- cgit v1.2.3 From c439fc9be994583801418743ab202fb63d1c83a0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 31 Jul 2021 23:55:20 -0400 Subject: astc_decoder: Reduce workgroup size This reduces the amount of over dispatching when there are odd dimensions (i.e. ASTC 8x5), which rarely evenly divide into 32x32. --- src/video_core/renderer_opengl/util_shaders.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 4e6f7cb00..333f35a1c 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -82,8 +82,8 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, glFlush(); for (const SwizzleParameters& swizzle : swizzles) { const size_t input_offset = swizzle.buffer_offset + map.offset; - const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); - const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); + const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U); + const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U); const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); -- cgit v1.2.3