diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 2 | ||||
-rw-r--r-- | src/video_core/textures/astc.cpp | 58 |
3 files changed, 38 insertions, 24 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 3441a5fe5..d608678a3 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -1065,7 +1065,7 @@ TexelWeightParams DecodeBlockInfo() { void FillError(ivec3 coord) { for (uint j = 0; j < block_dims.y; j++) { for (uint i = 0; i < block_dims.x; i++) { - imageStore(dest_image, coord + ivec3(i, j, 0), vec4(1.0, 1.0, 0.0, 1.0)); + imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0)); } } } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 32450ee1d..08f4d69ab 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -168,7 +168,7 @@ void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { if (has_unified_vertex_buffers) { buffer.MakeResident(GL_READ_ONLY); glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, - static_cast<GLsizeiptr>(size)); + static_cast<GLsizeiptr>(Common::AlignUp(size, 4))); } else { glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); index_buffer_offset = offset; diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index e3f3d3c5d..e3742ddf5 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -13,7 +13,9 @@ #include <boost/container/static_vector.hpp> +#include "common/alignment.h" #include "common/common_types.h" +#include "common/thread_worker.h" #include "video_core/textures/astc.h" class InputBitStream { @@ -1411,7 +1413,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 b static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { for (u32 j = 0; j < blockHeight; j++) { for (u32 i = 0; i < blockWidth; i++) { - outBuf[j * blockWidth + i] = 0xFFFF00FF; + outBuf[j * blockWidth + i] = 0x00000000; } } } @@ -1650,29 +1652,41 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { - u32 block_index = 0; - std::size_t depth_offset = 0; - for (u32 z = 0; z < depth; z++) { - for (u32 y = 0; y < height; y += block_height) { - for (u32 x = 0; x < width; x += block_width) { - const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; - - // Blocks can be at most 12x12 - std::array<u32, 12 * 12> uncompData; - DecompressBlock(blockPtr, block_width, block_height, uncompData); - - u32 decompWidth = std::min(block_width, width - x); - u32 decompHeight = std::min(block_height, height - y); - - const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); - for (u32 jj = 0; jj < decompHeight; jj++) { - std::memcpy(outRow.data() + jj * width * 4, - uncompData.data() + jj * block_width, decompWidth * 4); + const u32 rows = Common::DivideUp(height, block_height); + const u32 cols = Common::DivideUp(width, block_width); + + Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2, + "yuzu:ASTCDecompress"}; + + for (u32 z = 0; z < depth; ++z) { + const u32 depth_offset = z * height * width * 4; + for (u32 y_index = 0; y_index < rows; ++y_index) { + auto decompress_stride = [data, width, height, depth, block_width, block_height, output, + rows, cols, z, depth_offset, y_index] { + const u32 y = y_index * block_height; + for (u32 x_index = 0; x_index < cols; ++x_index) { + const u32 block_index = (z * rows * cols) + (y_index * cols) + x_index; + const u32 x = x_index * block_width; + + const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; + + // Blocks can be at most 12x12 + std::array<u32, 12 * 12> uncompData; + DecompressBlock(blockPtr, block_width, block_height, uncompData); + + u32 decompWidth = std::min(block_width, width - x); + u32 decompHeight = std::min(block_height, height - y); + + const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); + for (u32 h = 0; h < decompHeight; ++h) { + std::memcpy(outRow.data() + h * width * 4, + uncompData.data() + h * block_width, decompWidth * 4); + } } - ++block_index; - } + }; + workers.QueueWork(std::move(decompress_stride)); } - depth_offset += height * width * 4; + workers.WaitForRequests(); } } |