diff options
Diffstat (limited to 'src/video_core/textures')
-rw-r--r-- | src/video_core/textures/astc.cpp | 156 | ||||
-rw-r--r-- | src/video_core/textures/astc.h | 111 | ||||
-rw-r--r-- | src/video_core/textures/decoders.cpp | 52 |
3 files changed, 176 insertions, 143 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 3ab500760..25161df1f 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -151,6 +151,76 @@ private: const IntType& m_Bits; }; +enum class IntegerEncoding { JustBits, Quint, Trit }; + +struct IntegerEncodedValue { + constexpr IntegerEncodedValue() = default; + + constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) + : encoding{encoding_}, num_bits{num_bits_} {} + + constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { + return encoding == other.encoding && num_bits == other.num_bits; + } + + // Returns the number of bits required to encode num_vals values. + u32 GetBitLength(u32 num_vals) const { + u32 total_bits = num_bits * num_vals; + if (encoding == IntegerEncoding::Trit) { + total_bits += (num_vals * 8 + 4) / 5; + } else if (encoding == IntegerEncoding::Quint) { + total_bits += (num_vals * 7 + 2) / 3; + } + return total_bits; + } + + IntegerEncoding encoding{}; + u32 num_bits = 0; + u32 bit_value = 0; + union { + u32 quint_value = 0; + u32 trit_value; + }; +}; + +// Returns a new instance of this struct that corresponds to the +// can take no more than mav_value values +static constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) { + while (mav_value > 0) { + u32 check = mav_value + 1; + + // Is mav_value a power of two? + if (!(check & (check - 1))) { + return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value)); + } + + // Is mav_value of the type 3*2^n - 1? + if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { + return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1)); + } + + // Is mav_value of the type 5*2^n - 1? + if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { + return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1)); + } + + // Apparently it can't be represented with a bounded integer sequence... + // just iterate. + mav_value--; + } + return IntegerEncodedValue(IntegerEncoding::JustBits, 0); +} + +static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() { + std::array<IntegerEncodedValue, 256> encodings{}; + for (std::size_t i = 0; i < encodings.size(); ++i) { + encodings[i] = CreateEncoding(static_cast<u32>(i)); + } + return encodings; +} + +static constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues(); + namespace Tegra::Texture::ASTC { using IntegerEncodedVector = boost::container::static_vector< IntegerEncodedValue, 256, @@ -521,35 +591,41 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { return params; } -static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth, - u32 blockHeight) { - // Don't actually care about the void extent, just read the bits... - for (s32 i = 0; i < 4; ++i) { - strm.ReadBits<13>(); +// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] +// is the same as [(num_bits - 1):0] and repeats all the way down. +template <typename IntType> +static constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) { + if (num_bits == 0 || to_bit == 0) { + return 0; } - - // Decode the RGBA components and renormalize them to the range [0, 255] - u16 r = static_cast<u16>(strm.ReadBits<16>()); - u16 g = static_cast<u16>(strm.ReadBits<16>()); - u16 b = static_cast<u16>(strm.ReadBits<16>()); - u16 a = static_cast<u16>(strm.ReadBits<16>()); - - u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 | - (static_cast<u32>(a) & 0xFF00) << 16; - - for (u32 j = 0; j < blockHeight; j++) { - for (u32 i = 0; i < blockWidth; i++) { - outBuf[j * blockWidth + i] = rgba; + const IntType v = val & static_cast<IntType>((1 << num_bits) - 1); + IntType res = v; + u32 reslen = num_bits; + while (reslen < to_bit) { + u32 comp = 0; + if (num_bits > to_bit - reslen) { + u32 newshift = to_bit - reslen; + comp = num_bits - newshift; + num_bits = newshift; } + res = static_cast<IntType>(res << num_bits); + res = static_cast<IntType>(res | (v >> comp)); + reslen += num_bits; } + return res; } -static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { - for (u32 j = 0; j < blockHeight; j++) { - for (u32 i = 0; i < blockWidth; i++) { - outBuf[j * blockWidth + i] = 0xFFFF00FF; - } +static constexpr std::size_t NumReplicateEntries(u32 num_bits) { + return std::size_t(1) << num_bits; +} + +template <typename IntType, u32 num_bits, u32 to_bit> +static constexpr auto MakeReplicateTable() { + std::array<IntType, NumReplicateEntries(num_bits)> table{}; + for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) { + table[value] = Replicate(value, num_bits, to_bit); } + return table; } static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); @@ -572,6 +648,9 @@ static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8> static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); +static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); +static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); +static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); /// Use a precompiled table with the most common usages, if it's not in the expected range, fallback /// to the runtime implementation static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { @@ -1316,6 +1395,37 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues, #undef READ_INT_VALUES } +static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth, + u32 blockHeight) { + // Don't actually care about the void extent, just read the bits... + for (s32 i = 0; i < 4; ++i) { + strm.ReadBits<13>(); + } + + // Decode the RGBA components and renormalize them to the range [0, 255] + u16 r = static_cast<u16>(strm.ReadBits<16>()); + u16 g = static_cast<u16>(strm.ReadBits<16>()); + u16 b = static_cast<u16>(strm.ReadBits<16>()); + u16 a = static_cast<u16>(strm.ReadBits<16>()); + + u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 | + (static_cast<u32>(a) & 0xFF00) << 16; + + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { + outBuf[j * blockWidth + i] = rgba; + } + } +} + +static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { + outBuf[j * blockWidth + i] = 0xFFFF00FF; + } + } +} + static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, const u32 blockHeight, std::span<u32, 12 * 12> outBuf) { InputBitStream strm(inBuf); diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 0229ae122..14d2beec0 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -9,117 +9,6 @@ namespace Tegra::Texture::ASTC { -enum class IntegerEncoding { JustBits, Quint, Trit }; - -struct IntegerEncodedValue { - constexpr IntegerEncodedValue() = default; - - constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) - : encoding{encoding_}, num_bits{num_bits_} {} - - constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { - return encoding == other.encoding && num_bits == other.num_bits; - } - - // Returns the number of bits required to encode num_vals values. - u32 GetBitLength(u32 num_vals) const { - u32 total_bits = num_bits * num_vals; - if (encoding == IntegerEncoding::Trit) { - total_bits += (num_vals * 8 + 4) / 5; - } else if (encoding == IntegerEncoding::Quint) { - total_bits += (num_vals * 7 + 2) / 3; - } - return total_bits; - } - - IntegerEncoding encoding{}; - u32 num_bits = 0; - u32 bit_value = 0; - union { - u32 quint_value = 0; - u32 trit_value; - }; -}; - -// Returns a new instance of this struct that corresponds to the -// can take no more than mav_value values -constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) { - while (mav_value > 0) { - u32 check = mav_value + 1; - - // Is mav_value a power of two? - if (!(check & (check - 1))) { - return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value)); - } - - // Is mav_value of the type 3*2^n - 1? - if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { - return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1)); - } - - // Is mav_value of the type 5*2^n - 1? - if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { - return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1)); - } - - // Apparently it can't be represented with a bounded integer sequence... - // just iterate. - mav_value--; - } - return IntegerEncodedValue(IntegerEncoding::JustBits, 0); -} - -constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() { - std::array<IntegerEncodedValue, 256> encodings{}; - for (std::size_t i = 0; i < encodings.size(); ++i) { - encodings[i] = CreateEncoding(static_cast<u32>(i)); - } - return encodings; -} - -constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues(); - -// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] -// is the same as [(num_bits - 1):0] and repeats all the way down. -template <typename IntType> -constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) { - if (num_bits == 0 || to_bit == 0) { - return 0; - } - const IntType v = val & static_cast<IntType>((1 << num_bits) - 1); - IntType res = v; - u32 reslen = num_bits; - while (reslen < to_bit) { - u32 comp = 0; - if (num_bits > to_bit - reslen) { - u32 newshift = to_bit - reslen; - comp = num_bits - newshift; - num_bits = newshift; - } - res = static_cast<IntType>(res << num_bits); - res = static_cast<IntType>(res | (v >> comp)); - reslen += num_bits; - } - return res; -} - -constexpr std::size_t NumReplicateEntries(u32 num_bits) { - return std::size_t(1) << num_bits; -} - -template <typename IntType, u32 num_bits, u32 to_bit> -constexpr auto MakeReplicateTable() { - std::array<IntType, NumReplicateEntries(num_bits)> table{}; - for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) { - table[value] = Replicate(value, num_bits, to_bit); - } - return table; -} - -constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); -constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); -constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); - void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index f1f523ad1..c32ae956a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -18,9 +18,9 @@ namespace Tegra::Texture { namespace { -template <bool TO_LINEAR> -void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, - u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { +template <bool TO_LINEAR, u32 BYTES_PER_PIXEL> +void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth, + u32 block_height, u32 block_depth, u32 stride_alignment) { // The origin of the transformation can be configured here, leave it as zero as the current API // doesn't expose it. static constexpr u32 origin_x = 0; @@ -28,9 +28,9 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe static constexpr u32 origin_z = 0; // We can configure here a custom pitch - // As it's not exposed 'width * bpp' will be the expected pitch. - const u32 pitch = width * bytes_per_pixel; - const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel; + // As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch. + const u32 pitch = width * BYTES_PER_PIXEL; + const u32 stride = Common::AlignUpLog2(width, stride_alignment) * BYTES_PER_PIXEL; const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); @@ -54,14 +54,14 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe ((block_y & block_height_mask) << GOB_SIZE_SHIFT); for (u32 column = 0; column < width; ++column) { - const u32 x = (column + origin_x) * bytes_per_pixel; + const u32 x = (column + origin_x) * BYTES_PER_PIXEL; const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift; const u32 base_swizzled_offset = offset_z + offset_y + offset_x; const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X]; const u32 unswizzled_offset = - slice * pitch * height + line * pitch + column * bytes_per_pixel; + slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL; if (const auto offset = (TO_LINEAR ? unswizzled_offset : swizzled_offset); offset >= input.size()) { @@ -73,11 +73,45 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset]; - std::memcpy(dst, src, bytes_per_pixel); + + std::memcpy(dst, src, BYTES_PER_PIXEL); } } } } + +template <bool TO_LINEAR> +void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { + switch (bytes_per_pixel) { + case 1: + return SwizzleImpl<TO_LINEAR, 1>(output, input, width, height, depth, block_height, + block_depth, stride_alignment); + case 2: + return SwizzleImpl<TO_LINEAR, 2>(output, input, width, height, depth, block_height, + block_depth, stride_alignment); + case 3: + return SwizzleImpl<TO_LINEAR, 3>(output, input, width, height, depth, block_height, + block_depth, stride_alignment); + case 4: + return SwizzleImpl<TO_LINEAR, 4>(output, input, width, height, depth, block_height, + block_depth, stride_alignment); + case 6: + return SwizzleImpl<TO_LINEAR, 6>(output, input, width, height, depth, block_height, + block_depth, stride_alignment); + case 8: + return SwizzleImpl<TO_LINEAR, 8>(output, input, width, height, depth, block_height, + block_depth, stride_alignment); + case 12: + return SwizzleImpl<TO_LINEAR, 12>(output, input, width, height, depth, block_height, + block_depth, stride_alignment); + case 16: + return SwizzleImpl<TO_LINEAR, 16>(output, input, width, height, depth, block_height, + block_depth, stride_alignment); + default: + UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); + } +} } // Anonymous namespace void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |