summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMat M <mathew1800@gmail.com>2020-04-13 16:17:32 +0200
committerGitHub <noreply@github.com>2020-04-13 16:17:32 +0200
commitc4001225f64bbea511cf8df0885c77cb6ba70091 (patch)
tree850d379792b1018cddd9282da50fade32e38fa4f /src
parentMerge pull request #3619 from ReinUsesLisp/i2i (diff)
parentastc: Hard code bit depth changes to 8 and use fast replicate (diff)
downloadyuzu-c4001225f64bbea511cf8df0885c77cb6ba70091.tar
yuzu-c4001225f64bbea511cf8df0885c77cb6ba70091.tar.gz
yuzu-c4001225f64bbea511cf8df0885c77cb6ba70091.tar.bz2
yuzu-c4001225f64bbea511cf8df0885c77cb6ba70091.tar.lz
yuzu-c4001225f64bbea511cf8df0885c77cb6ba70091.tar.xz
yuzu-c4001225f64bbea511cf8df0885c77cb6ba70091.tar.zst
yuzu-c4001225f64bbea511cf8df0885c77cb6ba70091.zip
Diffstat (limited to 'src')
-rw-r--r--src/video_core/textures/astc.cpp241
1 files changed, 159 insertions, 82 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 062b4f252..365bde2f1 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -20,6 +20,8 @@
#include <cstring>
#include <vector>
+#include <boost/container/static_vector.hpp>
+
#include "common/common_types.h"
#include "video_core/textures/astc.h"
@@ -39,25 +41,25 @@ constexpr u32 Popcnt(u32 n) {
class InputBitStream {
public:
- explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)
- : m_CurByte(ptr), m_NextBit(start_offset % 8) {}
+ constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)
+ : cur_byte{ptr}, next_bit{start_offset % 8} {}
- std::size_t GetBitsRead() const {
- return m_BitsRead;
+ constexpr std::size_t GetBitsRead() const {
+ return bits_read;
}
- u32 ReadBit() {
- u32 bit = *m_CurByte >> m_NextBit++;
- while (m_NextBit >= 8) {
- m_NextBit -= 8;
- m_CurByte++;
+ constexpr bool ReadBit() {
+ const bool bit = (*cur_byte >> next_bit++) & 1;
+ while (next_bit >= 8) {
+ next_bit -= 8;
+ cur_byte++;
}
- m_BitsRead++;
- return bit & 1;
+ bits_read++;
+ return bit;
}
- u32 ReadBits(std::size_t nBits) {
+ constexpr u32 ReadBits(std::size_t nBits) {
u32 ret = 0;
for (std::size_t i = 0; i < nBits; ++i) {
ret |= (ReadBit() & 1) << i;
@@ -66,7 +68,7 @@ public:
}
template <std::size_t nBits>
- u32 ReadBits() {
+ constexpr u32 ReadBits() {
u32 ret = 0;
for (std::size_t i = 0; i < nBits; ++i) {
ret |= (ReadBit() & 1) << i;
@@ -75,64 +77,58 @@ public:
}
private:
- const u8* m_CurByte;
- std::size_t m_NextBit = 0;
- std::size_t m_BitsRead = 0;
+ const u8* cur_byte;
+ std::size_t next_bit = 0;
+ std::size_t bits_read = 0;
};
class OutputBitStream {
public:
- explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0)
- : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
-
- ~OutputBitStream() = default;
+ constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0)
+ : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {}
- s32 GetBitsWritten() const {
- return m_BitsWritten;
+ constexpr std::size_t GetBitsWritten() const {
+ return bits_written;
}
- void WriteBitsR(u32 val, u32 nBits) {
+ constexpr void WriteBitsR(u32 val, u32 nBits) {
for (u32 i = 0; i < nBits; i++) {
WriteBit((val >> (nBits - i - 1)) & 1);
}
}
- void WriteBits(u32 val, u32 nBits) {
+ constexpr void WriteBits(u32 val, u32 nBits) {
for (u32 i = 0; i < nBits; i++) {
WriteBit((val >> i) & 1);
}
}
private:
- void WriteBit(s32 b) {
-
- if (done)
+ constexpr void WriteBit(bool b) {
+ if (bits_written >= num_bits) {
return;
+ }
- const u32 mask = 1 << m_NextBit++;
+ const u32 mask = 1 << next_bit++;
// clear the bit
- *m_CurByte &= static_cast<u8>(~mask);
+ *cur_byte &= static_cast<u8>(~mask);
// Write the bit, if necessary
if (b)
- *m_CurByte |= static_cast<u8>(mask);
+ *cur_byte |= static_cast<u8>(mask);
// Next byte?
- if (m_NextBit >= 8) {
- m_CurByte += 1;
- m_NextBit = 0;
+ if (next_bit >= 8) {
+ cur_byte += 1;
+ next_bit = 0;
}
-
- done = done || ++m_BitsWritten >= m_NumBits;
}
- s32 m_BitsWritten = 0;
- const s32 m_NumBits;
- u8* m_CurByte;
- s32 m_NextBit = 0;
-
- bool done = false;
+ u8* cur_byte;
+ std::size_t num_bits;
+ std::size_t bits_written = 0;
+ std::size_t next_bit = 0;
};
template <typename IntType>
@@ -195,9 +191,13 @@ struct IntegerEncodedValue {
u32 trit_value;
};
};
+using IntegerEncodedVector = boost::container::static_vector<
+ IntegerEncodedValue, 64,
+ boost::container::static_vector_options<
+ boost::container::inplace_alignment<alignof(IntegerEncodedValue)>,
+ boost::container::throw_on_overflow<false>>::type>;
-static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
- u32 nBitsPerValue) {
+static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) {
// Implement the algorithm in section C.2.12
u32 m[5];
u32 t[5];
@@ -255,7 +255,7 @@ static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValu
}
}
-static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
+static void DecodeQus32Block(InputBitStream& bits, IntegerEncodedVector& result,
u32 nBitsPerValue) {
// Implement the algorithm in section C.2.12
u32 m[3];
@@ -343,8 +343,8 @@ static constexpr std::array EncodingsValues = MakeEncodedValues();
// Fills result with the values that are encoded in the given
// bitstream. We must know beforehand what the maximum possible
// value is, and how many values we're decoding.
-static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits,
- u32 maxRange, u32 nValues) {
+static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange,
+ u32 nValues) {
// Determine encoding parameters
IntegerEncodedValue val = EncodingsValues[maxRange];
@@ -634,12 +634,14 @@ static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {
// Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)]
// is the same as [(numBits - 1):0] and repeats all the way down.
template <typename IntType>
-static IntType Replicate(IntType val, u32 numBits, u32 toBit) {
- if (numBits == 0)
+static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) {
+ if (numBits == 0) {
return 0;
- if (toBit == 0)
+ }
+ if (toBit == 0) {
return 0;
- IntType v = val & static_cast<IntType>((1 << numBits) - 1);
+ }
+ const IntType v = val & static_cast<IntType>((1 << numBits) - 1);
IntType res = v;
u32 reslen = numBits;
while (reslen < toBit) {
@@ -656,6 +658,89 @@ static IntType Replicate(IntType val, u32 numBits, u32 toBit) {
return res;
}
+static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
+ return std::size_t(1) << num_bits;
+}
+
+template <typename IntType, u32 num_bits, u32 to_bit>
+static constexpr auto MakeReplicateTable() {
+ std::array<IntType, NumReplicateEntries(num_bits)> table{};
+ for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
+ table[value] = Replicate(value, num_bits, to_bit);
+ }
+ return table;
+}
+
+static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
+static constexpr u32 ReplicateByteTo16(std::size_t value) {
+ return REPLICATE_BYTE_TO_16_TABLE[value];
+}
+
+static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>();
+static constexpr u32 ReplicateBitTo7(std::size_t value) {
+ return REPLICATE_BIT_TO_7_TABLE[value];
+}
+
+static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>();
+static constexpr u32 ReplicateBitTo9(std::size_t value) {
+ return REPLICATE_BIT_TO_9_TABLE[value];
+}
+
+static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>();
+static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>();
+static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
+static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
+static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
+static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
+static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
+static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
+/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
+/// to the runtime implementation
+static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
+ switch (num_bits) {
+ case 1:
+ return REPLICATE_1_BIT_TO_8_TABLE[value];
+ case 2:
+ return REPLICATE_2_BIT_TO_8_TABLE[value];
+ case 3:
+ return REPLICATE_3_BIT_TO_8_TABLE[value];
+ case 4:
+ return REPLICATE_4_BIT_TO_8_TABLE[value];
+ case 5:
+ return REPLICATE_5_BIT_TO_8_TABLE[value];
+ case 6:
+ return REPLICATE_6_BIT_TO_8_TABLE[value];
+ case 7:
+ return REPLICATE_7_BIT_TO_8_TABLE[value];
+ case 8:
+ return REPLICATE_8_BIT_TO_8_TABLE[value];
+ default:
+ return Replicate(value, num_bits, 8);
+ }
+}
+
+static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>();
+static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>();
+static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>();
+static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>();
+static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>();
+static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) {
+ switch (num_bits) {
+ case 1:
+ return REPLICATE_1_BIT_TO_6_TABLE[value];
+ case 2:
+ return REPLICATE_2_BIT_TO_6_TABLE[value];
+ case 3:
+ return REPLICATE_3_BIT_TO_6_TABLE[value];
+ case 4:
+ return REPLICATE_4_BIT_TO_6_TABLE[value];
+ case 5:
+ return REPLICATE_5_BIT_TO_6_TABLE[value];
+ default:
+ return Replicate(value, num_bits, 6);
+ }
+}
+
class Pixel {
protected:
using ChannelType = s16;
@@ -674,10 +759,10 @@ public:
// significant bits when going from larger to smaller bit depth
// or by repeating the most significant bits when going from
// smaller to larger bit depths.
- void ChangeBitDepth(const u8 (&depth)[4]) {
+ void ChangeBitDepth() {
for (u32 i = 0; i < 4; i++) {
- Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]);
- m_BitDepth[i] = depth[i];
+ Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]);
+ m_BitDepth[i] = 8;
}
}
@@ -689,28 +774,23 @@ public:
// Changes the bit depth of a single component. See the comment
// above for how we do this.
- static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) {
- assert(newDepth <= 8);
+ static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) {
assert(oldDepth <= 8);
- if (oldDepth == newDepth) {
+ if (oldDepth == 8) {
// Do nothing
return val;
- } else if (oldDepth == 0 && newDepth != 0) {
- return static_cast<ChannelType>((1 << newDepth) - 1);
- } else if (newDepth > oldDepth) {
- return Replicate(val, oldDepth, newDepth);
+ } else if (oldDepth == 0) {
+ return static_cast<ChannelType>((1 << 8) - 1);
+ } else if (8 > oldDepth) {
+ return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth));
} else {
// oldDepth > newDepth
- if (newDepth == 0) {
- return 0xFF;
- } else {
- u8 bitsWasted = static_cast<u8>(oldDepth - newDepth);
- u16 v = static_cast<u16>(val);
- v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
- v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1));
- return static_cast<u8>(v);
- }
+ const u8 bitsWasted = static_cast<u8>(oldDepth - 8);
+ u16 v = static_cast<u16>(val);
+ v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
+ v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1));
+ return static_cast<u8>(v);
}
assert(false && "We shouldn't get here.");
@@ -760,8 +840,7 @@ public:
// up in the most-significant byte.
u32 Pack() const {
Pixel eightBit(*this);
- const u8 eightBitDepth[4] = {8, 8, 8, 8};
- eightBit.ChangeBitDepth(eightBitDepth);
+ eightBit.ChangeBitDepth();
u32 r = 0;
r |= eightBit.A();
@@ -816,8 +895,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP
}
// We now have enough to decode our integer sequence.
- std::vector<IntegerEncodedValue> decodedColorValues;
- decodedColorValues.reserve(32);
+ IntegerEncodedVector decodedColorValues;
InputBitStream colorStream(data);
DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
@@ -839,12 +917,12 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP
u32 A = 0, B = 0, C = 0, D = 0;
// A is just the lsb replicated 9 times.
- A = Replicate(bitval & 1, 1, 9);
+ A = ReplicateBitTo9(bitval & 1);
switch (val.encoding) {
// Replicate bits
case IntegerEncoding::JustBits:
- out[outIdx++] = Replicate(bitval, bitlen, 8);
+ out[outIdx++] = FastReplicateTo8(bitval, bitlen);
break;
// Use algorithm in C.2.13
@@ -962,13 +1040,13 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
u32 bitval = val.bit_value;
u32 bitlen = val.num_bits;
- u32 A = Replicate(bitval & 1, 1, 7);
+ u32 A = ReplicateBitTo7(bitval & 1);
u32 B = 0, C = 0, D = 0;
u32 result = 0;
switch (val.encoding) {
case IntegerEncoding::JustBits:
- result = Replicate(bitval, bitlen, 6);
+ result = FastReplicateTo6(bitval, bitlen);
break;
case IntegerEncoding::Trit: {
@@ -1047,7 +1125,7 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
return result;
}
-static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights,
+static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights,
const TexelWeightParams& params, const u32 blockWidth,
const u32 blockHeight) {
u32 weightIdx = 0;
@@ -1545,8 +1623,7 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
- std::vector<IntegerEncodedValue> texelWeightValues;
- texelWeightValues.reserve(64);
+ IntegerEncodedVector texelWeightValues;
InputBitStream weightStream(texelWeightData);
@@ -1568,9 +1645,9 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
Pixel p;
for (u32 c = 0; c < 4; c++) {
u32 C0 = endpos32s[partition][0].Component(c);
- C0 = Replicate(C0, 8, 16);
+ C0 = ReplicateByteTo16(C0);
u32 C1 = endpos32s[partition][1].Component(c);
- C1 = Replicate(C1, 8, 16);
+ C1 = ReplicateByteTo16(C1);
u32 plane = 0;
if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) {