summaryrefslogtreecommitdiffstats
path: root/src/video_core/textures
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/textures/astc.cpp58
-rw-r--r--src/video_core/textures/astc.h5
-rw-r--r--src/video_core/textures/convert.cpp93
-rw-r--r--src/video_core/textures/convert.h22
-rw-r--r--src/video_core/textures/decoders.cpp249
-rw-r--r--src/video_core/textures/decoders.h44
-rw-r--r--src/video_core/textures/texture.cpp16
-rw-r--r--src/video_core/textures/texture.h239
8 files changed, 274 insertions, 452 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 365bde2f1..acd5bdd78 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -18,6 +18,7 @@
#include <algorithm>
#include <cassert>
#include <cstring>
+#include <span>
#include <vector>
#include <boost/container/static_vector.hpp>
@@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
return params;
}
-static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth,
+static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
u32 blockHeight) {
// Don't actually care about the void extent, just read the bits...
for (s32 i = 0; i < 4; ++i) {
@@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block
}
}
-static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {
+static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
for (u32 j = 0; j < blockHeight; j++) {
for (u32 i = 0; i < blockWidth; i++) {
outBuf[j * blockWidth + i] = 0xFFFF00FF;
@@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
#undef READ_INT_VALUES
}
-static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight,
- u32* outBuf) {
- InputBitStream strm(inBuf);
+static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
+ const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
+ InputBitStream strm(inBuf.data());
TexelWeightParams weightParams = DecodeBlockInfo(strm);
// Was there an error?
@@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
}
// Read the texel weight data..
- u8 texelWeightData[16];
- memcpy(texelWeightData, inBuf, sizeof(texelWeightData));
+ std::array<u8, 16> texelWeightData;
+ std::ranges::copy(inBuf, texelWeightData.begin());
// Reverse everything
for (u32 i = 0; i < 8; i++) {
@@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
// Make sure that higher non-texel bits are set to zero
const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
- texelWeightData[clearByteStart - 1] =
- texelWeightData[clearByteStart - 1] &
- static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
- memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
+ if (clearByteStart > 0) {
+ texelWeightData[clearByteStart - 1] &=
+ static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
+ }
+ std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));
IntegerEncodedVector texelWeightValues;
- InputBitStream weightStream(texelWeightData);
+ InputBitStream weightStream(texelWeightData.data());
DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
weightParams.GetNumWeightValues());
@@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
namespace Tegra::Texture::ASTC {
-std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width,
- u32 block_height) {
- u32 blockIdx = 0;
+void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
+ u32 block_index = 0;
std::size_t depth_offset = 0;
- std::vector<u8> outData(height * width * depth * 4);
- for (u32 k = 0; k < depth; k++) {
- for (u32 j = 0; j < height; j += block_height) {
- for (u32 i = 0; i < width; i += block_width) {
-
- const u8* blockPtr = data + blockIdx * 16;
+ for (u32 z = 0; z < depth; z++) {
+ for (u32 y = 0; y < height; y += block_height) {
+ for (u32 x = 0; x < width; x += block_width) {
+ const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
// Blocks can be at most 12x12
- u32 uncompData[144];
+ std::array<u32, 12 * 12> uncompData;
ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
- u32 decompWidth = std::min(block_width, width - i);
- u32 decompHeight = std::min(block_height, height - j);
+ u32 decompWidth = std::min(block_width, width - x);
+ u32 decompHeight = std::min(block_height, height - y);
- u8* outRow = depth_offset + outData.data() + (j * width + i) * 4;
+ const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
for (u32 jj = 0; jj < decompHeight; jj++) {
- memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
+ std::memcpy(outRow.data() + jj * width * 4,
+ uncompData.data() + jj * block_width, decompWidth * 4);
}
-
- blockIdx++;
+ ++block_index;
}
}
depth_offset += height * width * 4;
}
-
- return outData;
}
} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 991cdba72..9105119bc 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -5,11 +5,10 @@
#pragma once
#include <cstdint>
-#include <vector>
namespace Tegra::Texture::ASTC {
-std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
- uint32_t depth, uint32_t block_width, uint32_t block_height);
+void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
deleted file mode 100644
index bd1aebf02..000000000
--- a/src/video_core/textures/convert.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <cstring>
-#include <tuple>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/surface.h"
-#include "video_core/textures/astc.h"
-#include "video_core/textures/convert.h"
-
-namespace Tegra::Texture {
-
-using VideoCore::Surface::PixelFormat;
-
-template <bool reverse>
-void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
- union S8Z24 {
- BitField<0, 24, u32> z24;
- BitField<24, 8, u32> s8;
- };
- static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
-
- union Z24S8 {
- BitField<0, 8, u32> s8;
- BitField<8, 24, u32> z24;
- };
- static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
-
- S8Z24 s8z24_pixel{};
- Z24S8 z24s8_pixel{};
- constexpr auto bpp{
- VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)};
- for (std::size_t y = 0; y < height; ++y) {
- for (std::size_t x = 0; x < width; ++x) {
- const std::size_t offset{bpp * (y * width + x)};
- if constexpr (reverse) {
- std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
- s8z24_pixel.s8.Assign(z24s8_pixel.s8);
- s8z24_pixel.z24.Assign(z24s8_pixel.z24);
- std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
- } else {
- std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
- z24s8_pixel.s8.Assign(s8z24_pixel.s8);
- z24s8_pixel.z24.Assign(s8z24_pixel.z24);
- std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
- }
- }
- }
-}
-
-static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
- SwapS8Z24ToZ24S8<false>(data, width, height);
-}
-
-static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
- SwapS8Z24ToZ24S8<true>(data, width, height);
-}
-
-void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width,
- u32 height, u32 depth, bool convert_astc, bool convert_s8z24) {
- if (convert_astc && IsPixelFormatASTC(pixel_format)) {
- // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
- u32 block_width{};
- u32 block_height{};
- std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
- const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress(
- in_data, width, height, depth, block_width, block_height);
- std::copy(rgba8_data.begin(), rgba8_data.end(), out_data);
-
- } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
- Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height);
- }
-}
-
-void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
- bool convert_astc, bool convert_s8z24) {
- if (convert_astc && IsPixelFormatASTC(pixel_format)) {
- LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
- pixel_format);
- UNREACHABLE();
-
- } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
- Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
- }
-}
-
-} // namespace Tegra::Texture
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
deleted file mode 100644
index d5d6c77bb..000000000
--- a/src/video_core/textures/convert.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace VideoCore::Surface {
-enum class PixelFormat;
-}
-
-namespace Tegra::Texture {
-
-void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format,
- u32 width, u32 height, u32 depth, bool convert_astc,
- bool convert_s8z24);
-
-void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
- u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
-
-} // namespace Tegra::Texture
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 16d46a018..9f5181318 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -2,204 +2,111 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <array>
#include <cmath>
#include <cstring>
+#include <span>
+#include <utility>
+
#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_util.h"
+#include "common/div_ceil.h"
#include "video_core/gpu.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
namespace Tegra::Texture {
-namespace {
+namespace {
/**
- * This table represents the internal swizzle of a gob,
- * in format 16 bytes x 2 sector packing.
+ * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing.
* Calculates the offset of an (x, y) position within a swizzled texture.
* Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188
*/
-template <std::size_t N, std::size_t M, u32 Align>
-struct alignas(64) SwizzleTable {
- static_assert(M * Align == 64, "Swizzle Table does not align to GOB");
- constexpr SwizzleTable() {
- for (u32 y = 0; y < N; ++y) {
- for (u32 x = 0; x < M; ++x) {
- const u32 x2 = x * Align;
- values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
- ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16));
- }
+constexpr SwizzleTable MakeSwizzleTableConst() {
+ SwizzleTable table{};
+ for (u32 y = 0; y < table.size(); ++y) {
+ for (u32 x = 0; x < table[0].size(); ++x) {
+ table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
+ (y % 2) * 16 + (x % 16);
}
}
- const std::array<u16, M>& operator[](std::size_t index) const {
- return values[index];
- }
- std::array<std::array<u16, M>, N> values{};
-};
+ return table;
+}
-constexpr u32 FAST_SWIZZLE_ALIGN = 16;
+constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst();
-constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>();
-constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>();
+template <bool TO_LINEAR>
+void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+ u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
+ // The origin of the transformation can be configured here, leave it as zero as the current API
+ // doesn't expose it.
+ static constexpr u32 origin_x = 0;
+ static constexpr u32 origin_y = 0;
+ static constexpr u32 origin_z = 0;
-/**
- * This function manages ALL the GOBs(Group of Bytes) Inside a single block.
- * Instead of going gob by gob, we map the coordinates inside a block and manage from
- * those. Block_Width is assumed to be 1.
- */
-void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
- const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
- const u32 y_end, const u32 z_end, const u32 tile_offset,
- const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
- const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
- std::array<u8*, 2> data_ptrs;
- u32 z_address = tile_offset;
-
- for (u32 z = z_start; z < z_end; z++) {
- u32 y_address = z_address;
- u32 pixel_base = layer_z * z + y_start * stride_x;
- for (u32 y = y_start; y < y_end; y++) {
- const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
- for (u32 x = x_start; x < x_end; x++) {
- const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]};
- const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
- data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
- data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
- std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
- }
- pixel_base += stride_x;
- if ((y + 1) % GOB_SIZE_Y == 0)
- y_address += GOB_SIZE;
- }
- z_address += xy_block_size;
- }
-}
+ // We can configure here a custom pitch
+ // As it's not exposed 'width * bpp' will be the expected pitch.
+ const u32 pitch = width * bytes_per_pixel;
+ const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel;
-/**
- * This function manages ALL the GOBs(Group of Bytes) Inside a single block.
- * Instead of going gob by gob, we map the coordinates inside a block and manage from
- * those. Block_Width is assumed to be 1.
- */
-void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
- const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
- const u32 y_end, const u32 z_end, const u32 tile_offset,
- const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
- const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
- std::array<u8*, 2> data_ptrs;
- u32 z_address = tile_offset;
- const u32 x_startb = x_start * bytes_per_pixel;
- const u32 x_endb = x_end * bytes_per_pixel;
-
- for (u32 z = z_start; z < z_end; z++) {
- u32 y_address = z_address;
- u32 pixel_base = layer_z * z + y_start * stride_x;
- for (u32 y = y_start; y < y_end; y++) {
- const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y];
- for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) {
- const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]};
- const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
- const u32 pixel_index{out_x + pixel_base};
- data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
- data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
- std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN);
- }
- pixel_base += stride_x;
- if ((y + 1) % GOB_SIZE_Y == 0)
- y_address += GOB_SIZE;
- }
- z_address += xy_block_size;
- }
-}
+ const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
+ const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
+ const u32 slice_size =
+ Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
-/**
- * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue.
- * The body of this function takes care of splitting the swizzled texture into blocks,
- * and managing the extents of it. Once all the parameters of a single block are obtained,
- * the function calls 'ProcessBlock' to process that particular Block.
- *
- * Documentation for the memory layout and decoding can be found at:
- * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
- */
-template <bool fast>
-void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
- const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel,
- const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth,
- const u32 width_spacing) {
- auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
- const u32 stride_x = width * out_bytes_per_pixel;
- const u32 layer_z = height * stride_x;
- const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel;
- constexpr u32 gob_elements_y = GOB_SIZE_Y;
- constexpr u32 gob_elements_z = GOB_SIZE_Z;
- const u32 block_x_elements = gob_elements_x;
- const u32 block_y_elements = gob_elements_y * block_height;
- const u32 block_z_elements = gob_elements_z * block_depth;
- const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing);
- const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements);
- const u32 blocks_on_y = div_ceil(height, block_y_elements);
- const u32 blocks_on_z = div_ceil(depth, block_z_elements);
- const u32 xy_block_size = GOB_SIZE * block_height;
- const u32 block_size = xy_block_size * block_depth;
- u32 tile_offset = 0;
- for (u32 zb = 0; zb < blocks_on_z; zb++) {
- const u32 z_start = zb * block_z_elements;
- const u32 z_end = std::min(depth, z_start + block_z_elements);
- for (u32 yb = 0; yb < blocks_on_y; yb++) {
- const u32 y_start = yb * block_y_elements;
- const u32 y_end = std::min(height, y_start + block_y_elements);
- for (u32 xb = 0; xb < blocks_on_x; xb++) {
- const u32 x_start = xb * block_x_elements;
- const u32 x_end = std::min(width, x_start + block_x_elements);
- if constexpr (fast) {
- FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
- z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
- layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
- } else {
- PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
- z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
- layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
- }
- tile_offset += block_size;
+ const u32 block_height_mask = (1U << block_height) - 1;
+ const u32 block_depth_mask = (1U << block_depth) - 1;
+ const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
+
+ for (u32 slice = 0; slice < depth; ++slice) {
+ const u32 z = slice + origin_z;
+ const u32 offset_z = (z >> block_depth) * slice_size +
+ ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
+ for (u32 line = 0; line < height; ++line) {
+ const u32 y = line + origin_y;
+ const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
+
+ const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
+ const u32 offset_y = (block_y >> block_height) * block_size +
+ ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
+
+ for (u32 column = 0; column < width; ++column) {
+ const u32 x = (column + origin_x) * bytes_per_pixel;
+ const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
+
+ const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
+ const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X];
+
+ const u32 unswizzled_offset =
+ slice * pitch * height + line * pitch + column * bytes_per_pixel;
+
+ u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
+ const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
+ std::memcpy(dst, src, bytes_per_pixel);
}
}
}
}
-
} // Anonymous namespace
-void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
- u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data,
- bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
- const u32 block_height_size{1U << block_height};
- const u32 block_depth_size{1U << block_depth};
- if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) {
- SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
- bytes_per_pixel, out_bytes_per_pixel, block_height_size,
- block_depth_size, width_spacing);
- } else {
- SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
- bytes_per_pixel, out_bytes_per_pixel, block_height_size,
- block_depth_size, width_spacing);
- }
+SwizzleTable MakeSwizzleTable() {
+ return SWIZZLE_TABLE;
}
-void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
- u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
- u32 block_depth, u32 width_spacing) {
- CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
- (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
- bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
- width_spacing);
+void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
+ u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
+ u32 stride_alignment) {
+ Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
+ stride_alignment);
}
-std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
- u32 width, u32 height, u32 depth, u32 block_height,
- u32 block_depth, u32 width_spacing) {
- std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
- UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
- width, height, depth, block_height, block_depth, width_spacing);
- return unswizzled_data;
+void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+ u32 height, u32 depth, u32 block_height, u32 block_depth,
+ u32 stride_alignment) {
+ Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
+ stride_alignment);
}
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
@@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
const u32 gob_address_y =
(dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
- const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
+ const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
for (u32 x = 0; x < subrect_width; ++x) {
const u32 dst_x = x + offset_x;
const u32 gob_address =
@@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
const u32 block_height_mask = (1U << block_height) - 1;
- const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height;
+ const u32 x_shift = GOB_SIZE_SHIFT + block_height;
for (u32 line = 0; line < line_count; ++line) {
const u32 src_y = line + origin_y;
- const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
+ const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
const u32 src_offset_y = (block_y >> block_height) * block_size +
@@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
for (u32 line = 0; line < line_count; ++line) {
- const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y];
+ const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y];
const u32 block_y = line / GOB_SIZE_Y;
const u32 dst_offset_y =
(block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
@@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
const std::size_t gob_address_y =
(y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
- const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
+ const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
const std::size_t gob_address =
gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 01e156bc8..d7cdc81e8 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -4,7 +4,8 @@
#pragma once
-#include <vector>
+#include <span>
+
#include "common/common_types.h"
#include "video_core/textures/texture.h"
@@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8;
constexpr u32 GOB_SIZE_Z = 1;
constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
-constexpr std::size_t GOB_SIZE_X_SHIFT = 6;
-constexpr std::size_t GOB_SIZE_Y_SHIFT = 3;
-constexpr std::size_t GOB_SIZE_Z_SHIFT = 0;
-constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
-
-/// Unswizzles a swizzled texture without changing its format.
-void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
- u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
- u32 block_height = TICEntry::DefaultBlockHeight,
- u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
-
-/// Unswizzles a swizzled texture without changing its format.
-std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
- u32 width, u32 height, u32 depth,
- u32 block_height = TICEntry::DefaultBlockHeight,
- u32 block_depth = TICEntry::DefaultBlockHeight,
- u32 width_spacing = 0);
-
-/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary.
-void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
- u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
- bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
+constexpr u32 GOB_SIZE_X_SHIFT = 6;
+constexpr u32 GOB_SIZE_Y_SHIFT = 3;
+constexpr u32 GOB_SIZE_Z_SHIFT = 0;
+constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
+
+using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>;
+
+/// Returns a z-order swizzle table
+SwizzleTable MakeSwizzleTable();
+
+/// Unswizzles a block linear texture into linear memory.
+void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
+ u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
+ u32 stride_alignment = 1);
+
+/// Swizzles linear memory into a block linear texture.
+void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+ u32 height, u32 depth, u32 block_height, u32 block_depth,
+ u32 stride_alignment = 1);
/// This function calculates the correct size of a texture depending if it's tiled or not.
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 4171e3ef2..ae5621a7d 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -5,9 +5,13 @@
#include <algorithm>
#include <array>
+#include "common/cityhash.h"
#include "core/settings.h"
#include "video_core/textures/texture.h"
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::TSCEntry;
+
namespace Tegra::Texture {
namespace {
@@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept {
} // Anonymous namespace
-std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
+std::array<float, 4> TSCEntry::BorderColor() const noexcept {
if (!srgb_conversion) {
return border_color;
}
@@ -73,8 +77,16 @@ std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
}
-float TSCEntry::GetMaxAnisotropy() const noexcept {
+float TSCEntry::MaxAnisotropy() const noexcept {
return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
}
} // namespace Tegra::Texture
+
+size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept {
+ return Common::CityHash64(reinterpret_cast<const char*>(&tic), sizeof tic);
+}
+
+size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept {
+ return Common::CityHash64(reinterpret_cast<const char*>(&tsc), sizeof tsc);
+}
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index bbc7e3eaf..c1d14335e 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -53,27 +53,27 @@ enum class TextureFormat : u32 {
BC4 = 0x27,
BC5 = 0x28,
S8D24 = 0x29,
- X8Z24 = 0x2a,
+ X8D24 = 0x2a,
D24S8 = 0x2b,
- X4V4Z24__COV4R4V = 0x2c,
- X4V4Z24__COV8R8V = 0x2d,
- V8Z24__COV4R12V = 0x2e,
+ X4V4D24__COV4R4V = 0x2c,
+ X4V4D24__COV8R8V = 0x2d,
+ V8D24__COV4R12V = 0x2e,
D32 = 0x2f,
D32S8 = 0x30,
- X8Z24_X20V4S8__COV4R4V = 0x31,
- X8Z24_X20V4S8__COV8R8V = 0x32,
- ZF32_X20V4X8__COV4R4V = 0x33,
- ZF32_X20V4X8__COV8R8V = 0x34,
- ZF32_X20V4S8__COV4R4V = 0x35,
- ZF32_X20V4S8__COV8R8V = 0x36,
- X8Z24_X16V8S8__COV4R12V = 0x37,
- ZF32_X16V8X8__COV4R12V = 0x38,
- ZF32_X16V8S8__COV4R12V = 0x39,
+ X8D24_X20V4S8__COV4R4V = 0x31,
+ X8D24_X20V4S8__COV8R8V = 0x32,
+ D32_X20V4X8__COV4R4V = 0x33,
+ D32_X20V4X8__COV8R8V = 0x34,
+ D32_X20V4S8__COV4R4V = 0x35,
+ D32_X20V4S8__COV8R8V = 0x36,
+ X8D24_X16V8S8__COV4R12V = 0x37,
+ D32_X16V8X8__COV4R12V = 0x38,
+ D32_X16V8S8__COV4R12V = 0x39,
D16 = 0x3a,
- V8Z24__COV8R24V = 0x3b,
- X8Z24_X16V8S8__COV8R24V = 0x3c,
- ZF32_X16V8X8__COV8R24V = 0x3d,
- ZF32_X16V8S8__COV8R24V = 0x3e,
+ V8D24__COV8R24V = 0x3b,
+ X8D24_X16V8S8__COV8R24V = 0x3c,
+ D32_X16V8X8__COV8R24V = 0x3d,
+ D32_X16V8S8__COV8R24V = 0x3e,
ASTC_2D_4X4 = 0x40,
ASTC_2D_5X5 = 0x41,
ASTC_2D_6X6 = 0x42,
@@ -146,7 +146,7 @@ enum class MsaaMode : u32 {
};
union TextureHandle {
- /* implicit */ TextureHandle(u32 raw_) : raw{raw_} {}
+ /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {}
u32 raw;
BitField<0, 20, u32> tic_id;
@@ -155,124 +155,124 @@ union TextureHandle {
static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
struct TICEntry {
- static constexpr u32 DefaultBlockHeight = 16;
- static constexpr u32 DefaultBlockDepth = 1;
-
- union {
- u32 raw;
- BitField<0, 7, TextureFormat> format;
- BitField<7, 3, ComponentType> r_type;
- BitField<10, 3, ComponentType> g_type;
- BitField<13, 3, ComponentType> b_type;
- BitField<16, 3, ComponentType> a_type;
-
- BitField<19, 3, SwizzleSource> x_source;
- BitField<22, 3, SwizzleSource> y_source;
- BitField<25, 3, SwizzleSource> z_source;
- BitField<28, 3, SwizzleSource> w_source;
- };
- u32 address_low;
union {
- BitField<0, 16, u32> address_high;
- BitField<21, 3, TICHeaderVersion> header_version;
- };
- union {
- BitField<0, 3, u32> block_width;
- BitField<3, 3, u32> block_height;
- BitField<6, 3, u32> block_depth;
+ struct {
+ union {
+ BitField<0, 7, TextureFormat> format;
+ BitField<7, 3, ComponentType> r_type;
+ BitField<10, 3, ComponentType> g_type;
+ BitField<13, 3, ComponentType> b_type;
+ BitField<16, 3, ComponentType> a_type;
+
+ BitField<19, 3, SwizzleSource> x_source;
+ BitField<22, 3, SwizzleSource> y_source;
+ BitField<25, 3, SwizzleSource> z_source;
+ BitField<28, 3, SwizzleSource> w_source;
+ };
+ u32 address_low;
+ union {
+ BitField<0, 16, u32> address_high;
+ BitField<16, 5, u32> layer_base_3_7;
+ BitField<21, 3, TICHeaderVersion> header_version;
+ BitField<24, 1, u32> load_store_hint;
+ BitField<25, 4, u32> view_coherency_hash;
+ BitField<29, 3, u32> layer_base_8_10;
+ };
+ union {
+ BitField<0, 3, u32> block_width;
+ BitField<3, 3, u32> block_height;
+ BitField<6, 3, u32> block_depth;
- BitField<10, 3, u32> tile_width_spacing;
+ BitField<10, 3, u32> tile_width_spacing;
- // High 16 bits of the pitch value
- BitField<0, 16, u32> pitch_high;
- BitField<26, 1, u32> use_header_opt_control;
- BitField<27, 1, u32> depth_texture;
- BitField<28, 4, u32> max_mip_level;
+ // High 16 bits of the pitch value
+ BitField<0, 16, u32> pitch_high;
+ BitField<26, 1, u32> use_header_opt_control;
+ BitField<27, 1, u32> depth_texture;
+ BitField<28, 4, u32> max_mip_level;
- BitField<0, 16, u32> buffer_high_width_minus_one;
- };
- union {
- BitField<0, 16, u32> width_minus_1;
- BitField<22, 1, u32> srgb_conversion;
- BitField<23, 4, TextureType> texture_type;
- BitField<29, 3, u32> border_size;
+ BitField<0, 16, u32> buffer_high_width_minus_one;
+ };
+ union {
+ BitField<0, 16, u32> width_minus_one;
+ BitField<16, 3, u32> layer_base_0_2;
+ BitField<22, 1, u32> srgb_conversion;
+ BitField<23, 4, TextureType> texture_type;
+ BitField<29, 3, u32> border_size;
- BitField<0, 16, u32> buffer_low_width_minus_one;
- };
- union {
- BitField<0, 16, u32> height_minus_1;
- BitField<16, 14, u32> depth_minus_1;
- };
- union {
- BitField<6, 13, u32> mip_lod_bias;
- BitField<27, 3, u32> max_anisotropy;
+ BitField<0, 16, u32> buffer_low_width_minus_one;
+ };
+ union {
+ BitField<0, 16, u32> height_minus_1;
+ BitField<16, 14, u32> depth_minus_1;
+ BitField<30, 1, u32> is_sparse;
+ BitField<31, 1, u32> normalized_coords;
+ };
+ union {
+ BitField<6, 13, u32> mip_lod_bias;
+ BitField<27, 3, u32> max_anisotropy;
+ };
+ union {
+ BitField<0, 4, u32> res_min_mip_level;
+ BitField<4, 4, u32> res_max_mip_level;
+ BitField<8, 4, MsaaMode> msaa_mode;
+ BitField<12, 12, u32> min_lod_clamp;
+ };
+ };
+ std::array<u64, 4> raw;
};
- union {
- BitField<0, 4, u32> res_min_mip_level;
- BitField<4, 4, u32> res_max_mip_level;
- BitField<8, 4, MsaaMode> msaa_mode;
- BitField<12, 12, u32> min_lod_clamp;
- };
+ constexpr bool operator==(const TICEntry& rhs) const noexcept {
+ return raw == rhs.raw;
+ }
- GPUVAddr Address() const {
+ constexpr bool operator!=(const TICEntry& rhs) const noexcept {
+ return raw != rhs.raw;
+ }
+
+ constexpr GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
}
- u32 Pitch() const {
+ constexpr u32 Pitch() const {
ASSERT(header_version == TICHeaderVersion::Pitch ||
header_version == TICHeaderVersion::PitchColorKey);
// The pitch value is 21 bits, and is 32B aligned.
return pitch_high << 5;
}
- u32 Width() const {
+ constexpr u32 Width() const {
if (header_version != TICHeaderVersion::OneDBuffer) {
- return width_minus_1 + 1;
+ return width_minus_one + 1;
}
- return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1;
+ return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1;
}
- u32 Height() const {
+ constexpr u32 Height() const {
return height_minus_1 + 1;
}
- u32 Depth() const {
+ constexpr u32 Depth() const {
return depth_minus_1 + 1;
}
- u32 BlockWidth() const {
- ASSERT(IsTiled());
- return block_width;
- }
-
- u32 BlockHeight() const {
- ASSERT(IsTiled());
- return block_height;
- }
-
- u32 BlockDepth() const {
- ASSERT(IsTiled());
- return block_depth;
+ constexpr u32 BaseLayer() const {
+ return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8;
}
- bool IsTiled() const {
+ constexpr bool IsBlockLinear() const {
return header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey;
}
- bool IsLineal() const {
+ constexpr bool IsPitchLinear() const {
return header_version == TICHeaderVersion::Pitch ||
header_version == TICHeaderVersion::PitchColorKey;
}
- bool IsBuffer() const {
+ constexpr bool IsBuffer() const {
return header_version == TICHeaderVersion::OneDBuffer;
}
-
- bool IsSrgbConversionEnabled() const {
- return srgb_conversion != 0;
- }
};
static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");
@@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 {
Linear = 3,
};
+enum class SamplerReduction : u32 {
+ WeightedAverage = 0,
+ Min = 1,
+ Max = 2,
+};
+
enum class Anisotropy {
Default,
Filter2x,
@@ -333,8 +339,12 @@ struct TSCEntry {
BitField<0, 2, TextureFilter> mag_filter;
BitField<4, 2, TextureFilter> min_filter;
BitField<6, 2, TextureMipmapFilter> mipmap_filter;
+ BitField<8, 1, u32> cubemap_anisotropy;
BitField<9, 1, u32> cubemap_interface_filtering;
+ BitField<10, 2, SamplerReduction> reduction_filter;
BitField<12, 13, u32> mip_lod_bias;
+ BitField<25, 1, u32> float_coord_normalization;
+ BitField<26, 5, u32> trilin_opt;
};
union {
BitField<0, 12, u32> min_lod_clamp;
@@ -347,32 +357,45 @@ struct TSCEntry {
};
std::array<f32, 4> border_color;
};
- std::array<u8, 0x20> raw;
+ std::array<u64, 4> raw;
};
- std::array<float, 4> GetBorderColor() const noexcept;
+ constexpr bool operator==(const TSCEntry& rhs) const noexcept {
+ return raw == rhs.raw;
+ }
+
+ constexpr bool operator!=(const TSCEntry& rhs) const noexcept {
+ return raw != rhs.raw;
+ }
+
+ std::array<float, 4> BorderColor() const noexcept;
- float GetMaxAnisotropy() const noexcept;
+ float MaxAnisotropy() const noexcept;
- float GetMinLod() const {
+ float MinLod() const {
return static_cast<float>(min_lod_clamp) / 256.0f;
}
- float GetMaxLod() const {
+ float MaxLod() const {
return static_cast<float>(max_lod_clamp) / 256.0f;
}
- float GetLodBias() const {
+ float LodBias() const {
// Sign extend the 13-bit value.
- constexpr u32 mask = 1U << (13 - 1);
+ static constexpr u32 mask = 1U << (13 - 1);
return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
}
};
static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
-struct FullTextureInfo {
- TICEntry tic;
- TSCEntry tsc;
+} // namespace Tegra::Texture
+
+template <>
+struct std::hash<Tegra::Texture::TICEntry> {
+ size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept;
};
-} // namespace Tegra::Texture
+template <>
+struct std::hash<Tegra::Texture::TSCEntry> {
+ size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept;
+};