summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/engines/maxwell_3d.cpp6
-rw-r--r--src/video_core/rasterizer_cache.h35
-rw-r--r--src/video_core/renderer_base.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp40
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_state.h2
-rw-r--r--src/video_core/surface.cpp4
-rw-r--r--src/video_core/surface.h148
-rw-r--r--src/video_core/textures/decoders.cpp36
9 files changed, 151 insertions, 126 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index d1777b25b..6de07ea56 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -121,10 +121,8 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
}
- u32 old = regs.reg_array[method];
- regs.reg_array[method] = value;
-
- if (value != old) {
+ if (regs.reg_array[method] != value) {
+ regs.reg_array[method] = value;
if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
dirty_flags.vertex_attrib_format = true;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 6d41321fa..bcf0c15a4 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -5,6 +5,7 @@
#pragma once
#include <set>
+#include <unordered_map>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range_core.hpp>
@@ -88,29 +89,25 @@ public:
/// Invalidates everything in the cache
void InvalidateAll() {
- while (object_cache.begin() != object_cache.end()) {
- Unregister(*object_cache.begin()->second.begin());
+ while (interval_cache.begin() != interval_cache.end()) {
+ Unregister(*interval_cache.begin()->second.begin());
}
}
protected:
/// Tries to get an object from the cache with the specified address
T TryGet(VAddr addr) const {
- const ObjectInterval interval{addr};
- for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) {
- for (auto& cached_object : pair.second) {
- if (cached_object->GetAddr() == addr) {
- return cached_object;
- }
- }
- }
+ const auto iter = map_cache.find(addr);
+ if (iter != map_cache.end())
+ return iter->second;
return nullptr;
}
/// Register an object into the cache
void Register(const T& object) {
object->SetIsRegistered(true);
- object_cache.add({GetInterval(object), ObjectSet{object}});
+ interval_cache.add({GetInterval(object), ObjectSet{object}});
+ map_cache.insert({object->GetAddr(), object});
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
}
@@ -118,13 +115,13 @@ protected:
void Unregister(const T& object) {
object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
-
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
if (Settings::values.use_accurate_gpu_emulation) {
FlushObject(object);
}
- object_cache.subtract({GetInterval(object), ObjectSet{object}});
+ interval_cache.subtract({GetInterval(object), ObjectSet{object}});
+ map_cache.erase(object->GetAddr());
}
/// Returns a ticks counter used for tracking when cached objects were last modified
@@ -141,7 +138,7 @@ private:
std::vector<T> objects;
const ObjectInterval interval{addr, addr + size};
- for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) {
+ for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
for (auto& cached_object : pair.second) {
if (!cached_object) {
continue;
@@ -167,15 +164,17 @@ private:
}
using ObjectSet = std::set<T>;
- using ObjectCache = boost::icl::interval_map<VAddr, ObjectSet>;
- using ObjectInterval = typename ObjectCache::interval_type;
+ using ObjectCache = std::unordered_map<VAddr, T>;
+ using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
+ using ObjectInterval = typename IntervalCache::interval_type;
static auto GetInterval(const T& object) {
return ObjectInterval::right_open(object->GetAddr(),
object->GetAddr() + object->GetSizeInBytes());
}
- ObjectCache object_cache; ///< Cache of objects
- u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
+ ObjectCache map_cache;
+ IntervalCache interval_cache; ///< Cache of objects
+ u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
VideoCore::RasterizerInterface& rasterizer;
};
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 0df3725c2..1482cdb40 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -5,7 +5,6 @@
#include "core/frontend/emu_window.h"
#include "core/settings.h"
#include "video_core/renderer_base.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
namespace VideoCore {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 894f4f294..b44ecfa1c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -314,6 +314,8 @@ static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
// Depth formats
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
@@ -456,6 +458,8 @@ static constexpr GLConversionArray morton_to_gl_fns = {
MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
MortonCopy<true, PixelFormat::Z32F>,
MortonCopy<true, PixelFormat::Z16>,
MortonCopy<true, PixelFormat::Z24S8>,
@@ -526,6 +530,8 @@ static constexpr GLConversionArray gl_to_morton_fns = {
nullptr,
nullptr,
nullptr,
+ nullptr,
+ nullptr,
MortonCopy<false, PixelFormat::Z32F>,
MortonCopy<false, PixelFormat::Z16>,
MortonCopy<false, PixelFormat::Z24S8>,
@@ -544,8 +550,8 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params
if (params.is_layered) {
u64 offset = params.GetMipmapLevelOffset(mip_level);
u64 offset_gl = 0;
- u64 layer_size = params.LayerMemorySize();
- u64 gl_size = params.LayerSizeGL(mip_level);
+ const u64 layer_size = params.LayerMemorySize();
+ const u64 gl_size = params.LayerSizeGL(mip_level);
for (u32 i = 0; i < params.depth; i++) {
functions[static_cast<std::size_t>(params.pixel_format)](
params.MipWidth(mip_level), params.MipBlockHeight(mip_level),
@@ -555,7 +561,7 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params
offset_gl += gl_size;
}
} else {
- u64 offset = params.GetMipmapLevelOffset(mip_level);
+ const u64 offset = params.GetMipmapLevelOffset(mip_level);
functions[static_cast<std::size_t>(params.pixel_format)](
params.MipWidth(mip_level), params.MipBlockHeight(mip_level),
params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(),
@@ -709,18 +715,18 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa
MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
- GLuint copy_pbo_handle, GLenum src_attachment = 0,
- GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
+ const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
+ const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0) {
MICROPROFILE_SCOPE(OpenGL_CopySurface);
ASSERT_MSG(dst_attachment == 0, "Unimplemented");
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
- auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
- auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
+ const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
+ const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
- std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);
+ const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);
glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
@@ -744,13 +750,10 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
"reinterpretation but the texture is tiled.");
}
- std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
- std::vector<u8> data(remaining_size);
- std::memcpy(data.data(), Memory::GetPointer(dst_params.addr + src_params.size_in_bytes),
- data.size());
+ const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
- data.data());
+ Memory::GetPointer(dst_params.addr + src_params.size_in_bytes));
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -932,7 +935,9 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
case PixelFormat::ASTC_2D_8X8_SRGB:
case PixelFormat::ASTC_2D_8X5_SRGB:
case PixelFormat::ASTC_2D_5X4_SRGB:
- case PixelFormat::ASTC_2D_5X5_SRGB: {
+ case PixelFormat::ASTC_2D_5X5_SRGB:
+ case PixelFormat::ASTC_2D_10X8:
+ case PixelFormat::ASTC_2D_10X8_SRGB: {
// Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
u32 block_width{};
u32 block_height{};
@@ -967,7 +972,11 @@ static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelForm
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_8X8:
case PixelFormat::ASTC_2D_4X4_SRGB:
- case PixelFormat::ASTC_2D_8X8_SRGB: {
+ case PixelFormat::ASTC_2D_8X8_SRGB:
+ case PixelFormat::ASTC_2D_5X5:
+ case PixelFormat::ASTC_2D_5X5_SRGB:
+ case PixelFormat::ASTC_2D_10X8:
+ case PixelFormat::ASTC_2D_10X8_SRGB: {
LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
static_cast<u32>(pixel_format));
UNREACHABLE();
@@ -1336,6 +1345,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
break;
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::Texture3D:
+ case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
AccurateCopySurface(old_surface, new_surface);
break;
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 2635f2b0c..98622a058 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -266,7 +266,8 @@ void OpenGLState::ApplyViewport() const {
const auto& updated = viewports[0];
if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
updated.height != current.height) {
- glViewport(updated.x, updated.y, updated.width, updated.height);
+ glViewport(static_cast<GLint>(updated.x), static_cast<GLint>(updated.y),
+ static_cast<GLsizei>(updated.width), static_cast<GLsizei>(updated.height));
}
if (updated.depth_range_near != current.depth_range_near ||
updated.depth_range_far != current.depth_range_far) {
@@ -313,7 +314,7 @@ void OpenGLState::ApplyGlobalBlending() const {
}
}
-void OpenGLState::ApplyTargetBlending(int target, bool force) const {
+void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
const Blend& updated = blend[target];
const Blend& current = cur_state.blend[target];
const bool blend_changed = updated.enabled != current.enabled || force;
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index eacca0b9c..e5d1baae6 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -208,7 +208,7 @@ private:
void ApplyPrimitiveRestart() const;
void ApplyStencilTest() const;
void ApplyViewport() const;
- void ApplyTargetBlending(int target, bool force) const;
+ void ApplyTargetBlending(std::size_t target, bool force) const;
void ApplyGlobalBlending() const;
void ApplyBlending() const;
void ApplyLogicOp() const;
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 051ad3964..9582dd2ca 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -306,6 +306,8 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return is_srgb ? PixelFormat::ASTC_2D_8X8_SRGB : PixelFormat::ASTC_2D_8X8;
case Tegra::Texture::TextureFormat::ASTC_2D_8X5:
return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5;
+ case Tegra::Texture::TextureFormat::ASTC_2D_10X8:
+ return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8;
case Tegra::Texture::TextureFormat::R16_G16:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
@@ -453,6 +455,8 @@ bool IsPixelFormatASTC(PixelFormat format) {
case PixelFormat::ASTC_2D_5X5_SRGB:
case PixelFormat::ASTC_2D_8X8_SRGB:
case PixelFormat::ASTC_2D_8X5_SRGB:
+ case PixelFormat::ASTC_2D_10X8:
+ case PixelFormat::ASTC_2D_10X8_SRGB:
return true;
default:
return false;
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index dfdb8d122..0dd3eb2e4 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -74,19 +74,21 @@ enum class PixelFormat {
ASTC_2D_5X4_SRGB = 56,
ASTC_2D_5X5 = 57,
ASTC_2D_5X5_SRGB = 58,
+ ASTC_2D_10X8 = 59,
+ ASTC_2D_10X8_SRGB = 60,
MaxColorFormat,
// Depth formats
- Z32F = 59,
- Z16 = 60,
+ Z32F = 61,
+ Z16 = 62,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 61,
- S8Z24 = 62,
- Z32FS8 = 63,
+ Z24S8 = 63,
+ S8Z24 = 64,
+ Z32FS8 = 65,
MaxDepthStencilFormat,
@@ -193,6 +195,8 @@ static constexpr u32 GetCompressionFactor(PixelFormat format) {
4, // ASTC_2D_5X4_SRGB
4, // ASTC_2D_5X5
4, // ASTC_2D_5X5_SRGB
+ 4, // ASTC_2D_10X8
+ 4, // ASTC_2D_10X8_SRGB
1, // Z32F
1, // Z16
1, // Z24S8
@@ -208,70 +212,72 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0;
constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
- 1, // ABGR8U
- 1, // ABGR8S
- 1, // ABGR8UI
- 1, // B5G6R5U
- 1, // A2B10G10R10U
- 1, // A1B5G5R5U
- 1, // R8U
- 1, // R8UI
- 1, // RGBA16F
- 1, // RGBA16U
- 1, // RGBA16UI
- 1, // R11FG11FB10F
- 1, // RGBA32UI
- 4, // DXT1
- 4, // DXT23
- 4, // DXT45
- 4, // DXN1
- 4, // DXN2UNORM
- 4, // DXN2SNORM
- 4, // BC7U
- 4, // BC6H_UF16
- 4, // BC6H_SF16
- 4, // ASTC_2D_4X4
- 1, // G8R8U
- 1, // G8R8S
- 1, // BGRA8
- 1, // RGBA32F
- 1, // RG32F
- 1, // R32F
- 1, // R16F
- 1, // R16U
- 1, // R16S
- 1, // R16UI
- 1, // R16I
- 1, // RG16
- 1, // RG16F
- 1, // RG16UI
- 1, // RG16I
- 1, // RG16S
- 1, // RGB32F
- 1, // RGBA8_SRGB
- 1, // RG8U
- 1, // RG8S
- 1, // RG32UI
- 1, // R32UI
- 8, // ASTC_2D_8X8
- 8, // ASTC_2D_8X5
- 5, // ASTC_2D_5X4
- 1, // BGRA8_SRGB
- 4, // DXT1_SRGB
- 4, // DXT23_SRGB
- 4, // DXT45_SRGB
- 4, // BC7U_SRGB
- 4, // ASTC_2D_4X4_SRGB
- 8, // ASTC_2D_8X8_SRGB
- 8, // ASTC_2D_8X5_SRGB
- 5, // ASTC_2D_5X4_SRGB
- 5, // ASTC_2D_5X5
- 5, // ASTC_2D_5X5_SRGB
- 1, // Z32F
- 1, // Z16
- 1, // Z24S8
- 1, // S8Z24
- 1, // Z32FS8
+ 1, // ABGR8U
+ 1, // ABGR8S
+ 1, // ABGR8UI
+ 1, // B5G6R5U
+ 1, // A2B10G10R10U
+ 1, // A1B5G5R5U
+ 1, // R8U
+ 1, // R8UI
+ 1, // RGBA16F
+ 1, // RGBA16U
+ 1, // RGBA16UI
+ 1, // R11FG11FB10F
+ 1, // RGBA32UI
+ 4, // DXT1
+ 4, // DXT23
+ 4, // DXT45
+ 4, // DXN1
+ 4, // DXN2UNORM
+ 4, // DXN2SNORM
+ 4, // BC7U
+ 4, // BC6H_UF16
+ 4, // BC6H_SF16
+ 4, // ASTC_2D_4X4
+ 1, // G8R8U
+ 1, // G8R8S
+ 1, // BGRA8
+ 1, // RGBA32F
+ 1, // RG32F
+ 1, // R32F
+ 1, // R16F
+ 1, // R16U
+ 1, // R16S
+ 1, // R16UI
+ 1, // R16I
+ 1, // RG16
+ 1, // RG16F
+ 1, // RG16UI
+ 1, // RG16I
+ 1, // RG16S
+ 1, // RGB32F
+ 1, // RGBA8_SRGB
+ 1, // RG8U
+ 1, // RG8S
+ 1, // RG32UI
+ 1, // R32UI
+ 8, // ASTC_2D_8X8
+ 8, // ASTC_2D_8X5
+ 5, // ASTC_2D_5X4
+ 1, // BGRA8_SRGB
+ 4, // DXT1_SRGB
+ 4, // DXT23_SRGB
+ 4, // DXT45_SRGB
+ 4, // BC7U_SRGB
+ 4, // ASTC_2D_4X4_SRGB
+ 8, // ASTC_2D_8X8_SRGB
+ 8, // ASTC_2D_8X5_SRGB
+ 5, // ASTC_2D_5X4_SRGB
+ 5, // ASTC_2D_5X5
+ 5, // ASTC_2D_5X5_SRGB
+ 10, // ASTC_2D_10X8
+ 10, // ASTC_2D_10X8_SRGB
+ 1, // Z32F
+ 1, // Z16
+ 1, // Z24S8
+ 1, // S8Z24
+ 1, // Z32FS8
}};
ASSERT(static_cast<std::size_t>(format) < block_width_table.size());
return block_width_table[static_cast<std::size_t>(format)];
@@ -341,6 +347,8 @@ static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
4, // ASTC_2D_5X4_SRGB
5, // ASTC_2D_5X5
5, // ASTC_2D_5X5_SRGB
+ 8, // ASTC_2D_10X8
+ 8, // ASTC_2D_10X8_SRGB
1, // Z32F
1, // Z16
1, // Z24S8
@@ -416,6 +424,8 @@ static constexpr u32 GetFormatBpp(PixelFormat format) {
128, // ASTC_2D_5X4_SRGB
128, // ASTC_2D_5X5
128, // ASTC_2D_5X5_SRGB
+ 128, // ASTC_2D_10X8
+ 128, // ASTC_2D_10X8_SRGB
32, // Z32F
16, // Z16
32, // Z24S8
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 3066abf61..19f30b1b5 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -45,7 +45,7 @@ constexpr auto fast_swizzle_table = SwizzleTable<8, 4, 16>();
* Instead of going gob by gob, we map the coordinates inside a block and manage from
* those. Block_Width is assumed to be 1.
*/
-void PreciseProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
+void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
const u32 y_end, const u32 z_end, const u32 tile_offset,
const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
@@ -81,7 +81,7 @@ void PreciseProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unsw
* Instead of going gob by gob, we map the coordinates inside a block and manage from
* those. Block_Width is assumed to be 1.
*/
-void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
+void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
const u32 y_end, const u32 z_end, const u32 tile_offset,
const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
@@ -90,10 +90,10 @@ void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizz
u32 z_address = tile_offset;
const u32 x_startb = x_start * bytes_per_pixel;
const u32 x_endb = x_end * bytes_per_pixel;
- const u32 copy_size = 16;
- const u32 gob_size_x = 64;
- const u32 gob_size_y = 8;
- const u32 gob_size_z = 1;
+ constexpr u32 copy_size = 16;
+ constexpr u32 gob_size_x = 64;
+ constexpr u32 gob_size_y = 8;
+ constexpr u32 gob_size_z = 1;
const u32 gob_size = gob_size_x * gob_size_y * gob_size_z;
for (u32 z = z_start; z < z_end; z++) {
u32 y_address = z_address;
@@ -126,23 +126,23 @@ void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizz
* https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
*/
template <bool fast>
-void SwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, const u32 width,
- const u32 height, const u32 depth, const u32 bytes_per_pixel,
+void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
+ const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel,
const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth) {
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 stride_x = width * out_bytes_per_pixel;
const u32 layer_z = height * stride_x;
- const u32 gob_x_bytes = 64;
+ constexpr u32 gob_x_bytes = 64;
const u32 gob_elements_x = gob_x_bytes / bytes_per_pixel;
- const u32 gob_elements_y = 8;
- const u32 gob_elements_z = 1;
+ constexpr u32 gob_elements_y = 8;
+ constexpr u32 gob_elements_z = 1;
const u32 block_x_elements = gob_elements_x;
const u32 block_y_elements = gob_elements_y * block_height;
const u32 block_z_elements = gob_elements_z * block_depth;
const u32 blocks_on_x = div_ceil(width, block_x_elements);
const u32 blocks_on_y = div_ceil(height, block_y_elements);
const u32 blocks_on_z = div_ceil(depth, block_z_elements);
- const u32 gob_size = gob_x_bytes * gob_elements_y * gob_elements_z;
+ constexpr u32 gob_size = gob_x_bytes * gob_elements_y * gob_elements_z;
const u32 xy_block_size = gob_size * block_height;
const u32 block_size = xy_block_size * block_depth;
u32 tile_offset = 0;
@@ -171,7 +171,7 @@ void SwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
}
void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
- u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
+ u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data,
bool unswizzle, u32 block_height, u32 block_depth) {
if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) {
SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
@@ -202,6 +202,8 @@ u32 BytesPerPixel(TextureFormat format) {
case TextureFormat::ASTC_2D_5X4:
case TextureFormat::ASTC_2D_8X8:
case TextureFormat::ASTC_2D_8X5:
+ case TextureFormat::ASTC_2D_10X8:
+ case TextureFormat::ASTC_2D_5X5:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::BF10GF11RF11:
@@ -294,6 +296,8 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::BC6H_SF16:
case TextureFormat::ASTC_2D_4X4:
case TextureFormat::ASTC_2D_8X8:
+ case TextureFormat::ASTC_2D_5X5:
+ case TextureFormat::ASTC_2D_10X8:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::A1B5G5R5:
@@ -321,9 +325,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth) {
if (tiled) {
- const u32 gobs_in_x = 64;
- const u32 gobs_in_y = 8;
- const u32 gobs_in_z = 1;
+ constexpr u32 gobs_in_x = 64;
+ constexpr u32 gobs_in_y = 8;
+ constexpr u32 gobs_in_z = 1;
const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gobs_in_x);
const u32 aligned_height = Common::AlignUp(height, gobs_in_y * block_height);
const u32 aligned_depth = Common::AlignUp(depth, gobs_in_z * block_depth);