summary refs log tree commit diff stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 15
-rw-r--r-- src/video_core/engines/maxwell_3d.h 2
-rw-r--r-- src/video_core/engines/shader_bytecode.h 15
-rw-r--r-- src/video_core/gpu.cpp 1
-rw-r--r-- src/video_core/gpu.h 1
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 23
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 82
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 147
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 22
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 6
-rw-r--r-- src/video_core/renderer_opengl/maxwell_to_gl.h 7
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 2
-rw-r--r-- src/video_core/textures/decoders.cpp 86
-rw-r--r-- src/video_core/textures/decoders.h 4
14 files changed, 165 insertions, 248 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5c0ae8009..a46ed4bd7 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -23,12 +23,17 @@ Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager&
: memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {}
void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
- auto macro_code = uploaded_macros.find(method);
+ // Reset the current macro.
+ executing_macro = 0;
+
// The requested macro must have been uploaded already.
- ASSERT_MSG(macro_code != uploaded_macros.end(), "Macro %08X was not uploaded", method);
+ auto macro_code = uploaded_macros.find(method);
+ if (macro_code == uploaded_macros.end()) {
+ LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method);
+ return;
+ }
- // Reset the current macro and execute it.
- executing_macro = 0;
+ // Execute the current macro.
macro_interpreter.Execute(macro_code->second, std::move(parameters));
}
@@ -238,6 +243,8 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
auto& buffer = shader.const_buffers[bind_data.index];
+ ASSERT(bind_data.index < Regs::MaxConstBuffers);
+
buffer.enabled = bind_data.valid.Value() != 0;
buffer.index = bind_data.index;
buffer.address = regs.const_buffer.BufferAddress();
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 4d0ff96a5..0506ac8fe 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -44,7 +44,7 @@ public:
static constexpr size_t MaxShaderProgram = 6;
static constexpr size_t MaxShaderStage = 5;
// Maximum number of const buffers per shader stage.
- static constexpr size_t MaxConstBuffers = 16;
+ static constexpr size_t MaxConstBuffers = 18;
enum class QueryMode : u32 {
Write = 0,
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c7e3fb4b1..3d4557b7e 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -78,6 +78,8 @@ union Attribute {
// shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
// shader.
TessCoordInstanceIDVertexID = 47,
+ // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this.
+ Unknown_63 = 63,
};
union {
@@ -254,20 +256,15 @@ union Instruction {
BitField<56, 1, u64> invert_b;
} lop32i;
- float GetImm20_19() const {
- float result{};
+ u32 GetImm20_19() const {
u32 imm{static_cast<u32>(imm20_19)};
imm <<= 12;
imm |= negate_imm ? 0x80000000 : 0;
- std::memcpy(&result, &imm, sizeof(imm));
- return result;
+ return imm;
}
- float GetImm20_32() const {
- float result{};
- s32 imm{static_cast<s32>(imm20_32)};
- std::memcpy(&result, &imm, sizeof(imm));
- return result;
+ u32 GetImm20_32() const {
+ return static_cast<u32>(imm20_32);
}
s32 GetSignedImm20_20() const {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index b2a83ce0b..4ff4d71c5 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -42,6 +42,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
case RenderTargetFormat::RGB10_A2_UNORM:
case RenderTargetFormat::BGRA8_UNORM:
case RenderTargetFormat::R32_FLOAT:
+ case RenderTargetFormat::R11G11B10_FLOAT:
return 4;
default:
UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format));
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 440505c9d..874eddd78 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -34,6 +34,7 @@ enum class RenderTargetFormat : u32 {
RG16_FLOAT = 0xDE,
R11G11B10_FLOAT = 0xE0,
R32_FLOAT = 0xE5,
+ B5G6R5_UNORM = 0xE8,
R16_FLOAT = 0xF2,
R8_UNORM = 0xF3,
};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c2a931469..8360feb5d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -161,7 +161,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
// assume every shader uses them all.
for (unsigned index = 0; index < 16; ++index) {
auto& attrib = regs.vertex_attrib_format[index];
- LOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
+ LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
attrib.offset.Value(), attrib.IsNormalized());
@@ -324,11 +324,14 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c
bool using_depth_fb) {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) {
+ LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured");
+ using_color_fb = false;
+ }
+
// TODO(bunnei): Implement this
const bool has_stencil = false;
- const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
-
const bool write_color_fb =
state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE;
@@ -341,9 +344,10 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c
Surface depth_surface;
MathUtil::Rectangle<u32> surfaces_rect;
std::tie(color_surface, depth_surface, surfaces_rect) =
- res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect);
+ res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb);
- MathUtil::Rectangle<u32> draw_rect{
+ const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
+ const MathUtil::Rectangle<u32> draw_rect{
static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
surfaces_rect.left, surfaces_rect.right)), // Left
static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
@@ -659,7 +663,10 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
auto& buffer_draw_state =
state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()];
- ASSERT_MSG(buffer.enabled, "Attempted to upload disabled constbuffer");
+ if (!buffer.enabled) {
+ continue;
+ }
+
buffer_draw_state.enabled = true;
buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
@@ -804,9 +811,7 @@ void RasterizerOpenGL::SyncClipCoef() {
void RasterizerOpenGL::SyncCullMode() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- // TODO(bunnei): Enable the below once more things work - until then, this may hide regressions
- // state.cull.enabled = regs.cull.enabled != 0;
- state.cull.enabled = false;
+ state.cull.enabled = regs.cull.enabled != 0;
if (state.cull.enabled) {
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 257aa9571..9fb734b77 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -109,6 +109,9 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT45
{GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
+ {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+ true}, // DXN2UNORM
+ {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
{GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // BC7U
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
@@ -180,36 +183,49 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect() const {
return {0, actual_height, width, 0};
}
+/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN
+static bool IsFormatBCn(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::DXT1:
+ case PixelFormat::DXT23:
+ case PixelFormat::DXT45:
+ case PixelFormat::DXN1:
+ case PixelFormat::DXN2SNORM:
+ case PixelFormat::DXN2UNORM:
+ case PixelFormat::BC7U:
+ return true;
+ }
+ return false;
+}
+
template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) {
+void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer,
+ Tegra::GPUVAddr addr) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
const auto& gpu = Core::System::GetInstance().GPU();
if (morton_to_gl) {
- if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) {
- auto data = Tegra::Texture::UnswizzleTexture(
- *gpu.memory_manager->GpuToCpuAddress(addr),
- SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height);
- std::memcpy(gl_buffer, data.data(), data.size());
- } else {
- auto data = Tegra::Texture::UnswizzleDepthTexture(
- *gpu.memory_manager->GpuToCpuAddress(addr),
- SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height);
- std::memcpy(gl_buffer, data.data(), data.size());
- }
+ // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
+ // pixel values.
+ const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
+ const std::vector<u8> data =
+ Tegra::Texture::UnswizzleTexture(*gpu.memory_manager->GpuToCpuAddress(addr), tile_size,
+ bytes_per_pixel, stride, height, block_height);
+ const size_t size_to_copy{std::min(gl_buffer.size(), data.size())};
+ gl_buffer.assign(data.begin(), data.begin() + size_to_copy);
} else {
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
// check the configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(
stride, height, bytes_per_pixel, gl_bytes_per_pixel,
- Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer,
+ Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer.data(),
morton_to_gl);
}
}
-static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
+static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
@@ -218,6 +234,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>,
MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
+ MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>,
MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>,
MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>,
@@ -231,7 +248,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
MortonCopy<true, PixelFormat::Z32FS8>,
};
-static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
+static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
SurfaceParams::MaxPixelFormat>
gl_to_morton_fns = {
MortonCopy<false, PixelFormat::ABGR8>,
@@ -242,7 +259,10 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
MortonCopy<false, PixelFormat::RGBA16F>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,
MortonCopy<false, PixelFormat::RGBA32UI>,
- // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/BC7U/ASTC_2D_4X4 formats is not supported
+ // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not
+ // supported
+ nullptr,
+ nullptr,
nullptr,
nullptr,
nullptr,
@@ -447,22 +467,24 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64
void CachedSurface::LoadGLBuffer() {
ASSERT(params.type != SurfaceType::Fill);
- u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr());
+ const u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr());
ASSERT(texture_src_data);
- gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
+ const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format);
+ const u32 copy_size = params.width * params.height * bytes_per_pixel;
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
- if (!params.is_tiled) {
- const u32 bytes_per_pixel{params.GetFormatBpp() >> 3};
+ if (params.is_tiled) {
+ gl_buffer.resize(copy_size);
- std::memcpy(gl_buffer.data(), texture_src_data,
- bytes_per_pixel * params.width * params.height);
- } else {
morton_to_gl_fns[static_cast<size_t>(params.pixel_format)](
- params.width, params.block_height, params.height, gl_buffer.data(), params.addr);
+ params.width, params.block_height, params.height, gl_buffer, params.addr);
+ } else {
+ const u8* const texture_src_data_end = texture_src_data + copy_size;
+
+ gl_buffer.assign(texture_src_data, texture_src_data_end);
}
ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height);
@@ -485,7 +507,7 @@ void CachedSurface::FlushGLBuffer() {
std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes);
} else {
gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
- params.width, params.block_height, params.height, gl_buffer.data(), params.addr);
+ params.width, params.block_height, params.height, gl_buffer, params.addr);
}
}
@@ -600,8 +622,8 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
return GetSurface(SurfaceParams::CreateForTexture(config));
}
-SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
- bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) {
+SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb,
+ bool using_depth_fb) {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
// TODO(bunnei): This is hard corded to use just the first render buffer
@@ -757,10 +779,12 @@ void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*
}
void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) {
- for (const auto& pair : surface_cache) {
- const auto& surface{pair.second};
+ for (auto iter = surface_cache.cbegin(); iter != surface_cache.cend();) {
+ const auto& surface{iter->second};
const auto& params{surface->GetSurfaceParams()};
+ ++iter;
+
if (params.IsOverlappingRegion(addr, size)) {
UnregisterSurface(surface);
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 0c6652c7a..829a76dfe 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -35,31 +35,33 @@ struct SurfaceParams {
DXT23 = 9,
DXT45 = 10,
DXN1 = 11, // This is also known as BC4
- BC7U = 12,
- ASTC_2D_4X4 = 13,
- G8R8 = 14,
- BGRA8 = 15,
- RGBA32F = 16,
- RG32F = 17,
- R32F = 18,
- R16F = 19,
- R16UNORM = 20,
- RG16 = 21,
- RG16F = 22,
- RG16UI = 23,
- RG16I = 24,
- RG16S = 25,
- RGB32F = 26,
- SRGBA8 = 27,
+ DXN2UNORM = 12,
+ DXN2SNORM = 13,
+ BC7U = 14,
+ ASTC_2D_4X4 = 15,
+ G8R8 = 16,
+ BGRA8 = 17,
+ RGBA32F = 18,
+ RG32F = 19,
+ R32F = 20,
+ R16F = 21,
+ R16UNORM = 22,
+ RG16 = 23,
+ RG16F = 24,
+ RG16UI = 25,
+ RG16I = 26,
+ RG16S = 27,
+ RGB32F = 28,
+ SRGBA8 = 29,
MaxColorFormat,
// DepthStencil formats
- Z24S8 = 28,
- S8Z24 = 29,
- Z32F = 30,
- Z16 = 31,
- Z32FS8 = 32,
+ Z24S8 = 30,
+ S8Z24 = 31,
+ Z32F = 32,
+ Z16 = 33,
+ Z32FS8 = 34,
MaxDepthStencilFormat,
@@ -109,6 +111,8 @@ struct SurfaceParams {
4, // DXT23
4, // DXT45
4, // DXN1
+ 4, // DXN2UNORM
+ 4, // DXN2SNORM
4, // BC7U
4, // ASTC_2D_4X4
1, // G8R8
@@ -153,6 +157,8 @@ struct SurfaceParams {
128, // DXT23
128, // DXT45
64, // DXN1
+ 128, // DXN2UNORM
+ 128, // DXN2SNORM
128, // BC7U
32, // ASTC_2D_4X4
16, // G8R8
@@ -221,6 +227,8 @@ struct SurfaceParams {
return PixelFormat::RG32F;
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
return PixelFormat::R11FG11FB10F;
+ case Tegra::RenderTargetFormat::B5G6R5_UNORM:
+ return PixelFormat::B5G6R5;
case Tegra::RenderTargetFormat::RGBA32_UINT:
return PixelFormat::RGBA32UI;
case Tegra::RenderTargetFormat::R8_UNORM:
@@ -303,6 +311,16 @@ struct SurfaceParams {
return PixelFormat::DXT45;
case Tegra::Texture::TextureFormat::DXN1:
return PixelFormat::DXN1;
+ case Tegra::Texture::TextureFormat::DXN2:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::DXN2UNORM;
+ case Tegra::Texture::ComponentType::SNORM:
+ return PixelFormat::DXN2SNORM;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::BC7U:
return PixelFormat::BC7U;
case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
@@ -330,89 +348,6 @@ struct SurfaceParams {
}
}
- static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) {
- // TODO(Subv): Properly implement this
- switch (format) {
- case PixelFormat::ABGR8:
- case PixelFormat::SRGBA8:
- return Tegra::Texture::TextureFormat::A8R8G8B8;
- case PixelFormat::B5G6R5:
- return Tegra::Texture::TextureFormat::B5G6R5;
- case PixelFormat::A2B10G10R10:
- return Tegra::Texture::TextureFormat::A2B10G10R10;
- case PixelFormat::A1B5G5R5:
- return Tegra::Texture::TextureFormat::A1B5G5R5;
- case PixelFormat::R8:
- return Tegra::Texture::TextureFormat::R8;
- case PixelFormat::G8R8:
- return Tegra::Texture::TextureFormat::G8R8;
- case PixelFormat::RGBA16F:
- return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
- case PixelFormat::R11FG11FB10F:
- return Tegra::Texture::TextureFormat::BF10GF11RF11;
- case PixelFormat::RGBA32UI:
- return Tegra::Texture::TextureFormat::R32_G32_B32_A32;
- case PixelFormat::DXT1:
- return Tegra::Texture::TextureFormat::DXT1;
- case PixelFormat::DXT23:
- return Tegra::Texture::TextureFormat::DXT23;
- case PixelFormat::DXT45:
- return Tegra::Texture::TextureFormat::DXT45;
- case PixelFormat::DXN1:
- return Tegra::Texture::TextureFormat::DXN1;
- case PixelFormat::BC7U:
- return Tegra::Texture::TextureFormat::BC7U;
- case PixelFormat::ASTC_2D_4X4:
- return Tegra::Texture::TextureFormat::ASTC_2D_4X4;
- case PixelFormat::BGRA8:
- // TODO(bunnei): This is fine for unswizzling (since we just need the right component
- // sizes), but could be a bug if we used this function in different ways.
- return Tegra::Texture::TextureFormat::A8R8G8B8;
- case PixelFormat::RGBA32F:
- return Tegra::Texture::TextureFormat::R32_G32_B32_A32;
- case PixelFormat::RGB32F:
- return Tegra::Texture::TextureFormat::R32_G32_B32;
- case PixelFormat::RG32F:
- return Tegra::Texture::TextureFormat::R32_G32;
- case PixelFormat::R32F:
- return Tegra::Texture::TextureFormat::R32;
- case PixelFormat::R16F:
- case PixelFormat::R16UNORM:
- return Tegra::Texture::TextureFormat::R16;
- case PixelFormat::Z32F:
- return Tegra::Texture::TextureFormat::ZF32;
- case PixelFormat::Z24S8:
- return Tegra::Texture::TextureFormat::Z24S8;
- case PixelFormat::RG16F:
- case PixelFormat::RG16:
- case PixelFormat::RG16UI:
- case PixelFormat::RG16I:
- case PixelFormat::RG16S:
- return Tegra::Texture::TextureFormat::R16_G16;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- }
- }
-
- static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) {
- switch (format) {
- case PixelFormat::S8Z24:
- return Tegra::DepthFormat::S8_Z24_UNORM;
- case PixelFormat::Z24S8:
- return Tegra::DepthFormat::Z24_S8_UNORM;
- case PixelFormat::Z32F:
- return Tegra::DepthFormat::Z32_FLOAT;
- case PixelFormat::Z16:
- return Tegra::DepthFormat::Z16_UNORM;
- case PixelFormat::Z32FS8:
- return Tegra::DepthFormat::Z32_S8_X24_FLOAT;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- }
- }
-
static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
// TODO(Subv): Implement more component types
switch (type) {
@@ -441,6 +376,7 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
case Tegra::RenderTargetFormat::R8_UNORM:
case Tegra::RenderTargetFormat::RG16_UNORM:
+ case Tegra::RenderTargetFormat::B5G6R5_UNORM:
return ComponentType::UNorm;
case Tegra::RenderTargetFormat::RG16_SNORM:
return ComponentType::SNorm;
@@ -612,8 +548,7 @@ public:
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
/// Get the color and depth surfaces based on the framebuffer configuration
- SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
- const MathUtil::Rectangle<s32>& viewport);
+ SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb);
/// Flushes the surface to Switch memory
void FlushSurface(const Surface& surface);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e3217db81..32f06f409 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -507,6 +507,8 @@ private:
/// Build the GLSL register list.
void BuildRegisterList() {
+ regs.reserve(Register::NumRegisters);
+
for (size_t index = 0; index < Register::NumRegisters; ++index) {
regs.emplace_back(index, suffix);
}
@@ -523,6 +525,11 @@ private:
// shader.
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))";
+ case Attribute::Index::Unknown_63:
+ // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this.
+ LOG_CRITICAL(HW_GPU, "Unhandled input attribute Unknown_63");
+ UNREACHABLE();
+ break;
default:
const u32 index{static_cast<u32>(attribute) -
static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -534,6 +541,8 @@ private:
LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
UNREACHABLE();
}
+
+ return "vec4(0, 0, 0, 0)";
}
/// Generates code representing an output attribute register.
@@ -602,12 +611,12 @@ private:
/// Generates code representing a 19-bit immediate value
static std::string GetImmediate19(const Instruction& instr) {
- return std::to_string(instr.alu.GetImm20_19());
+ return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19());
}
/// Generates code representing a 32-bit immediate value
static std::string GetImmediate32(const Instruction& instr) {
- return std::to_string(instr.alu.GetImm20_32());
+ return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32());
}
/// Generates code representing a texture sampler.
@@ -650,16 +659,17 @@ private:
* @param instr Instruction to generate the if condition for.
* @returns string containing the predicate condition.
*/
- std::string GetPredicateCondition(u64 index, bool negate) const {
+ std::string GetPredicateCondition(u64 index, bool negate) {
using Tegra::Shader::Pred;
std::string variable;
// Index 7 is used as an 'Always True' condition.
- if (index == static_cast<u64>(Pred::UnusedIndex))
+ if (index == static_cast<u64>(Pred::UnusedIndex)) {
variable = "true";
- else
+ } else {
variable = 'p' + std::to_string(index) + '_' + suffix;
-
+ declr_predicates.insert(variable);
+ }
if (negate) {
return "!(" + variable + ')';
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 24b1d956b..5c7b636e4 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -7,6 +7,10 @@
#include <array>
#include <glad/glad.h>
+#include "video_core/engines/maxwell_3d.h"
+
+using Regs = Tegra::Engines::Maxwell3D::Regs;
+
namespace TextureUnits {
struct TextureUnit {
@@ -120,7 +124,7 @@ public:
GLuint bindpoint;
GLuint ssbo;
};
- std::array<std::array<ConstBufferConfig, 16>, 5> const_buffers{};
+ std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers;
} draw;
struct {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 16b1bd606..c439446b1 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -27,9 +27,11 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Type::UnsignedNorm: {
switch (attrib.size) {
+ case Maxwell::VertexAttribute::Size::Size_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_UNSIGNED_BYTE;
case Maxwell::VertexAttribute::Size::Size_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return GL_UNSIGNED_SHORT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
@@ -43,6 +45,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Type::SignedNorm: {
switch (attrib.size) {
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ return GL_INT;
+ case Maxwell::VertexAttribute::Size::Size_8_8:
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_BYTE;
case Maxwell::VertexAttribute::Size::Size_16_16:
@@ -84,6 +89,8 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
switch (topology) {
+ case Maxwell::PrimitiveTopology::Points:
+ return GL_POINTS;
case Maxwell::PrimitiveTopology::Triangles:
return GL_TRIANGLES;
case Maxwell::PrimitiveTopology::TriangleStrip:
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index bf9131193..899865e3b 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -430,7 +430,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
break;
case GL_DEBUG_SEVERITY_NOTIFICATION:
case GL_DEBUG_SEVERITY_LOW:
- LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
+ LOG_TRACE(Render_OpenGL, format, str_source, str_type, id, message);
break;
}
}
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 65db84ad3..70746a34e 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -54,6 +54,7 @@ u32 BytesPerPixel(TextureFormat format) {
return 8;
case TextureFormat::DXT23:
case TextureFormat::DXT45:
+ case TextureFormat::DXN2:
case TextureFormat::BC7U:
// In this case a 'pixel' actually refers to a 4x4 tile.
return 16;
@@ -85,87 +86,11 @@ u32 BytesPerPixel(TextureFormat format) {
}
}
-static u32 DepthBytesPerPixel(DepthFormat format) {
- switch (format) {
- case DepthFormat::Z16_UNORM:
- return 2;
- case DepthFormat::S8_Z24_UNORM:
- case DepthFormat::Z24_S8_UNORM:
- case DepthFormat::Z32_FLOAT:
- return 4;
- case DepthFormat::Z32_S8_X24_FLOAT:
- return 8;
- default:
- UNIMPLEMENTED_MSG("Format not implemented");
- break;
- }
-}
-
-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
- u32 block_height) {
- u8* data = Memory::GetPointer(address);
- u32 bytes_per_pixel = BytesPerPixel(format);
-
+std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
+ u32 height, u32 block_height) {
std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
-
- switch (format) {
- case TextureFormat::DXT1:
- case TextureFormat::DXT23:
- case TextureFormat::DXT45:
- case TextureFormat::DXN1:
- case TextureFormat::BC7U:
- // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel
- // values.
- CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
- unswizzled_data.data(), true, block_height);
- break;
- case TextureFormat::A8R8G8B8:
- case TextureFormat::A2B10G10R10:
- case TextureFormat::A1B5G5R5:
- case TextureFormat::B5G6R5:
- case TextureFormat::R8:
- case TextureFormat::G8R8:
- case TextureFormat::R16_G16_B16_A16:
- case TextureFormat::R32_G32_B32_A32:
- case TextureFormat::R32_G32:
- case TextureFormat::R32:
- case TextureFormat::R16:
- case TextureFormat::R16_G16:
- case TextureFormat::BF10GF11RF11:
- case TextureFormat::ASTC_2D_4X4:
- case TextureFormat::R32_G32_B32:
- CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
- unswizzled_data.data(), true, block_height);
- break;
- default:
- UNIMPLEMENTED_MSG("Format not implemented");
- break;
- }
-
- return unswizzled_data;
-}
-
-std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height,
- u32 block_height) {
- u8* data = Memory::GetPointer(address);
- u32 bytes_per_pixel = DepthBytesPerPixel(format);
-
- std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
-
- switch (format) {
- case DepthFormat::Z16_UNORM:
- case DepthFormat::S8_Z24_UNORM:
- case DepthFormat::Z24_S8_UNORM:
- case DepthFormat::Z32_FLOAT:
- case DepthFormat::Z32_S8_X24_FLOAT:
- CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
- unswizzled_data.data(), true, block_height);
- break;
- default:
- UNIMPLEMENTED_MSG("Format not implemented");
- break;
- }
-
+ CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel,
+ Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
return unswizzled_data;
}
@@ -179,6 +104,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::DXT23:
case TextureFormat::DXT45:
case TextureFormat::DXN1:
+ case TextureFormat::DXN2:
case TextureFormat::BC7U:
case TextureFormat::ASTC_2D_4X4:
case TextureFormat::A8R8G8B8:
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 73a4924d1..1f7b731be 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -13,8 +13,8 @@ namespace Tegra::Texture {
/**
* Unswizzles a swizzled texture without changing its format.
*/
-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
- u32 block_height = TICEntry::DefaultBlockHeight);
+std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
+ u32 height, u32 block_height = TICEntry::DefaultBlockHeight);
/**
* Unswizzles a swizzled depth texture without changing its format.