diff options
Diffstat (limited to '')
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 15 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 2 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 15 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 1 | ||||
-rw-r--r-- | src/video_core/gpu.h | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 23 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 82 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 147 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 22 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 7 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 2 | ||||
-rw-r--r-- | src/video_core/textures/decoders.cpp | 86 | ||||
-rw-r--r-- | src/video_core/textures/decoders.h | 4 |
14 files changed, 165 insertions, 248 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 5c0ae8009..a46ed4bd7 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -23,12 +23,17 @@ Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {} void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { - auto macro_code = uploaded_macros.find(method); + // Reset the current macro. + executing_macro = 0; + // The requested macro must have been uploaded already. - ASSERT_MSG(macro_code != uploaded_macros.end(), "Macro %08X was not uploaded", method); + auto macro_code = uploaded_macros.find(method); + if (macro_code == uploaded_macros.end()) { + LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method); + return; + } - // Reset the current macro and execute it. - executing_macro = 0; + // Execute the current macro. macro_interpreter.Execute(macro_code->second, std::move(parameters)); } @@ -238,6 +243,8 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { auto& buffer = shader.const_buffers[bind_data.index]; + ASSERT(bind_data.index < Regs::MaxConstBuffers); + buffer.enabled = bind_data.valid.Value() != 0; buffer.index = bind_data.index; buffer.address = regs.const_buffer.BufferAddress(); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 4d0ff96a5..0506ac8fe 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -44,7 +44,7 @@ public: static constexpr size_t MaxShaderProgram = 6; static constexpr size_t MaxShaderStage = 5; // Maximum number of const buffers per shader stage. - static constexpr size_t MaxConstBuffers = 16; + static constexpr size_t MaxConstBuffers = 18; enum class QueryMode : u32 { Write = 0, diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c7e3fb4b1..3d4557b7e 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -78,6 +78,8 @@ union Attribute { // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval // shader. TessCoordInstanceIDVertexID = 47, + // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this. + Unknown_63 = 63, }; union { @@ -254,20 +256,15 @@ union Instruction { BitField<56, 1, u64> invert_b; } lop32i; - float GetImm20_19() const { - float result{}; + u32 GetImm20_19() const { u32 imm{static_cast<u32>(imm20_19)}; imm <<= 12; imm |= negate_imm ? 0x80000000 : 0; - std::memcpy(&result, &imm, sizeof(imm)); - return result; + return imm; } - float GetImm20_32() const { - float result{}; - s32 imm{static_cast<s32>(imm20_32)}; - std::memcpy(&result, &imm, sizeof(imm)); - return result; + u32 GetImm20_32() const { + return static_cast<u32>(imm20_32); } s32 GetSignedImm20_20() const { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index b2a83ce0b..4ff4d71c5 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -42,6 +42,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { case RenderTargetFormat::RGB10_A2_UNORM: case RenderTargetFormat::BGRA8_UNORM: case RenderTargetFormat::R32_FLOAT: + case RenderTargetFormat::R11G11B10_FLOAT: return 4; default: UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format)); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 440505c9d..874eddd78 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -34,6 +34,7 @@ enum class RenderTargetFormat : u32 { RG16_FLOAT = 0xDE, R11G11B10_FLOAT = 0xE0, R32_FLOAT = 0xE5, + B5G6R5_UNORM = 0xE8, R16_FLOAT = 0xF2, R8_UNORM = 0xF3, }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c2a931469..8360feb5d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -161,7 +161,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, // assume every shader uses them all. for (unsigned index = 0; index < 16; ++index) { auto& attrib = regs.vertex_attrib_format[index]; - LOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", + LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), attrib.offset.Value(), attrib.IsNormalized()); @@ -324,11 +324,14 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c bool using_depth_fb) { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) { + LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured"); + using_color_fb = false; + } + // TODO(bunnei): Implement this const bool has_stencil = false; - const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; - const bool write_color_fb = state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; @@ -341,9 +344,10 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c Surface depth_surface; MathUtil::Rectangle<u32> surfaces_rect; std::tie(color_surface, depth_surface, surfaces_rect) = - res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); - MathUtil::Rectangle<u32> draw_rect{ + const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; + const MathUtil::Rectangle<u32> draw_rect{ static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left, surfaces_rect.left, surfaces_rect.right)), // Left static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top, @@ -659,7 +663,10 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr auto& buffer_draw_state = state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()]; - ASSERT_MSG(buffer.enabled, "Attempted to upload disabled constbuffer"); + if (!buffer.enabled) { + continue; + } + buffer_draw_state.enabled = true; buffer_draw_state.bindpoint = current_bindpoint + bindpoint; @@ -804,9 +811,7 @@ void RasterizerOpenGL::SyncClipCoef() { void RasterizerOpenGL::SyncCullMode() { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - // TODO(bunnei): Enable the below once more things work - until then, this may hide regressions - // state.cull.enabled = regs.cull.enabled != 0; - state.cull.enabled = false; + state.cull.enabled = regs.cull.enabled != 0; if (state.cull.enabled) { state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 257aa9571..9fb734b77 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -109,6 +109,9 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXT45 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 + {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXN2UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // BC7U {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 @@ -180,36 +183,49 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect() const { return {0, actual_height, width, 0}; } +/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN +static bool IsFormatBCn(PixelFormat format) { + switch (format) { + case PixelFormat::DXT1: + case PixelFormat::DXT23: + case PixelFormat::DXT45: + case PixelFormat::DXN1: + case PixelFormat::DXN2SNORM: + case PixelFormat::DXN2UNORM: + case PixelFormat::BC7U: + return true; + } + return false; +} + template <bool morton_to_gl, PixelFormat format> -void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) { +void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer, + Tegra::GPUVAddr addr) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); const auto& gpu = Core::System::GetInstance().GPU(); if (morton_to_gl) { - if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) { - auto data = Tegra::Texture::UnswizzleTexture( - *gpu.memory_manager->GpuToCpuAddress(addr), - SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); - std::memcpy(gl_buffer, data.data(), data.size()); - } else { - auto data = Tegra::Texture::UnswizzleDepthTexture( - *gpu.memory_manager->GpuToCpuAddress(addr), - SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height); - std::memcpy(gl_buffer, data.data(), data.size()); - } + // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual + // pixel values. + const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; + const std::vector<u8> data = + Tegra::Texture::UnswizzleTexture(*gpu.memory_manager->GpuToCpuAddress(addr), tile_size, + bytes_per_pixel, stride, height, block_height); + const size_t size_to_copy{std::min(gl_buffer.size(), data.size())}; + gl_buffer.assign(data.begin(), data.begin() + size_to_copy); } else { // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should // check the configuration for this and perform more generic un/swizzle LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); VideoCore::MortonCopyPixels128( stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer, + Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer.data(), morton_to_gl); } } -static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), +static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> morton_to_gl_fns = { MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, @@ -218,6 +234,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, + MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>, MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>, MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, @@ -231,7 +248,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), MortonCopy<true, PixelFormat::Z32FS8>, }; -static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), +static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> gl_to_morton_fns = { MortonCopy<false, PixelFormat::ABGR8>, @@ -242,7 +259,10 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), MortonCopy<false, PixelFormat::RGBA16F>, MortonCopy<false, PixelFormat::R11FG11FB10F>, MortonCopy<false, PixelFormat::RGBA32UI>, - // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/BC7U/ASTC_2D_4X4 formats is not supported + // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not + // supported + nullptr, + nullptr, nullptr, nullptr, nullptr, @@ -447,22 +467,24 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64 void CachedSurface::LoadGLBuffer() { ASSERT(params.type != SurfaceType::Fill); - u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); + const u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); ASSERT(texture_src_data); - gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); + const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format); + const u32 copy_size = params.width * params.height * bytes_per_pixel; MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - if (!params.is_tiled) { - const u32 bytes_per_pixel{params.GetFormatBpp() >> 3}; + if (params.is_tiled) { + gl_buffer.resize(copy_size); - std::memcpy(gl_buffer.data(), texture_src_data, - bytes_per_pixel * params.width * params.height); - } else { morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( - params.width, params.block_height, params.height, gl_buffer.data(), params.addr); + params.width, params.block_height, params.height, gl_buffer, params.addr); + } else { + const u8* const texture_src_data_end = texture_src_data + copy_size; + + gl_buffer.assign(texture_src_data, texture_src_data_end); } ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); @@ -485,7 +507,7 @@ void CachedSurface::FlushGLBuffer() { std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes); } else { gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( - params.width, params.block_height, params.height, gl_buffer.data(), params.addr); + params.width, params.block_height, params.height, gl_buffer, params.addr); } } @@ -600,8 +622,8 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu return GetSurface(SurfaceParams::CreateForTexture(config)); } -SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( - bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { +SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb, + bool using_depth_fb) { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; // TODO(bunnei): This is hard corded to use just the first render buffer @@ -757,10 +779,12 @@ void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size* } void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { - for (const auto& pair : surface_cache) { - const auto& surface{pair.second}; + for (auto iter = surface_cache.cbegin(); iter != surface_cache.cend();) { + const auto& surface{iter->second}; const auto& params{surface->GetSurfaceParams()}; + ++iter; + if (params.IsOverlappingRegion(addr, size)) { UnregisterSurface(surface); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0c6652c7a..829a76dfe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -35,31 +35,33 @@ struct SurfaceParams { DXT23 = 9, DXT45 = 10, DXN1 = 11, // This is also known as BC4 - BC7U = 12, - ASTC_2D_4X4 = 13, - G8R8 = 14, - BGRA8 = 15, - RGBA32F = 16, - RG32F = 17, - R32F = 18, - R16F = 19, - R16UNORM = 20, - RG16 = 21, - RG16F = 22, - RG16UI = 23, - RG16I = 24, - RG16S = 25, - RGB32F = 26, - SRGBA8 = 27, + DXN2UNORM = 12, + DXN2SNORM = 13, + BC7U = 14, + ASTC_2D_4X4 = 15, + G8R8 = 16, + BGRA8 = 17, + RGBA32F = 18, + RG32F = 19, + R32F = 20, + R16F = 21, + R16UNORM = 22, + RG16 = 23, + RG16F = 24, + RG16UI = 25, + RG16I = 26, + RG16S = 27, + RGB32F = 28, + SRGBA8 = 29, MaxColorFormat, // DepthStencil formats - Z24S8 = 28, - S8Z24 = 29, - Z32F = 30, - Z16 = 31, - Z32FS8 = 32, + Z24S8 = 30, + S8Z24 = 31, + Z32F = 32, + Z16 = 33, + Z32FS8 = 34, MaxDepthStencilFormat, @@ -109,6 +111,8 @@ struct SurfaceParams { 4, // DXT23 4, // DXT45 4, // DXN1 + 4, // DXN2UNORM + 4, // DXN2SNORM 4, // BC7U 4, // ASTC_2D_4X4 1, // G8R8 @@ -153,6 +157,8 @@ struct SurfaceParams { 128, // DXT23 128, // DXT45 64, // DXN1 + 128, // DXN2UNORM + 128, // DXN2SNORM 128, // BC7U 32, // ASTC_2D_4X4 16, // G8R8 @@ -221,6 +227,8 @@ struct SurfaceParams { return PixelFormat::RG32F; case Tegra::RenderTargetFormat::R11G11B10_FLOAT: return PixelFormat::R11FG11FB10F; + case Tegra::RenderTargetFormat::B5G6R5_UNORM: + return PixelFormat::B5G6R5; case Tegra::RenderTargetFormat::RGBA32_UINT: return PixelFormat::RGBA32UI; case Tegra::RenderTargetFormat::R8_UNORM: @@ -303,6 +311,16 @@ struct SurfaceParams { return PixelFormat::DXT45; case Tegra::Texture::TextureFormat::DXN1: return PixelFormat::DXN1; + case Tegra::Texture::TextureFormat::DXN2: + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::DXN2UNORM; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::DXN2SNORM; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::BC7U: return PixelFormat::BC7U; case Tegra::Texture::TextureFormat::ASTC_2D_4X4: @@ -330,89 +348,6 @@ struct SurfaceParams { } } - static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) { - // TODO(Subv): Properly implement this - switch (format) { - case PixelFormat::ABGR8: - case PixelFormat::SRGBA8: - return Tegra::Texture::TextureFormat::A8R8G8B8; - case PixelFormat::B5G6R5: - return Tegra::Texture::TextureFormat::B5G6R5; - case PixelFormat::A2B10G10R10: - return Tegra::Texture::TextureFormat::A2B10G10R10; - case PixelFormat::A1B5G5R5: - return Tegra::Texture::TextureFormat::A1B5G5R5; - case PixelFormat::R8: - return Tegra::Texture::TextureFormat::R8; - case PixelFormat::G8R8: - return Tegra::Texture::TextureFormat::G8R8; - case PixelFormat::RGBA16F: - return Tegra::Texture::TextureFormat::R16_G16_B16_A16; - case PixelFormat::R11FG11FB10F: - return Tegra::Texture::TextureFormat::BF10GF11RF11; - case PixelFormat::RGBA32UI: - return Tegra::Texture::TextureFormat::R32_G32_B32_A32; - case PixelFormat::DXT1: - return Tegra::Texture::TextureFormat::DXT1; - case PixelFormat::DXT23: - return Tegra::Texture::TextureFormat::DXT23; - case PixelFormat::DXT45: - return Tegra::Texture::TextureFormat::DXT45; - case PixelFormat::DXN1: - return Tegra::Texture::TextureFormat::DXN1; - case PixelFormat::BC7U: - return Tegra::Texture::TextureFormat::BC7U; - case PixelFormat::ASTC_2D_4X4: - return Tegra::Texture::TextureFormat::ASTC_2D_4X4; - case PixelFormat::BGRA8: - // TODO(bunnei): This is fine for unswizzling (since we just need the right component - // sizes), but could be a bug if we used this function in different ways. - return Tegra::Texture::TextureFormat::A8R8G8B8; - case PixelFormat::RGBA32F: - return Tegra::Texture::TextureFormat::R32_G32_B32_A32; - case PixelFormat::RGB32F: - return Tegra::Texture::TextureFormat::R32_G32_B32; - case PixelFormat::RG32F: - return Tegra::Texture::TextureFormat::R32_G32; - case PixelFormat::R32F: - return Tegra::Texture::TextureFormat::R32; - case PixelFormat::R16F: - case PixelFormat::R16UNORM: - return Tegra::Texture::TextureFormat::R16; - case PixelFormat::Z32F: - return Tegra::Texture::TextureFormat::ZF32; - case PixelFormat::Z24S8: - return Tegra::Texture::TextureFormat::Z24S8; - case PixelFormat::RG16F: - case PixelFormat::RG16: - case PixelFormat::RG16UI: - case PixelFormat::RG16I: - case PixelFormat::RG16S: - return Tegra::Texture::TextureFormat::R16_G16; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - - static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) { - switch (format) { - case PixelFormat::S8Z24: - return Tegra::DepthFormat::S8_Z24_UNORM; - case PixelFormat::Z24S8: - return Tegra::DepthFormat::Z24_S8_UNORM; - case PixelFormat::Z32F: - return Tegra::DepthFormat::Z32_FLOAT; - case PixelFormat::Z16: - return Tegra::DepthFormat::Z16_UNORM; - case PixelFormat::Z32FS8: - return Tegra::DepthFormat::Z32_S8_X24_FLOAT; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { // TODO(Subv): Implement more component types switch (type) { @@ -441,6 +376,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RGB10_A2_UNORM: case Tegra::RenderTargetFormat::R8_UNORM: case Tegra::RenderTargetFormat::RG16_UNORM: + case Tegra::RenderTargetFormat::B5G6R5_UNORM: return ComponentType::UNorm; case Tegra::RenderTargetFormat::RG16_SNORM: return ComponentType::SNorm; @@ -612,8 +548,7 @@ public: Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); /// Get the color and depth surfaces based on the framebuffer configuration - SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, - const MathUtil::Rectangle<s32>& viewport); + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb); /// Flushes the surface to Switch memory void FlushSurface(const Surface& surface); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e3217db81..32f06f409 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -507,6 +507,8 @@ private: /// Build the GLSL register list. void BuildRegisterList() { + regs.reserve(Register::NumRegisters); + for (size_t index = 0; index < Register::NumRegisters; ++index) { regs.emplace_back(index, suffix); } @@ -523,6 +525,11 @@ private: // shader. ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))"; + case Attribute::Index::Unknown_63: + // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this. + LOG_CRITICAL(HW_GPU, "Unhandled input attribute Unknown_63"); + UNREACHABLE(); + break; default: const u32 index{static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0)}; @@ -534,6 +541,8 @@ private: LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); UNREACHABLE(); } + + return "vec4(0, 0, 0, 0)"; } /// Generates code representing an output attribute register. @@ -602,12 +611,12 @@ private: /// Generates code representing a 19-bit immediate value static std::string GetImmediate19(const Instruction& instr) { - return std::to_string(instr.alu.GetImm20_19()); + return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19()); } /// Generates code representing a 32-bit immediate value static std::string GetImmediate32(const Instruction& instr) { - return std::to_string(instr.alu.GetImm20_32()); + return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32()); } /// Generates code representing a texture sampler. @@ -650,16 +659,17 @@ private: * @param instr Instruction to generate the if condition for. * @returns string containing the predicate condition. */ - std::string GetPredicateCondition(u64 index, bool negate) const { + std::string GetPredicateCondition(u64 index, bool negate) { using Tegra::Shader::Pred; std::string variable; // Index 7 is used as an 'Always True' condition. - if (index == static_cast<u64>(Pred::UnusedIndex)) + if (index == static_cast<u64>(Pred::UnusedIndex)) { variable = "true"; - else + } else { variable = 'p' + std::to_string(index) + '_' + suffix; - + declr_predicates.insert(variable); + } if (negate) { return "!(" + variable + ')'; } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 24b1d956b..5c7b636e4 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -7,6 +7,10 @@ #include <array> #include <glad/glad.h> +#include "video_core/engines/maxwell_3d.h" + +using Regs = Tegra::Engines::Maxwell3D::Regs; + namespace TextureUnits { struct TextureUnit { @@ -120,7 +124,7 @@ public: GLuint bindpoint; GLuint ssbo; }; - std::array<std::array<ConstBufferConfig, 16>, 5> const_buffers{}; + std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers; } draw; struct { diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 16b1bd606..c439446b1 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -27,9 +27,11 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Type::UnsignedNorm: { switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: return GL_UNSIGNED_SHORT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; @@ -43,6 +45,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Type::SignedNorm: { switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return GL_INT; + case Maxwell::VertexAttribute::Size::Size_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_BYTE; case Maxwell::VertexAttribute::Size::Size_16_16: @@ -84,6 +89,8 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { switch (topology) { + case Maxwell::PrimitiveTopology::Points: + return GL_POINTS; case Maxwell::PrimitiveTopology::Triangles: return GL_TRIANGLES; case Maxwell::PrimitiveTopology::TriangleStrip: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index bf9131193..899865e3b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -430,7 +430,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum break; case GL_DEBUG_SEVERITY_NOTIFICATION: case GL_DEBUG_SEVERITY_LOW: - LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message); + LOG_TRACE(Render_OpenGL, format, str_source, str_type, id, message); break; } } diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 65db84ad3..70746a34e 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -54,6 +54,7 @@ u32 BytesPerPixel(TextureFormat format) { return 8; case TextureFormat::DXT23: case TextureFormat::DXT45: + case TextureFormat::DXN2: case TextureFormat::BC7U: // In this case a 'pixel' actually refers to a 4x4 tile. return 16; @@ -85,87 +86,11 @@ u32 BytesPerPixel(TextureFormat format) { } } -static u32 DepthBytesPerPixel(DepthFormat format) { - switch (format) { - case DepthFormat::Z16_UNORM: - return 2; - case DepthFormat::S8_Z24_UNORM: - case DepthFormat::Z24_S8_UNORM: - case DepthFormat::Z32_FLOAT: - return 4; - case DepthFormat::Z32_S8_X24_FLOAT: - return 8; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - break; - } -} - -std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, - u32 block_height) { - u8* data = Memory::GetPointer(address); - u32 bytes_per_pixel = BytesPerPixel(format); - +std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, + u32 height, u32 block_height) { std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); - - switch (format) { - case TextureFormat::DXT1: - case TextureFormat::DXT23: - case TextureFormat::DXT45: - case TextureFormat::DXN1: - case TextureFormat::BC7U: - // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel - // values. - CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, block_height); - break; - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A1B5G5R5: - case TextureFormat::B5G6R5: - case TextureFormat::R8: - case TextureFormat::G8R8: - case TextureFormat::R16_G16_B16_A16: - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R32_G32: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R16_G16: - case TextureFormat::BF10GF11RF11: - case TextureFormat::ASTC_2D_4X4: - case TextureFormat::R32_G32_B32: - CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, block_height); - break; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - break; - } - - return unswizzled_data; -} - -std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height, - u32 block_height) { - u8* data = Memory::GetPointer(address); - u32 bytes_per_pixel = DepthBytesPerPixel(format); - - std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); - - switch (format) { - case DepthFormat::Z16_UNORM: - case DepthFormat::S8_Z24_UNORM: - case DepthFormat::Z24_S8_UNORM: - case DepthFormat::Z32_FLOAT: - case DepthFormat::Z32_S8_X24_FLOAT: - CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, block_height); - break; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - break; - } - + CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, + Memory::GetPointer(address), unswizzled_data.data(), true, block_height); return unswizzled_data; } @@ -179,6 +104,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat case TextureFormat::DXT23: case TextureFormat::DXT45: case TextureFormat::DXN1: + case TextureFormat::DXN2: case TextureFormat::BC7U: case TextureFormat::ASTC_2D_4X4: case TextureFormat::A8R8G8B8: diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 73a4924d1..1f7b731be 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -13,8 +13,8 @@ namespace Tegra::Texture { /** * Unswizzles a swizzled texture without changing its format. */ -std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, - u32 block_height = TICEntry::DefaultBlockHeight); +std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, + u32 height, u32 block_height = TICEntry::DefaultBlockHeight); /** * Unswizzles a swizzled depth texture without changing its format. |