diff options
Diffstat (limited to '')
-rw-r--r-- | src/core/hle/service/nvflinger/nvflinger.cpp | 3 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 5 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 11 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 166 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 117 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 10 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.cpp | 8 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.h | 5 | ||||
-rw-r--r-- | src/video_core/textures/decoders.cpp | 23 | ||||
-rw-r--r-- | src/video_core/textures/decoders.h | 3 | ||||
-rw-r--r-- | src/video_core/textures/texture.h | 25 | ||||
-rw-r--r-- | src/yuzu/main.cpp | 9 | ||||
-rw-r--r-- | src/yuzu_cmd/yuzu.cpp | 9 |
14 files changed, 281 insertions, 118 deletions
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 0d30f54dc..ee1bf0404 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -5,6 +5,7 @@ #include <algorithm> #include "common/alignment.h" +#include "common/microprofile.h" #include "common/scope_exit.h" #include "core/core.h" #include "core/core_timing.h" @@ -128,6 +129,8 @@ void NVFlinger::Compose() { // Search for a queued buffer and acquire it auto buffer = buffer_queue->AcquireBuffer(); + MicroProfileFlip(); + if (buffer == boost::none) { // There was no queued buffer to draw, render previous frame Core::System::GetInstance().perf_stats.EndGameFrame(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a2f162602..2a3ff234a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -218,8 +218,9 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { Texture::TICEntry tic_entry; Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); - ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear, - "TIC versions other than BlockLinear are unimplemented"); + ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || + tic_entry.header_version == Texture::TICHeaderVersion::Pitch, + "TIC versions other than BlockLinear or Pitch are unimplemented"); ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) || (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap), diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index b379d8057..d4fcedace 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -319,7 +319,15 @@ public: } } rt[NumRenderTargets]; - INSERT_PADDING_WORDS(0x80); + struct { + f32 scale_x; + f32 scale_y; + f32 scale_z; + u32 translate_x; + u32 translate_y; + u32 translate_z; + INSERT_PADDING_WORDS(2); + } viewport_transform[NumViewports]; struct { union { @@ -649,6 +657,7 @@ private: "Field " #field_name " has invalid position") ASSERT_REG_POSITION(rt, 0x200); +ASSERT_REG_POSITION(viewport_transform[0], 0x280); ASSERT_REG_POSITION(viewport, 0x300); ASSERT_REG_POSITION(vertex_buffer, 0x35D); ASSERT_REG_POSITION(zeta, 0x3F8); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7b6240e65..13e2a77ce 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -523,9 +523,12 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu src_params.width = std::min(framebuffer.width, pixel_stride); src_params.height = framebuffer.height; src_params.stride = pixel_stride; - src_params.is_tiled = false; + src_params.is_tiled = true; + src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); + src_params.component_type = + SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format); src_params.UpdateParams(); MathUtil::Rectangle<u32> src_rect; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 213b20a21..561c6913d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -36,6 +36,7 @@ using SurfaceType = SurfaceParams::SurfaceType; using PixelFormat = SurfaceParams::PixelFormat; +using ComponentType = SurfaceParams::ComponentType; struct FormatTuple { GLint internal_format; @@ -47,26 +48,24 @@ struct FormatTuple { u32 compression_factor; }; -static constexpr std::array<FormatTuple, 1> fb_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8 +static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1}, // ABGR8 + {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1}, // B5G6R5 + {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1 + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT23 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT45 }}; -static constexpr std::array<FormatTuple, 2> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8 - {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1 -}}; - -static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { +static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); - if (type == SurfaceType::Color) { - ASSERT(static_cast<size_t>(pixel_format) < fb_format_tuples.size()); - return fb_format_tuples[static_cast<unsigned int>(pixel_format)]; + if (type == SurfaceType::ColorTexture) { + ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); + // For now only UNORM components are supported + ASSERT(component_type == ComponentType::UNorm); + return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { // TODO(Subv): Implement depth formats ASSERT_MSG(false, "Unimplemented"); - } else if (type == SurfaceType::Texture) { - ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); - return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; } UNREACHABLE(); @@ -85,56 +84,42 @@ static u16 GetResolutionScaleFactor() { } template <bool morton_to_gl, PixelFormat format> -static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start, + VAddr end) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); - for (u32 y = 0; y < 8; ++y) { - for (u32 x = 0; x < 8; ++x) { - u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; - u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel; - if (morton_to_gl) { - std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel); - } else { - std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel); - } - } - } -} - -template <bool morton_to_gl, PixelFormat format> -void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) { - constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; - constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); - - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the - // configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(base), gl_buffer, morton_to_gl); -} -template <> -void MortonCopy<true, PixelFormat::DXT1>(u32 stride, u32 height, u8* gl_buffer, VAddr base, - VAddr start, VAddr end) { - constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::DXT1) / 8; - constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::DXT1); - - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the - // configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - auto data = - Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::DXT1, stride, height); - std::memcpy(gl_buffer, data.data(), data.size()); + if (morton_to_gl) { + auto data = Tegra::Texture::UnswizzleTexture( + base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, + block_height); + std::memcpy(gl_buffer, data.data(), data.size()); + } else { + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check + // the configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); + VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, + Memory::GetPointer(base), gl_buffer, morton_to_gl); + } } -static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns = { - MortonCopy<true, PixelFormat::RGBA8>, - MortonCopy<true, PixelFormat::DXT1>, +static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), + SurfaceParams::MaxPixelFormat> + morton_to_gl_fns = { + MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, + MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, + MortonCopy<true, PixelFormat::DXT45>, }; -static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns = { - MortonCopy<false, PixelFormat::RGBA8>, - MortonCopy<false, PixelFormat::DXT1>, +static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), + SurfaceParams::MaxPixelFormat> + gl_to_morton_fns = { + MortonCopy<false, PixelFormat::ABGR8>, + MortonCopy<false, PixelFormat::B5G6R5>, + // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported + nullptr, + nullptr, + nullptr, }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -183,7 +168,7 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec u32 buffers = 0; - if (type == SurfaceType::Color || type == SurfaceType::Texture) { + if (type == SurfaceType::ColorTexture) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, @@ -311,15 +296,18 @@ MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& su bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { return std::tie(other_surface.addr, other_surface.width, other_surface.height, - other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == - std::tie(addr, width, height, stride, pixel_format, is_tiled) && + other_surface.stride, other_surface.block_height, other_surface.pixel_format, + other_surface.component_type, + other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height, + pixel_format, component_type, is_tiled) && pixel_format != PixelFormat::Invalid; } bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { return sub_surface.addr >= addr && sub_surface.end <= end && sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && - sub_surface.is_tiled == is_tiled && + sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height && + sub_surface.component_type == component_type && (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && GetSubRect(sub_surface).left + sub_surface.width <= stride; @@ -328,7 +316,8 @@ bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && addr <= expanded_surface.end && expanded_surface.addr <= end && - is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && + is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height && + component_type == expanded_surface.component_type && stride == expanded_surface.stride && (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % BytesInPixels(stride * (is_tiled ? 8 : 1)) == 0; @@ -339,6 +328,10 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { end < texcopy_params.end) { return false; } + if (texcopy_params.block_height != block_height || + texcopy_params.component_type != component_type) + return false; + if (texcopy_params.width != texcopy_params.stride) { const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1))); return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && @@ -481,18 +474,13 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { const u64 start_offset = load_start - addr; if (!is_tiled) { - ASSERT(type == SurfaceType::Color); const u32 bytes_per_pixel{GetFormatBpp() >> 3}; - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check - // the configuration for this and perform more generic un/swizzle - LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); - VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4, - texture_src_data + start_offset, &gl_buffer[start_offset], - true); + std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, + bytes_per_pixel * width * height); } else { - morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, - load_start, load_end); + morton_to_gl_fns[static_cast<size_t>(pixel_format)]( + stride, block_height, height, &gl_buffer[0], addr, load_start, load_end); } } @@ -533,11 +521,10 @@ void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) { if (backup_bytes) std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); } else if (!is_tiled) { - ASSERT(type == SurfaceType::Color); std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); } else { - gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, - flush_start, flush_end); + gl_to_morton_fns[static_cast<size_t>(pixel_format)]( + stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end); } } @@ -556,7 +543,7 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint GLint y0 = static_cast<GLint>(rect.bottom); size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); - const FormatTuple& tuple = GetFormatTuple(pixel_format); + const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); GLuint target_tex = texture.handle; // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in @@ -629,7 +616,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui OpenGLState prev_state = state; SCOPE_EXIT({ prev_state.Apply(); }); - const FormatTuple& tuple = GetFormatTuple(pixel_format); + const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); // Ensure no bad interactions with GL_PACK_ALIGNMENT ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); @@ -662,7 +649,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui state.draw.read_framebuffer = read_fb_handle; state.Apply(); - if (type == SurfaceType::Color || type == SurfaceType::Texture) { + if (type == SurfaceType::ColorTexture) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, @@ -1041,9 +1028,25 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu params.height = config.tic.Height(); params.is_tiled = config.tic.IsTiled(); params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); + + // TODO(Subv): Different types per component are not supported. + ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && + config.tic.r_type.Value() == config.tic.b_type.Value() && + config.tic.r_type.Value() == config.tic.a_type.Value()); + + params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); + + if (config.tic.IsTiled()) { + params.block_height = config.tic.BlockHeight(); + } else { + // Use the texture-provided stride value if the texture isn't tiled. + params.stride = params.PixelsInBytes(config.tic.Pitch()); + } + params.UpdateParams(); - if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0) { + if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 || + params.stride != params.width) { Surface src_surface; MathUtil::Rectangle<u32> rect; std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true); @@ -1094,10 +1097,13 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( color_params.res_scale = resolution_scale_factor; color_params.width = config.width; color_params.height = config.height; + // TODO(Subv): Can framebuffers use a different block height? + color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; SurfaceParams depth_params = color_params; color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address()); color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); + color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); color_params.UpdateParams(); ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); @@ -1293,7 +1299,6 @@ void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface const SurfaceInterval invalid_interval(addr, addr + size); if (region_owner != nullptr) { - ASSERT(region_owner->type != SurfaceType::Texture); ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); // Surfaces can't have a gap ASSERT(region_owner->width == region_owner->stride); @@ -1355,7 +1360,8 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { surface->gl_buffer_size = 0; surface->invalid_regions.insert(surface->GetInterval()); - AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format), + AllocateSurfaceTexture(surface->texture.handle, + GetFormatTuple(surface->pixel_format, surface->component_type), surface->GetScaledWidth(), surface->GetScaledHeight()); return surface; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 3293905d6..6861efe16 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -52,27 +52,45 @@ enum class ScaleMatch { struct SurfaceParams { enum class PixelFormat { - RGBA8 = 0, - DXT1 = 1, + ABGR8 = 0, + B5G6R5 = 1, + DXT1 = 2, + DXT23 = 3, + DXT45 = 4, + + Max, Invalid = 255, }; + static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max); + + enum class ComponentType { + Invalid = 0, + SNorm = 1, + UNorm = 2, + SInt = 3, + UInt = 4, + Float = 5, + }; + enum class SurfaceType { - Color = 0, - Texture = 1, - Depth = 2, - DepthStencil = 3, - Fill = 4, - Invalid = 5 + ColorTexture = 0, + Depth = 1, + DepthStencil = 2, + Fill = 3, + Invalid = 4, }; static constexpr unsigned int GetFormatBpp(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; - constexpr std::array<unsigned int, 2> bpp_table = { - 32, // RGBA8 - 64, // DXT1 + constexpr std::array<unsigned int, MaxPixelFormat> bpp_table = { + 32, // ABGR8 + 16, // B5G6R5 + 64, // DXT1 + 128, // DXT23 + 128, // DXT45 }; ASSERT(static_cast<size_t>(format) < bpp_table.size()); @@ -85,7 +103,7 @@ struct SurfaceParams { static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { switch (format) { case Tegra::RenderTargetFormat::RGBA8_UNORM: - return PixelFormat::RGBA8; + return PixelFormat::ABGR8; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -95,7 +113,7 @@ struct SurfaceParams { static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::RGBA8; + return PixelFormat::ABGR8; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -106,9 +124,67 @@ struct SurfaceParams { // TODO(Subv): Properly implement this switch (format) { case Tegra::Texture::TextureFormat::A8R8G8B8: - return PixelFormat::RGBA8; + return PixelFormat::ABGR8; + case Tegra::Texture::TextureFormat::B5G6R5: + return PixelFormat::B5G6R5; case Tegra::Texture::TextureFormat::DXT1: return PixelFormat::DXT1; + case Tegra::Texture::TextureFormat::DXT23: + return PixelFormat::DXT23; + case Tegra::Texture::TextureFormat::DXT45: + return PixelFormat::DXT45; + default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); + } + } + + static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) { + // TODO(Subv): Properly implement this + switch (format) { + case PixelFormat::ABGR8: + return Tegra::Texture::TextureFormat::A8R8G8B8; + case PixelFormat::B5G6R5: + return Tegra::Texture::TextureFormat::B5G6R5; + case PixelFormat::DXT1: + return Tegra::Texture::TextureFormat::DXT1; + case PixelFormat::DXT23: + return Tegra::Texture::TextureFormat::DXT23; + case PixelFormat::DXT45: + return Tegra::Texture::TextureFormat::DXT45; + default: + UNREACHABLE(); + } + } + + static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { + // TODO(Subv): Implement more component types + switch (type) { + case Tegra::Texture::ComponentType::UNORM: + return ComponentType::UNorm; + default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type)); + UNREACHABLE(); + } + } + + static ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) { + // TODO(Subv): Implement more render targets + switch (format) { + case Tegra::RenderTargetFormat::RGBA8_UNORM: + case Tegra::RenderTargetFormat::RGB10_A2_UNORM: + return ComponentType::UNorm; + default: + NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); + } + } + + static ComponentType ComponentTypeFromGPUPixelFormat( + Tegra::FramebufferConfig::PixelFormat format) { + switch (format) { + case Tegra::FramebufferConfig::PixelFormat::ABGR8: + return ComponentType::UNorm; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -119,8 +195,7 @@ struct SurfaceParams { SurfaceType a_type = GetFormatType(pixel_format_a); SurfaceType b_type = GetFormatType(pixel_format_b); - if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && - (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { + if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) { return true; } @@ -136,12 +211,8 @@ struct SurfaceParams { } static SurfaceType GetFormatType(PixelFormat pixel_format) { - if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::RGBA8)) { - return SurfaceType::Color; - } - - if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::DXT1)) { - return SurfaceType::Texture; + if (static_cast<size_t>(pixel_format) < MaxPixelFormat) { + return SurfaceType::ColorTexture; } // TODO(Subv): Implement the other formats @@ -213,11 +284,13 @@ struct SurfaceParams { u32 width = 0; u32 height = 0; u32 stride = 0; + u32 block_height = 0; u16 res_scale = 1; bool is_tiled = false; PixelFormat pixel_format = PixelFormat::Invalid; SurfaceType type = SurfaceType::Invalid; + ComponentType component_type = ComponentType::Invalid; }; struct CachedSurface : SurfaceParams { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 8b7f17601..254f6e2c3 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -29,9 +29,15 @@ out gl_PerVertex { out vec4 position; +layout (std140) uniform vs_config { + vec4 viewport_flip; +}; + void main() { exec_shader(); + // Viewport can be flipped, which is unsupported by glViewport + position.xy *= viewport_flip.xy; gl_Position = position; } )"; @@ -52,6 +58,10 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo in vec4 position; out vec4 color; +layout (std140) uniform fs_config { + vec4 viewport_flip; +}; + uniform sampler2D tex[32]; void main() { diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 85b838faa..17b3925a0 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -53,6 +53,12 @@ void SetShaderSamplerBindings(GLuint shader) { } // namespace Impl -void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {} +void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + // TODO(bunnei): Support more than one viewport + viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0 : 1.0; + viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0 : 1.0; +} } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index be63320e0..e963b4b7e 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -30,10 +30,9 @@ void SetShaderSamplerBindings(GLuint shader); // Not following that rule will cause problems on some AMD drivers. struct MaxwellUniformData { void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); - // TODO(Subv): Use this for something. + alignas(16) GLvec4 viewport_flip; }; -// static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is -// incorrect"); +static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); static_assert(sizeof(MaxwellUniformData) < 16384, "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 2e87281eb..4df687786 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -48,31 +48,39 @@ u32 BytesPerPixel(TextureFormat format) { case TextureFormat::DXT1: // In this case a 'pixel' actually refers to a 4x4 tile. return 8; + case TextureFormat::DXT23: + case TextureFormat::DXT45: + // In this case a 'pixel' actually refers to a 4x4 tile. + return 16; case TextureFormat::A8R8G8B8: return 4; + case TextureFormat::B5G6R5: + return 2; default: UNIMPLEMENTED_MSG("Format not implemented"); break; } } -std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) { +std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, + u32 block_height) { u8* data = Memory::GetPointer(address); u32 bytes_per_pixel = BytesPerPixel(format); - static constexpr u32 DefaultBlockHeight = 16; - std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); switch (format) { case TextureFormat::DXT1: - // In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values. + case TextureFormat::DXT23: + case TextureFormat::DXT45: + // In the DXT formats, each 4x4 tile is swizzled instead of just individual pixel values. CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, DefaultBlockHeight); + unswizzled_data.data(), true, block_height); break; case TextureFormat::A8R8G8B8: + case TextureFormat::B5G6R5: CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, DefaultBlockHeight); + unswizzled_data.data(), true, block_height); break; default: UNIMPLEMENTED_MSG("Format not implemented"); @@ -89,7 +97,10 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat // TODO(Subv): Implement. switch (format) { case TextureFormat::DXT1: + case TextureFormat::DXT23: + case TextureFormat::DXT45: case TextureFormat::A8R8G8B8: + case TextureFormat::B5G6R5: // TODO(Subv): For the time being just forward the same data without any decoding. rgba_data = texture_data; break; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 0c21694ff..a700911cf 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -14,7 +14,8 @@ namespace Texture { /** * Unswizzles a swizzled texture without changing its format. */ -std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height); +std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, + u32 block_height = TICEntry::DefaultBlockHeight); /** * Decodes an unswizzled texture into a A8R8G8B8 texture. diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 9d443ea90..86e45aa88 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -4,6 +4,7 @@ #pragma once +#include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -14,6 +15,7 @@ namespace Texture { enum class TextureFormat : u32 { A8R8G8B8 = 0x8, + B5G6R5 = 0x15, DXT1 = 0x24, DXT23 = 0x25, DXT45 = 0x26, @@ -57,6 +59,8 @@ union TextureHandle { static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); struct TICEntry { + static constexpr u32 DefaultBlockHeight = 16; + union { u32 raw; BitField<0, 7, TextureFormat> format; @@ -70,7 +74,12 @@ struct TICEntry { BitField<0, 16, u32> address_high; BitField<21, 3, TICHeaderVersion> header_version; }; - INSERT_PADDING_BYTES(4); + union { + BitField<3, 3, u32> block_height; + + // High 16 bits of the pitch value + BitField<0, 16, u32> pitch_high; + }; union { BitField<0, 16, u32> width_minus_1; BitField<23, 4, TextureType> texture_type; @@ -82,6 +91,13 @@ struct TICEntry { return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); } + u32 Pitch() const { + ASSERT(header_version == TICHeaderVersion::Pitch || + header_version == TICHeaderVersion::PitchColorKey); + // The pitch value is 21 bits, and is 32B aligned. + return pitch_high << 5; + } + u32 Width() const { return width_minus_1 + 1; } @@ -90,6 +106,13 @@ struct TICEntry { return height_minus_1 + 1; } + u32 BlockHeight() const { + ASSERT(header_version == TICHeaderVersion::BlockLinear || + header_version == TICHeaderVersion::BlockLinearColorKey); + // The block height is stored in log2 format. + return 1 << block_height; + } + bool IsTiled() const { return header_version == TICHeaderVersion::BlockLinear || header_version == TICHeaderVersion::BlockLinearColorKey; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 534145f7e..20796e92c 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -44,6 +44,15 @@ Q_IMPORT_PLUGIN(QWindowsIntegrationPlugin); #endif +#ifdef _WIN32 +extern "C" { +// tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable +// graphics +__declspec(dllexport) unsigned long NvOptimusEnablement = 0x00000001; +__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1; +} +#endif + /** * "Callouts" are one-time instructional messages shown to the user. In the config settings, there * is a bitfield "callout_flags" options, used to track if a message has already been shown to the diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 261312f62..a91140447 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -37,6 +37,15 @@ #include "yuzu_cmd/config.h" #include "yuzu_cmd/emu_window/emu_window_sdl2.h" +#ifdef _WIN32 +extern "C" { +// tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable +// graphics +__declspec(dllexport) unsigned long NvOptimusEnablement = 0x00000001; +__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1; +} +#endif + static void PrintHelp(const char* argv0) { std::cout << "Usage: " << argv0 << " [options] <filename>\n" |