diff options
Diffstat (limited to 'src/video_core')
42 files changed, 1361 insertions, 1290 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index dd7ce8c99..b5dc68902 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -524,11 +524,8 @@ private: void MarkRegionAsWritten(VAddr start, VAddr end) { const u64 page_end = end >> WRITE_PAGE_BIT; for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { - auto it = written_pages.find(page_start); - if (it != written_pages.end()) { - it->second = it->second + 1; - } else { - written_pages.insert_or_assign(page_start, 1); + if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) { + ++it->second; } } } @@ -539,7 +536,7 @@ private: auto it = written_pages.find(page_start); if (it != written_pages.end()) { if (it->second > 1) { - it->second = it->second - 1; + --it->second; } else { written_pages.erase(it); } diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp index 6c426b035..b06c32c84 100644 --- a/src/video_core/compatible_formats.cpp +++ b/src/video_core/compatible_formats.cpp @@ -17,101 +17,94 @@ namespace { // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt constexpr std::array VIEW_CLASS_128_BITS = { - PixelFormat::RGBA32F, - PixelFormat::RGBA32UI, + PixelFormat::R32G32B32A32_FLOAT, + PixelFormat::R32G32B32A32_UINT, + PixelFormat::R32G32B32A32_SINT, }; -// Missing formats: -// PixelFormat::RGBA32I constexpr std::array VIEW_CLASS_96_BITS = { - PixelFormat::RGB32F, + PixelFormat::R32G32B32_FLOAT, }; // Missing formats: // PixelFormat::RGB32UI, // PixelFormat::RGB32I, constexpr std::array VIEW_CLASS_64_BITS = { - PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI, - PixelFormat::RGBA16U, PixelFormat::RGBA16F, PixelFormat::RGBA16S, + PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT, + PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT, + PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, + PixelFormat::R16G16B16A16_UINT, PixelFormat::R16G16B16A16_SINT, }; -// Missing formats: -// PixelFormat::RGBA16I -// PixelFormat::RG32I // TODO: How should we handle 48 bits? constexpr std::array VIEW_CLASS_32_BITS = { - PixelFormat::RG16F, PixelFormat::R11FG11FB10F, PixelFormat::R32F, - PixelFormat::A2B10G10R10U, PixelFormat::RG16UI, PixelFormat::R32UI, - PixelFormat::RG16I, PixelFormat::R32I, PixelFormat::ABGR8U, - PixelFormat::RG16, PixelFormat::ABGR8S, PixelFormat::RG16S, - PixelFormat::RGBA8_SRGB, PixelFormat::E5B9G9R9F, PixelFormat::BGRA8, - PixelFormat::BGRA8_SRGB, + PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT, + PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT, + PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM, + PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM, + PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::B8G8R8A8_UNORM, + PixelFormat::B8G8R8A8_SRGB, PixelFormat::A8B8G8R8_UINT, PixelFormat::A8B8G8R8_SINT, + PixelFormat::A2B10G10R10_UINT, }; -// Missing formats: -// PixelFormat::RGBA8UI -// PixelFormat::RGBA8I -// PixelFormat::RGB10_A2_UI // TODO: How should we handle 24 bits? constexpr std::array VIEW_CLASS_16_BITS = { - PixelFormat::R16F, PixelFormat::RG8UI, PixelFormat::R16UI, PixelFormat::R16I, - PixelFormat::RG8U, PixelFormat::R16U, PixelFormat::RG8S, PixelFormat::R16S, + PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT, + PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM, + PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT, }; -// Missing formats: -// PixelFormat::RG8I constexpr std::array VIEW_CLASS_8_BITS = { - PixelFormat::R8UI, - PixelFormat::R8U, + PixelFormat::R8_UINT, + PixelFormat::R8_UNORM, + PixelFormat::R8_SINT, + PixelFormat::R8_SNORM, }; -// Missing formats: -// PixelFormat::R8I -// PixelFormat::R8S constexpr std::array VIEW_CLASS_RGTC1_RED = { - PixelFormat::DXN1, + PixelFormat::BC4_UNORM, + PixelFormat::BC4_SNORM, }; -// Missing formats: -// COMPRESSED_SIGNED_RED_RGTC1 constexpr std::array VIEW_CLASS_RGTC2_RG = { - PixelFormat::DXN2UNORM, - PixelFormat::DXN2SNORM, + PixelFormat::BC5_UNORM, + PixelFormat::BC5_SNORM, }; constexpr std::array VIEW_CLASS_BPTC_UNORM = { - PixelFormat::BC7U, - PixelFormat::BC7U_SRGB, + PixelFormat::BC7_UNORM, + PixelFormat::BC7_SRGB, }; constexpr std::array VIEW_CLASS_BPTC_FLOAT = { - PixelFormat::BC6H_SF16, - PixelFormat::BC6H_UF16, + PixelFormat::BC6H_SFLOAT, + PixelFormat::BC6H_UFLOAT, }; // Compatibility table taken from Table 4.X.1 in: // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt constexpr std::array COPY_CLASS_128_BITS = { - PixelFormat::RGBA32UI, PixelFormat::RGBA32F, PixelFormat::DXT23, - PixelFormat::DXT23_SRGB, PixelFormat::DXT45, PixelFormat::DXT45_SRGB, - PixelFormat::DXN2SNORM, PixelFormat::BC7U, PixelFormat::BC7U_SRGB, - PixelFormat::BC6H_SF16, PixelFormat::BC6H_UF16, + PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT, + PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM, + PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, + PixelFormat::BC7_UNORM, PixelFormat::BC7_SRGB, PixelFormat::BC6H_SFLOAT, + PixelFormat::BC6H_UFLOAT, }; // Missing formats: // PixelFormat::RGBA32I // COMPRESSED_RG_RGTC2 constexpr std::array COPY_CLASS_64_BITS = { - PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI, - PixelFormat::RGBA16U, PixelFormat::RGBA16S, PixelFormat::DXT1_SRGB, PixelFormat::DXT1, - + PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT, + PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, + PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT, + PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_SINT, + PixelFormat::BC1_RGBA_UNORM, PixelFormat::BC1_RGBA_SRGB, }; // Missing formats: -// PixelFormat::RGBA16I -// PixelFormat::RG32I, // COMPRESSED_RGB_S3TC_DXT1_EXT // COMPRESSED_SRGB_S3TC_DXT1_EXT // COMPRESSED_RGBA_S3TC_DXT1_EXT diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 8d04d9fd9..19a34c402 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -40,53 +40,61 @@ namespace Tegra { enum class RenderTargetFormat : u32 { NONE = 0x0, - RGBA32_FLOAT = 0xC0, - RGBA32_UINT = 0xC2, - RGBA16_UNORM = 0xC6, - RGBA16_SNORM = 0xC7, - RGBA16_UINT = 0xC9, - RGBA16_FLOAT = 0xCA, - RG32_FLOAT = 0xCB, - RG32_UINT = 0xCD, - RGBX16_FLOAT = 0xCE, - BGRA8_UNORM = 0xCF, - BGRA8_SRGB = 0xD0, - RGB10_A2_UNORM = 0xD1, - RGBA8_UNORM = 0xD5, - RGBA8_SRGB = 0xD6, - RGBA8_SNORM = 0xD7, - RGBA8_UINT = 0xD9, - RG16_UNORM = 0xDA, - RG16_SNORM = 0xDB, - RG16_SINT = 0xDC, - RG16_UINT = 0xDD, - RG16_FLOAT = 0xDE, - R11G11B10_FLOAT = 0xE0, + R32B32G32A32_FLOAT = 0xC0, + R32G32B32A32_SINT = 0xC1, + R32G32B32A32_UINT = 0xC2, + R16G16B16A16_UNORM = 0xC6, + R16G16B16A16_SNORM = 0xC7, + R16G16B16A16_SINT = 0xC8, + R16G16B16A16_UINT = 0xC9, + R16G16B16A16_FLOAT = 0xCA, + R32G32_FLOAT = 0xCB, + R32G32_SINT = 0xCC, + R32G32_UINT = 0xCD, + R16G16B16X16_FLOAT = 0xCE, + B8G8R8A8_UNORM = 0xCF, + B8G8R8A8_SRGB = 0xD0, + A2B10G10R10_UNORM = 0xD1, + A2B10G10R10_UINT = 0xD2, + A8B8G8R8_UNORM = 0xD5, + A8B8G8R8_SRGB = 0xD6, + A8B8G8R8_SNORM = 0xD7, + A8B8G8R8_SINT = 0xD8, + A8B8G8R8_UINT = 0xD9, + R16G16_UNORM = 0xDA, + R16G16_SNORM = 0xDB, + R16G16_SINT = 0xDC, + R16G16_UINT = 0xDD, + R16G16_FLOAT = 0xDE, + B10G11R11_FLOAT = 0xE0, R32_SINT = 0xE3, R32_UINT = 0xE4, R32_FLOAT = 0xE5, - B5G6R5_UNORM = 0xE8, - BGR5A1_UNORM = 0xE9, - RG8_UNORM = 0xEA, - RG8_SNORM = 0xEB, - RG8_UINT = 0xED, + R5G6B5_UNORM = 0xE8, + A1R5G5B5_UNORM = 0xE9, + R8G8_UNORM = 0xEA, + R8G8_SNORM = 0xEB, + R8G8_SINT = 0xEC, + R8G8_UINT = 0xED, R16_UNORM = 0xEE, R16_SNORM = 0xEF, R16_SINT = 0xF0, R16_UINT = 0xF1, R16_FLOAT = 0xF2, R8_UNORM = 0xF3, + R8_SNORM = 0xF4, + R8_SINT = 0xF5, R8_UINT = 0xF6, }; enum class DepthFormat : u32 { - Z32_FLOAT = 0xA, - Z16_UNORM = 0x13, - S8_Z24_UNORM = 0x14, - Z24_X8_UNORM = 0x15, - Z24_S8_UNORM = 0x16, - Z24_C8_UNORM = 0x18, - Z32_S8_X24_FLOAT = 0x19, + D32_FLOAT = 0xA, + D16_UNORM = 0x13, + S8_UINT_Z24_UNORM = 0x14, + D24X8_UNORM = 0x15, + D24S8_UNORM = 0x16, + D24C8_UNORM = 0x18, + D32_FLOAT_S8X24_UINT = 0x19, }; struct CommandListHeader; @@ -97,9 +105,9 @@ class DebugContext; */ struct FramebufferConfig { enum class PixelFormat : u32 { - ABGR8 = 1, - RGB565 = 4, - BGRA8 = 5, + A8B8G8R8_UNORM = 1, + RGB565_UNORM = 4, + B8G8R8A8_UNORM = 5, }; VAddr address; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 738c6f0c1..bf761abf2 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -44,9 +44,9 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, dma_pusher.DispatchCalls(); } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); - } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) { + } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { renderer.Rasterizer().ReleaseFences(); - } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) { + } else if (std::holds_alternative<GPUTickCommand>(next.data)) { system.GPU().TickWork(); } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { renderer.Rasterizer().FlushRegion(data->addr, data->size); diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 07292702f..c1b9e4ad9 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -419,7 +419,6 @@ void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { void MacroJITx64Impl::Compile() { MICROPROFILE_SCOPE(MacroJitCompile); - bool keep_executing = true; labels.fill(Xbyak::Label()); Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 836b25c1d..9da9fb4ff 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -41,146 +41,168 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth } static constexpr ConversionArray morton_to_linear_fns = { - MortonCopy<true, PixelFormat::ABGR8U>, - MortonCopy<true, PixelFormat::ABGR8S>, - MortonCopy<true, PixelFormat::ABGR8UI>, - MortonCopy<true, PixelFormat::B5G6R5U>, - MortonCopy<true, PixelFormat::A2B10G10R10U>, - MortonCopy<true, PixelFormat::A1B5G5R5U>, - MortonCopy<true, PixelFormat::R8U>, - MortonCopy<true, PixelFormat::R8UI>, - MortonCopy<true, PixelFormat::RGBA16F>, - MortonCopy<true, PixelFormat::RGBA16U>, - MortonCopy<true, PixelFormat::RGBA16S>, - MortonCopy<true, PixelFormat::RGBA16UI>, - MortonCopy<true, PixelFormat::R11FG11FB10F>, - MortonCopy<true, PixelFormat::RGBA32UI>, - MortonCopy<true, PixelFormat::DXT1>, - MortonCopy<true, PixelFormat::DXT23>, - MortonCopy<true, PixelFormat::DXT45>, - MortonCopy<true, PixelFormat::DXN1>, - MortonCopy<true, PixelFormat::DXN2UNORM>, - MortonCopy<true, PixelFormat::DXN2SNORM>, - MortonCopy<true, PixelFormat::BC7U>, - MortonCopy<true, PixelFormat::BC6H_UF16>, - MortonCopy<true, PixelFormat::BC6H_SF16>, - MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::BGRA8>, - MortonCopy<true, PixelFormat::RGBA32F>, - MortonCopy<true, PixelFormat::RG32F>, - MortonCopy<true, PixelFormat::R32F>, - MortonCopy<true, PixelFormat::R16F>, - MortonCopy<true, PixelFormat::R16U>, - MortonCopy<true, PixelFormat::R16S>, - MortonCopy<true, PixelFormat::R16UI>, - MortonCopy<true, PixelFormat::R16I>, - MortonCopy<true, PixelFormat::RG16>, - MortonCopy<true, PixelFormat::RG16F>, - MortonCopy<true, PixelFormat::RG16UI>, - MortonCopy<true, PixelFormat::RG16I>, - MortonCopy<true, PixelFormat::RG16S>, - MortonCopy<true, PixelFormat::RGB32F>, - MortonCopy<true, PixelFormat::RGBA8_SRGB>, - MortonCopy<true, PixelFormat::RG8U>, - MortonCopy<true, PixelFormat::RG8S>, - MortonCopy<true, PixelFormat::RG8UI>, - MortonCopy<true, PixelFormat::RG32UI>, - MortonCopy<true, PixelFormat::RGBX16F>, - MortonCopy<true, PixelFormat::R32UI>, - MortonCopy<true, PixelFormat::R32I>, - MortonCopy<true, PixelFormat::ASTC_2D_8X8>, - MortonCopy<true, PixelFormat::ASTC_2D_8X5>, - MortonCopy<true, PixelFormat::ASTC_2D_5X4>, - MortonCopy<true, PixelFormat::BGRA8_SRGB>, - MortonCopy<true, PixelFormat::DXT1_SRGB>, - MortonCopy<true, PixelFormat::DXT23_SRGB>, - MortonCopy<true, PixelFormat::DXT45_SRGB>, - MortonCopy<true, PixelFormat::BC7U_SRGB>, - MortonCopy<true, PixelFormat::R4G4B4A4U>, + MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>, + MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>, + MortonCopy<true, PixelFormat::A8B8G8R8_SINT>, + MortonCopy<true, PixelFormat::A8B8G8R8_UINT>, + MortonCopy<true, PixelFormat::R5G6B5_UNORM>, + MortonCopy<true, PixelFormat::B5G6R5_UNORM>, + MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>, + MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>, + MortonCopy<true, PixelFormat::A2B10G10R10_UINT>, + MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>, + MortonCopy<true, PixelFormat::R8_UNORM>, + MortonCopy<true, PixelFormat::R8_SNORM>, + MortonCopy<true, PixelFormat::R8_SINT>, + MortonCopy<true, PixelFormat::R8_UINT>, + MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>, + MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>, + MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>, + MortonCopy<true, PixelFormat::R16G16B16A16_SINT>, + MortonCopy<true, PixelFormat::R16G16B16A16_UINT>, + MortonCopy<true, PixelFormat::B10G11R11_FLOAT>, + MortonCopy<true, PixelFormat::R32G32B32A32_UINT>, + MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>, + MortonCopy<true, PixelFormat::BC2_UNORM>, + MortonCopy<true, PixelFormat::BC3_UNORM>, + MortonCopy<true, PixelFormat::BC4_UNORM>, + MortonCopy<true, PixelFormat::BC4_SNORM>, + MortonCopy<true, PixelFormat::BC5_UNORM>, + MortonCopy<true, PixelFormat::BC5_SNORM>, + MortonCopy<true, PixelFormat::BC7_UNORM>, + MortonCopy<true, PixelFormat::BC6H_UFLOAT>, + MortonCopy<true, PixelFormat::BC6H_SFLOAT>, + MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>, + MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>, + MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>, + MortonCopy<true, PixelFormat::R32G32B32A32_SINT>, + MortonCopy<true, PixelFormat::R32G32_FLOAT>, + MortonCopy<true, PixelFormat::R32G32_SINT>, + MortonCopy<true, PixelFormat::R32_FLOAT>, + MortonCopy<true, PixelFormat::R16_FLOAT>, + MortonCopy<true, PixelFormat::R16_UNORM>, + MortonCopy<true, PixelFormat::R16_SNORM>, + MortonCopy<true, PixelFormat::R16_UINT>, + MortonCopy<true, PixelFormat::R16_SINT>, + MortonCopy<true, PixelFormat::R16G16_UNORM>, + MortonCopy<true, PixelFormat::R16G16_FLOAT>, + MortonCopy<true, PixelFormat::R16G16_UINT>, + MortonCopy<true, PixelFormat::R16G16_SINT>, + MortonCopy<true, PixelFormat::R16G16_SNORM>, + MortonCopy<true, PixelFormat::R32G32B32_FLOAT>, + MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>, + MortonCopy<true, PixelFormat::R8G8_UNORM>, + MortonCopy<true, PixelFormat::R8G8_SNORM>, + MortonCopy<true, PixelFormat::R8G8_SINT>, + MortonCopy<true, PixelFormat::R8G8_UINT>, + MortonCopy<true, PixelFormat::R32G32_UINT>, + MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>, + MortonCopy<true, PixelFormat::R32_UINT>, + MortonCopy<true, PixelFormat::R32_SINT>, + MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>, + MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>, + MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>, + MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>, + MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>, + MortonCopy<true, PixelFormat::BC2_SRGB>, + MortonCopy<true, PixelFormat::BC3_SRGB>, + MortonCopy<true, PixelFormat::BC7_SRGB>, + MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_5X5>, + MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_10X8>, + MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_6X6>, + MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_10X10>, + MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_12X12>, + MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_8X6>, + MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_6X5>, + MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>, - MortonCopy<true, PixelFormat::E5B9G9R9F>, - MortonCopy<true, PixelFormat::Z32F>, - MortonCopy<true, PixelFormat::Z16>, - MortonCopy<true, PixelFormat::Z24S8>, - MortonCopy<true, PixelFormat::S8Z24>, - MortonCopy<true, PixelFormat::Z32FS8>, + MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>, + MortonCopy<true, PixelFormat::D32_FLOAT>, + MortonCopy<true, PixelFormat::D16_UNORM>, + MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>, + MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>, + MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>, }; static constexpr ConversionArray linear_to_morton_fns = { - MortonCopy<false, PixelFormat::ABGR8U>, - MortonCopy<false, PixelFormat::ABGR8S>, - MortonCopy<false, PixelFormat::ABGR8UI>, - MortonCopy<false, PixelFormat::B5G6R5U>, - MortonCopy<false, PixelFormat::A2B10G10R10U>, - MortonCopy<false, PixelFormat::A1B5G5R5U>, - MortonCopy<false, PixelFormat::R8U>, - MortonCopy<false, PixelFormat::R8UI>, - MortonCopy<false, PixelFormat::RGBA16F>, - MortonCopy<false, PixelFormat::RGBA16S>, - MortonCopy<false, PixelFormat::RGBA16U>, - MortonCopy<false, PixelFormat::RGBA16UI>, - MortonCopy<false, PixelFormat::R11FG11FB10F>, - MortonCopy<false, PixelFormat::RGBA32UI>, - MortonCopy<false, PixelFormat::DXT1>, - MortonCopy<false, PixelFormat::DXT23>, - MortonCopy<false, PixelFormat::DXT45>, - MortonCopy<false, PixelFormat::DXN1>, - MortonCopy<false, PixelFormat::DXN2UNORM>, - MortonCopy<false, PixelFormat::DXN2SNORM>, - MortonCopy<false, PixelFormat::BC7U>, - MortonCopy<false, PixelFormat::BC6H_UF16>, - MortonCopy<false, PixelFormat::BC6H_SF16>, + MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>, + MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>, + MortonCopy<false, PixelFormat::A8B8G8R8_SINT>, + MortonCopy<false, PixelFormat::A8B8G8R8_UINT>, + MortonCopy<false, PixelFormat::R5G6B5_UNORM>, + MortonCopy<false, PixelFormat::B5G6R5_UNORM>, + MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>, + MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>, + MortonCopy<false, PixelFormat::A2B10G10R10_UINT>, + MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>, + MortonCopy<false, PixelFormat::R8_UNORM>, + MortonCopy<false, PixelFormat::R8_SNORM>, + MortonCopy<false, PixelFormat::R8_SINT>, + MortonCopy<false, PixelFormat::R8_UINT>, + MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>, + MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>, + MortonCopy<false, PixelFormat::R16G16B16A16_SINT>, + MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>, + MortonCopy<false, PixelFormat::R16G16B16A16_UINT>, + MortonCopy<false, PixelFormat::B10G11R11_FLOAT>, + MortonCopy<false, PixelFormat::R32G32B32A32_UINT>, + MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>, + MortonCopy<false, PixelFormat::BC2_UNORM>, + MortonCopy<false, PixelFormat::BC3_UNORM>, + MortonCopy<false, PixelFormat::BC4_UNORM>, + MortonCopy<false, PixelFormat::BC4_SNORM>, + MortonCopy<false, PixelFormat::BC5_UNORM>, + MortonCopy<false, PixelFormat::BC5_SNORM>, + MortonCopy<false, PixelFormat::BC7_UNORM>, + MortonCopy<false, PixelFormat::BC6H_UFLOAT>, + MortonCopy<false, PixelFormat::BC6H_SFLOAT>, // TODO(Subv): Swizzling ASTC formats are not supported nullptr, - MortonCopy<false, PixelFormat::BGRA8>, - MortonCopy<false, PixelFormat::RGBA32F>, - MortonCopy<false, PixelFormat::RG32F>, - MortonCopy<false, PixelFormat::R32F>, - MortonCopy<false, PixelFormat::R16F>, - MortonCopy<false, PixelFormat::R16U>, - MortonCopy<false, PixelFormat::R16S>, - MortonCopy<false, PixelFormat::R16UI>, - MortonCopy<false, PixelFormat::R16I>, - MortonCopy<false, PixelFormat::RG16>, - MortonCopy<false, PixelFormat::RG16F>, - MortonCopy<false, PixelFormat::RG16UI>, - MortonCopy<false, PixelFormat::RG16I>, - MortonCopy<false, PixelFormat::RG16S>, - MortonCopy<false, PixelFormat::RGB32F>, - MortonCopy<false, PixelFormat::RGBA8_SRGB>, - MortonCopy<false, PixelFormat::RG8U>, - MortonCopy<false, PixelFormat::RG8S>, - MortonCopy<false, PixelFormat::RG8UI>, - MortonCopy<false, PixelFormat::RG32UI>, - MortonCopy<false, PixelFormat::RGBX16F>, - MortonCopy<false, PixelFormat::R32UI>, - MortonCopy<false, PixelFormat::R32I>, + MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>, + MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>, + MortonCopy<false, PixelFormat::R32G32B32A32_SINT>, + MortonCopy<false, PixelFormat::R32G32_FLOAT>, + MortonCopy<false, PixelFormat::R32G32_SINT>, + MortonCopy<false, PixelFormat::R32_FLOAT>, + MortonCopy<false, PixelFormat::R16_FLOAT>, + MortonCopy<false, PixelFormat::R16_UNORM>, + MortonCopy<false, PixelFormat::R16_SNORM>, + MortonCopy<false, PixelFormat::R16_UINT>, + MortonCopy<false, PixelFormat::R16_SINT>, + MortonCopy<false, PixelFormat::R16G16_UNORM>, + MortonCopy<false, PixelFormat::R16G16_FLOAT>, + MortonCopy<false, PixelFormat::R16G16_UINT>, + MortonCopy<false, PixelFormat::R16G16_SINT>, + MortonCopy<false, PixelFormat::R16G16_SNORM>, + MortonCopy<false, PixelFormat::R32G32B32_FLOAT>, + MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>, + MortonCopy<false, PixelFormat::R8G8_UNORM>, + MortonCopy<false, PixelFormat::R8G8_SNORM>, + MortonCopy<false, PixelFormat::R8G8_SINT>, + MortonCopy<false, PixelFormat::R8G8_UINT>, + MortonCopy<false, PixelFormat::R32G32_UINT>, + MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>, + MortonCopy<false, PixelFormat::R32_UINT>, + MortonCopy<false, PixelFormat::R32_SINT>, nullptr, nullptr, nullptr, - MortonCopy<false, PixelFormat::BGRA8_SRGB>, - MortonCopy<false, PixelFormat::DXT1_SRGB>, - MortonCopy<false, PixelFormat::DXT23_SRGB>, - MortonCopy<false, PixelFormat::DXT45_SRGB>, - MortonCopy<false, PixelFormat::BC7U_SRGB>, - MortonCopy<false, PixelFormat::R4G4B4A4U>, + MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>, + MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>, + MortonCopy<false, PixelFormat::BC2_SRGB>, + MortonCopy<false, PixelFormat::BC3_SRGB>, + MortonCopy<false, PixelFormat::BC7_SRGB>, + MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>, nullptr, nullptr, nullptr, @@ -199,12 +221,12 @@ static constexpr ConversionArray linear_to_morton_fns = { nullptr, nullptr, nullptr, - MortonCopy<false, PixelFormat::E5B9G9R9F>, - MortonCopy<false, PixelFormat::Z32F>, - MortonCopy<false, PixelFormat::Z16>, - MortonCopy<false, PixelFormat::Z24S8>, - MortonCopy<false, PixelFormat::S8Z24>, - MortonCopy<false, PixelFormat::Z32FS8>, + MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>, + MortonCopy<false, PixelFormat::D32_FLOAT>, + MortonCopy<false, PixelFormat::D16_UNORM>, + MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>, + MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>, + MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>, }; static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index eb5158407..b7e9ed2e9 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -185,10 +185,6 @@ std::string TextureType(const MetaTexture& meta) { return type; } -std::string GlobalMemoryName(const GlobalMemoryBase& base) { - return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset); -} - class ARBDecompiler final { public: explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, @@ -199,6 +195,8 @@ public: } private: + void DefineGlobalMemory(); + void DeclareHeader(); void DeclareVertex(); void DeclareGeometry(); @@ -228,6 +226,7 @@ private: std::pair<std::string, std::size_t> BuildCoords(Operation); std::string BuildAoffi(Operation); + std::string GlobalMemoryPointer(const GmemNode& gmem); void Exit(); std::string Assign(Operation); @@ -378,10 +377,8 @@ private: std::string address; std::string_view opname; if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), - Visit(gmem->GetBaseAddress())); - address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary); - opname = "ATOMB"; + address = GlobalMemoryPointer(*gmem); + opname = "ATOM"; } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); opname = "ATOMS"; @@ -456,9 +453,13 @@ private: shader_source += '\n'; } - std::string AllocTemporary() { - max_temporaries = std::max(max_temporaries, num_temporaries + 1); - return fmt::format("T{}.x", num_temporaries++); + std::string AllocLongVectorTemporary() { + max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); + return fmt::format("L{}", num_long_temporaries++); + } + + std::string AllocLongTemporary() { + return fmt::format("{}.x", AllocLongVectorTemporary()); } std::string AllocVectorTemporary() { @@ -466,8 +467,13 @@ private: return fmt::format("T{}", num_temporaries++); } + std::string AllocTemporary() { + return fmt::format("{}.x", AllocVectorTemporary()); + } + void ResetTemporaries() noexcept { num_temporaries = 0; + num_long_temporaries = 0; } const Device& device; @@ -478,6 +484,11 @@ private: std::size_t num_temporaries = 0; std::size_t max_temporaries = 0; + std::size_t num_long_temporaries = 0; + std::size_t max_long_temporaries = 0; + + std::map<GlobalMemoryBase, u32> global_memory_names; + std::string shader_source; static constexpr std::string_view ADD_F32 = "ADD.F32"; @@ -784,6 +795,8 @@ private: ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, ShaderType stage, std::string_view identifier) : device{device}, ir{ir}, registry{registry}, stage{stage} { + DefineGlobalMemory(); + AddLine("TEMP RC;"); AddLine("TEMP FSWZA[4];"); AddLine("TEMP FSWZB[4];"); @@ -829,12 +842,20 @@ std::string_view HeaderStageName(ShaderType stage) { } } +void ARBDecompiler::DefineGlobalMemory() { + u32 binding = 0; + for (const auto& pair : ir.GetGlobalMemory()) { + const GlobalMemoryBase base = pair.first; + global_memory_names.emplace(base, binding); + ++binding; + } +} + void ARBDecompiler::DeclareHeader() { AddLine("!!NV{}5.0", HeaderStageName(stage)); // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D AddLine("OPTION NV_internal;"); AddLine("OPTION NV_gpu_program_fp64;"); - AddLine("OPTION NV_shader_storage_buffer;"); AddLine("OPTION NV_shader_thread_group;"); if (ir.UsesWarps() && device.HasWarpIntrinsics()) { AddLine("OPTION NV_shader_thread_shuffle;"); @@ -892,11 +913,19 @@ void ARBDecompiler::DeclareCompute() { const ComputeInfo& info = registry.GetComputeInfo(); AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); - if (info.shared_memory_size_in_words > 0) { - const u32 size_in_bytes = info.shared_memory_size_in_words * 4; - AddLine("SHARED_MEMORY {};", size_in_bytes); - AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); + if (info.shared_memory_size_in_words == 0) { + return; + } + const u32 limit = device.GetMaxComputeSharedMemorySize(); + u32 size_in_bytes = info.shared_memory_size_in_words * 4; + if (size_in_bytes > limit) { + LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", + size_in_bytes, limit); + size_in_bytes = limit; } + + AddLine("SHARED_MEMORY {};", size_in_bytes); + AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); } void ARBDecompiler::DeclareInputAttributes() { @@ -951,11 +980,10 @@ void ARBDecompiler::DeclareLocalMemory() { } void ARBDecompiler::DeclareGlobalMemory() { - u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; - for (const auto& pair : ir.GetGlobalMemory()) { - const auto& base = pair.first; - AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); - ++binding; + const std::size_t num_entries = ir.GetGlobalMemory().size(); + if (num_entries > 0) { + const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2; + AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1); } } @@ -977,6 +1005,9 @@ void ARBDecompiler::DeclareTemporaries() { for (std::size_t i = 0; i < max_temporaries; ++i) { AddLine("TEMP T{};", i); } + for (std::size_t i = 0; i < max_long_temporaries; ++i) { + AddLine("LONG TEMP L{};", i); + } } void ARBDecompiler::DeclarePredicates() { @@ -1260,13 +1291,6 @@ std::string ARBDecompiler::Visit(const Node& node) { return "{0, 0, 0, 0}.x"; } - const auto buffer_index = [this, &abuf]() -> std::string { - if (stage != ShaderType::Geometry) { - return ""; - } - return fmt::format("[{}]", Visit(abuf->GetBuffer())); - }; - const Attribute::Index index = abuf->GetIndex(); const u32 element = abuf->GetElement(); const char swizzle = Swizzle(element); @@ -1339,10 +1363,7 @@ std::string ARBDecompiler::Visit(const Node& node) { if (const auto gmem = std::get_if<GmemNode>(&*node)) { std::string temporary = AllocTemporary(); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), - Visit(gmem->GetBaseAddress())); - AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()), - temporary); + AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem)); return temporary; } @@ -1375,7 +1396,7 @@ std::string ARBDecompiler::Visit(const Node& node) { return {}; } - if (const auto cmt = std::get_if<CommentNode>(&*node)) { + if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) { // Uncommenting this will generate invalid code. GLASM lacks comments. // AddLine("// {}", cmt->GetText()); return {}; @@ -1419,6 +1440,22 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) { return fmt::format(", offset({})", temporary); } +std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { + const u32 binding = global_memory_names.at(gmem.GetDescriptor()); + const char result_swizzle = binding % 2 == 0 ? 'x' : 'y'; + + const std::string pointer = AllocLongVectorTemporary(); + std::string temporary = AllocTemporary(); + + const u32 local_index = binding / 2; + AddLine("PK64.U {}, c[{}];", pointer, local_index); + AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), + Visit(gmem.GetBaseAddress())); + AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); + AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer); + return fmt::format("{}.x", pointer); +} + void ARBDecompiler::Exit() { if (stage != ShaderType::Fragment) { AddLine("RET;"); @@ -1515,11 +1552,7 @@ std::string ARBDecompiler::Assign(Operation operation) { ResetTemporaries(); return {}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const std::string temporary = AllocTemporary(); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), - Visit(gmem->GetBaseAddress())); - AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()), - temporary); + AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); ResetTemporaries(); return {}; } else { @@ -1671,7 +1704,7 @@ std::string ARBDecompiler::HCastFloat(Operation operation) { } std::string ARBDecompiler::HUnpack(Operation operation) { - const std::string operand = Visit(operation[0]); + std::string operand = Visit(operation[0]); switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { case Tegra::Shader::HalfType::H0_H1: return operand; @@ -2021,7 +2054,7 @@ std::string ARBDecompiler::InvocationId(Operation) { std::string ARBDecompiler::YNegate(Operation) { LOG_WARNING(Render_OpenGL, "(STUBBED)"); - const std::string temporary = AllocTemporary(); + std::string temporary = AllocTemporary(); AddLine("MOV.F {}, 1;", temporary); return temporary; } @@ -2044,10 +2077,6 @@ std::string ARBDecompiler::ShuffleIndexed(Operation operation) { } std::string ARBDecompiler::Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled"); - return {}; - } AddLine("BAR;"); return {}; } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index e461e4c70..e866d8f2f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -26,7 +26,7 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); - if (device.HasVertexBufferUnifiedMemory()) { + if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 630acb73b..e7d95149f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -212,6 +212,7 @@ Device::Device() shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); + max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; has_shader_ballot = GLAD_GL_ARB_shader_ballot; @@ -250,6 +251,7 @@ Device::Device(std::nullptr_t) { shader_storage_alignment = 4; max_vertex_attributes = 16; max_varyings = 15; + max_compute_shared_memory_size = 0x10000; has_warp_intrinsics = true; has_shader_ballot = true; has_vertex_viewport_layer = true; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 94d38d7d1..8a4b6b9fc 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -52,6 +52,10 @@ public: return max_varyings; } + u32 GetMaxComputeSharedMemorySize() const { + return max_compute_shared_memory_size; + } + bool HasWarpIntrinsics() const { return has_warp_intrinsics; } @@ -118,6 +122,7 @@ private: std::size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; + u32 max_compute_shared_memory_size{}; bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c3fad563c..03e82c599 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -139,6 +139,18 @@ void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } +void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) { + if (num_entries == 0) { + return; + } + if (num_entries % 2 == 1) { + pointers[num_entries] = 0; + } + const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2); + glProgramLocalParametersI4uivNV(target, 0, num_vectors, + reinterpret_cast<const GLuint*>(pointers)); +} + } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, @@ -324,7 +336,6 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); auto& gpu = system.GPU().Maxwell3D(); - std::size_t num_ssbos = 0; u32 clip_distances = 0; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -347,29 +358,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } // Currently this stages are not supported in the OpenGL backend. - // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL - if (program == Maxwell::ShaderProgram::TesselationControl) { + // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL + if (program == Maxwell::ShaderProgram::TesselationControl || + program == Maxwell::ShaderProgram::TesselationEval) { continue; - } else if (program == Maxwell::ShaderProgram::TesselationEval) { - continue; - } - - Shader* shader = shader_cache.GetStageProgram(program, async_shaders); - - if (device.UseAssemblyShaders()) { - // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this - // all stages share the same bindings. - const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size(); - ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage"); - num_ssbos += num_stage_ssbos; } - // Stage indices are 0 - 5 - const std::size_t stage = index == 0 ? 0 : index - 1; - SetupDrawConstBuffers(stage, shader); - SetupDrawGlobalMemory(stage, shader); - SetupDrawTextures(stage, shader); - SetupDrawImages(stage, shader); + Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; switch (program) { @@ -388,6 +383,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { shader_config.enable.Value(), shader_config.offset); } + // Stage indices are 0 - 5 + const std::size_t stage = index == 0 ? 0 : index - 1; + SetupDrawConstBuffers(stage, shader); + SetupDrawGlobalMemory(stage, shader); + SetupDrawTextures(stage, shader); + SetupDrawImages(stage, shader); + // Workaround for Intel drivers. // When a clip distance is enabled but not set in the shader it crops parts of the screen // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the @@ -749,6 +751,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { current_cbuf = 0; auto kernel = shader_cache.GetComputeKernel(code_addr); + program_manager.BindCompute(kernel->GetHandle()); + SetupComputeTextures(kernel); SetupComputeImages(kernel); @@ -763,7 +767,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Unmap(); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; - program_manager.BindCompute(kernel->GetHandle()); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; } @@ -1023,40 +1026,66 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, } void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { + static constexpr std::array TARGET_LUT = { + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, + }; + auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; + const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; + const auto& entries{shader->GetEntries().global_memory_entries}; + + std::array<GLuint64EXT, 32> pointers; + ASSERT(entries.size() < pointers.size()); - u32 binding = - device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; - for (const auto& entry : shader->GetEntries().global_memory_entries) { + const bool assembly_shaders = device.UseAssemblyShaders(); + u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; + for (const auto& entry : entries) { const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; const u32 size{memory_manager.Read<u32>(addr + 8)}; - SetupGlobalMemory(binding++, entry, gpu_addr, size); + SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); + ++binding; + } + if (assembly_shaders) { + UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size()); } } void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; + const auto& cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; + const auto& entries{kernel->GetEntries().global_memory_entries}; + + std::array<GLuint64EXT, 32> pointers; + ASSERT(entries.size() < pointers.size()); u32 binding = 0; - for (const auto& entry : kernel->GetEntries().global_memory_entries) { - const auto addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; - const auto gpu_addr{memory_manager.Read<u64>(addr)}; - const auto size{memory_manager.Read<u32>(addr + 8)}; - SetupGlobalMemory(binding++, entry, gpu_addr, size); + for (const auto& entry : entries) { + const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; + const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; + const u32 size{memory_manager.Read<u32>(addr + 8)}; + SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); + ++binding; + } + if (device.UseAssemblyShaders()) { + UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size()); } } void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, - GPUVAddr gpu_addr, std::size_t size) { - const auto alignment{device.GetShaderStorageBufferAlignment()}; + GPUVAddr gpu_addr, std::size_t size, + GLuint64EXT* pointer) { + const std::size_t alignment{device.GetShaderStorageBufferAlignment()}; const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, - static_cast<GLsizeiptr>(size)); + if (device.UseAssemblyShaders()) { + *pointer = info.address + info.offset; + } else { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, + static_cast<GLsizeiptr>(size)); + } } void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a95646936..ccc6f50f6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -124,9 +124,9 @@ private: /// Configures the current global memory entries to use for the kernel invocation. void SetupComputeGlobalMemory(Shader* kernel); - /// Configures a constant buffer. + /// Configures a global memory buffer. void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, - std::size_t size); + std::size_t size, GLuint64EXT* pointer); /// Configures the current textures to use for the draw command. void SetupDrawTextures(std::size_t stage_index, Shader* shader); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f469ed656..be71e1733 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -126,7 +126,7 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, entry.graphics_info, entry.compute_info}; - const auto registry = std::make_shared<Registry>(entry.type, info); + auto registry = std::make_shared<Registry>(entry.type, info); for (const auto& [address, value] : entry.keys) { const auto [buffer, offset] = address; registry->InsertKey(buffer, offset, value); @@ -237,7 +237,6 @@ std::unique_ptr<Shader> Shader::CreateStageFromMemory( const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { const auto shader_type = GetShaderType(program_type); - const std::size_t size_in_bytes = code.size() * sizeof(u64); auto& gpu = params.system.GPU(); gpu.ShaderNotify().MarkSharderBuilding(); @@ -287,8 +286,6 @@ std::unique_ptr<Shader> Shader::CreateStageFromMemory( std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { - const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto& gpu = params.system.GPU(); gpu.ShaderNotify().MarkSharderBuilding(); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2c49aeaac..3f75fcd2b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -602,8 +602,15 @@ private: return; } const auto& info = registry.GetComputeInfo(); - if (const u32 size = info.shared_memory_size_in_words; size > 0) { - code.AddLine("shared uint smem[{}];", size); + if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { + const u32 limit = device.GetMaxComputeSharedMemorySize(); + if (size > limit) { + LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", + size, limit); + size = limit; + } + + code.AddLine("shared uint smem[{}];", size / 4); code.AddNewLine(); } code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", @@ -1912,7 +1919,7 @@ private: Expression Comparison(Operation operation) { static_assert(!unordered || type == Type::Float); - const Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); + Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's @@ -1952,10 +1959,6 @@ private: return {fmt::format("({} != 0)", carry), Type::Bool}; } - Expression LogicalFIsNan(Operation operation) { - return GenerateUnary(operation, "isnan", Type::Bool, Type::Float); - } - Expression LogicalAssign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; @@ -2771,15 +2774,6 @@ private: return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); } - bool IsRenderTargetEnabled(u32 render_target) const { - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - return true; - } - } - return false; - } - const Device& device; const ShaderIR& ir; const Registry& registry; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 8e754fa90..691c6c79b 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -11,8 +11,30 @@ namespace OpenGL { -ProgramManager::ProgramManager(const Device& device) { - use_assembly_programs = device.UseAssemblyShaders(); +namespace { + +void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { + if (current == old) { + return; + } + if (current == 0) { + if (enabled) { + enabled = false; + glDisable(stage); + } + return; + } + if (!enabled) { + enabled = true; + glEnable(stage); + } + glBindProgramARB(stage, current); +} + +} // Anonymous namespace + +ProgramManager::ProgramManager(const Device& device) + : use_assembly_programs{device.UseAssemblyShaders()} { if (use_assembly_programs) { glEnable(GL_COMPUTE_PROGRAM_NV); } else { @@ -33,9 +55,7 @@ void ProgramManager::BindCompute(GLuint program) { } void ProgramManager::BindGraphicsPipeline() { - if (use_assembly_programs) { - UpdateAssemblyPrograms(); - } else { + if (!use_assembly_programs) { UpdateSourcePrograms(); } } @@ -63,32 +83,25 @@ void ProgramManager::RestoreGuestPipeline() { } } -void ProgramManager::UpdateAssemblyPrograms() { - const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) { - if (current == old) { - return; - } - if (current == 0) { - if (enabled) { - enabled = false; - glDisable(stage); - } - return; - } - if (!enabled) { - enabled = true; - glEnable(stage); - } - glBindProgramARB(stage, current); - }; +void ProgramManager::UseVertexShader(GLuint program) { + if (use_assembly_programs) { + BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); + } + current_state.vertex = program; +} - update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex); - update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry, - old_state.geometry); - update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment, - old_state.fragment); +void ProgramManager::UseGeometryShader(GLuint program) { + if (use_assembly_programs) { + BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); + } + current_state.geometry = program; +} - old_state = current_state; +void ProgramManager::UseFragmentShader(GLuint program) { + if (use_assembly_programs) { + BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); + } + current_state.fragment = program; } void ProgramManager::UpdateSourcePrograms() { diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 0f03b4f12..950e0dfcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -45,17 +45,9 @@ public: /// Rewinds BindHostPipeline state changes. void RestoreGuestPipeline(); - void UseVertexShader(GLuint program) { - current_state.vertex = program; - } - - void UseGeometryShader(GLuint program) { - current_state.geometry = program; - } - - void UseFragmentShader(GLuint program) { - current_state.fragment = program; - } + void UseVertexShader(GLuint program); + void UseGeometryShader(GLuint program); + void UseFragmentShader(GLuint program); private: struct PipelineState { @@ -64,9 +56,6 @@ private: GLuint fragment = 0; }; - /// Update NV_gpu_program5 programs. - void UpdateAssemblyPrograms(); - /// Update GLSL programs. void UpdateSourcePrograms(); diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 3655ff629..887995cf4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -35,7 +35,7 @@ OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool ver mapped_ptr = static_cast<u8*>( glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); - if (device.HasVertexBufferUnifiedMemory()) { + if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 61505879b..0a7bc9e2b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -41,91 +41,103 @@ struct FormatTuple { }; constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // ABGR8U - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // ABGR8S - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // ABGR8UI - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5U - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10U - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5U - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8U - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8UI - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // RGBA16U - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // RGBA16S - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // RGBA16UI - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // RGBA32UI - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // DXT1 - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // DXT23 - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // DXT45 - {GL_COMPRESSED_RED_RGTC1}, // DXN1 - {GL_COMPRESSED_RG_RGTC2}, // DXN2UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // DXN2SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7U - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UF16 - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SF16 - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F - {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F - {GL_R32F, GL_RED, GL_FLOAT}, // R32F - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16U - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16S - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16UI - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16 - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // RG16UI - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // RG16I - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // RG16S - {GL_RGB32F, GL_RGB, GL_FLOAT}, // RGB32F - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG8UI - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8 - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5 - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4 - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM // Compressed sRGB formats - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // DXT1_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // DXT23_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // DXT45_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7U_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5 + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8 + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6 + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10 + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12 + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6 + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5 + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9F + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT // Depth formats - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // Z32F - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16 + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM // DepthStencil formats - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // Z24S8 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8Z24 - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT }}; const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { @@ -178,10 +190,10 @@ GLint GetSwizzleSource(SwizzleSource source) { GLenum GetComponent(PixelFormat format, bool is_first) { switch (format) { - case PixelFormat::Z24S8: - case PixelFormat::Z32FS8: + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; - case PixelFormat::S8Z24: + case PixelFormat::S8_UINT_D24_UNORM: return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; default: UNREACHABLE(); @@ -482,9 +494,9 @@ GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_sou std::array swizzle{x_source, y_source, z_source, w_source}; switch (const PixelFormat format = GetSurfaceParams().pixel_format) { - case PixelFormat::Z24S8: - case PixelFormat::Z32FS8: - case PixelFormat::S8Z24: + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + case PixelFormat::S8_UINT_D24_UNORM: UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, GetComponent(format, x_source == SwizzleSource::R)); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index e66cdc083..52e9e8250 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -535,12 +535,12 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, GLint internal_format; switch (framebuffer.pixel_format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: + case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM: internal_format = GL_RGBA8; texture.gl_format = GL_RGBA; texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; break; - case Tegra::FramebufferConfig::PixelFormat::RGB565: + case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM: internal_format = GL_RGB565; texture.gl_format = GL_RGB; texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d1f0ea932..81a39a3b8 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -40,7 +40,6 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { } // Anonymous namespace void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) { - const auto& clip = regs.view_volume_clip_control; const std::array enabled_lut = {regs.polygon_offset_point_enable, regs.polygon_offset_line_enable, regs.polygon_offset_fill_enable}; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index d7f1ae89f..f8c77f4fa 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -117,90 +117,101 @@ struct FormatTuple { VkFormat format; ///< Vulkan format int usage = 0; ///< Describes image format usage } constexpr tex_format_tuples[] = { - {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // ABGR8U - {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // ABGR8S - {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // ABGR8UI - {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5U - {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10U - {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5U (flipped with swizzle) - {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8U - {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8UI - {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // RGBA16F - {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // RGBA16U - {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // RGBA16S - {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // RGBA16UI - {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // R11FG11FB10F - {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // RGBA32UI - {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // DXT1 - {VK_FORMAT_BC2_UNORM_BLOCK}, // DXT23 - {VK_FORMAT_BC3_UNORM_BLOCK}, // DXT45 - {VK_FORMAT_BC4_UNORM_BLOCK}, // DXN1 - {VK_FORMAT_BC5_UNORM_BLOCK}, // DXN2UNORM - {VK_FORMAT_BC5_SNORM_BLOCK}, // DXN2SNORM - {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7U - {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 - {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 - {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 - {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8 - {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F - {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F - {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F - {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F - {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U - {VK_FORMAT_UNDEFINED}, // R16S - {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI - {VK_FORMAT_UNDEFINED}, // R16I - {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 - {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F - {VK_FORMAT_UNDEFINED}, // RG16UI - {VK_FORMAT_UNDEFINED}, // RG16I - {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // RG16S - {VK_FORMAT_UNDEFINED}, // RGB32F - {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // RGBA8_SRGB - {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // RG8U - {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // RG8S - {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // RG8UI - {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // RG32UI - {VK_FORMAT_UNDEFINED}, // RGBX16F - {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32UI - {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32I - {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 - {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 - {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 - {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB - {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB - {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB - {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB - {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7U_SRGB - {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // R4G4B4A4U - {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB - {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB - {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB - {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB - {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5 - {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB - {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8 - {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB - {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6 - {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB - {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10 - {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB - {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12 - {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB - {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6 - {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB - {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5 - {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB - {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9F + {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // A8B8G8R8_UNORM + {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // A8B8G8R8_SNORM + {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT + {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT + {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM + {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM + {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM + {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM + {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT + {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle) + {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM + {VK_FORMAT_R8_SNORM, Attachable | Storage}, // R8_SNORM + {VK_FORMAT_R8_SINT, Attachable | Storage}, // R8_SINT + {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8_UINT + {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // R16G16B16A16_FLOAT + {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // R16G16B16A16_UNORM + {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // R16G16B16A16_SNORM + {VK_FORMAT_R16G16B16A16_SINT, Attachable | Storage}, // R16G16B16A16_SINT + {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // R16G16B16A16_UINT + {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // B10G11R11_FLOAT + {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // R32G32B32A32_UINT + {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // BC1_RGBA_UNORM + {VK_FORMAT_BC2_UNORM_BLOCK}, // BC2_UNORM + {VK_FORMAT_BC3_UNORM_BLOCK}, // BC3_UNORM + {VK_FORMAT_BC4_UNORM_BLOCK}, // BC4_UNORM + {VK_FORMAT_BC4_SNORM_BLOCK}, // BC4_SNORM + {VK_FORMAT_BC5_UNORM_BLOCK}, // BC5_UNORM + {VK_FORMAT_BC5_SNORM_BLOCK}, // BC5_SNORM + {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7_UNORM + {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UFLOAT + {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SFLOAT + {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4_UNORM + {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // B8G8R8A8_UNORM + {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // R32G32B32A32_FLOAT + {VK_FORMAT_R32G32B32A32_SINT, Attachable | Storage}, // R32G32B32A32_SINT + {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // R32G32_FLOAT + {VK_FORMAT_R32G32_SINT, Attachable | Storage}, // R32G32_SINT + {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT + {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT + {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM + {VK_FORMAT_UNDEFINED}, // R16_SNORM + {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT + {VK_FORMAT_UNDEFINED}, // R16_SINT + {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM + {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT + {VK_FORMAT_UNDEFINED}, // R16G16_UINT + {VK_FORMAT_UNDEFINED}, // R16G16_SINT + {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM + {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT + {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB + {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM + {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM + {VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT + {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // R8G8_UINT + {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // R32G32_UINT + {VK_FORMAT_UNDEFINED}, // R16G16B16X16_FLOAT + {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT + {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT + {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM + {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5_UNORM + {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM + {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB + {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB + {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB + {VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB + {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB + {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // A4B4G4R4_UNORM + {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB + {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB + {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB + {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB + {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5_UNORM + {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB + {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8_UNORM + {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB + {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM + {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB + {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM + {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB + {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM + {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB + {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM + {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB + {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM + {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB + {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT // Depth formats - {VK_FORMAT_D32_SFLOAT, Attachable}, // Z32F - {VK_FORMAT_D16_UNORM, Attachable}, // Z16 + {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT + {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM // DepthStencil formats - {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // Z24S8 - {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8Z24 (emulated) - {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // Z32FS8 + {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT + {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated) + {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // D32_FLOAT_S8_UINT }; static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat); @@ -221,7 +232,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true}; } - // Use ABGR8 on hardware that doesn't support ASTC natively + // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 866813465..ce53e5a6b 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -187,9 +187,9 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { switch (framebuffer.pixel_format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: + case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM: return VK_FORMAT_A8B8G8R8_UNORM_PACK32; - case Tegra::FramebufferConfig::PixelFormat::RGB565: + case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM: return VK_FORMAT_R5G6B5_UNORM_PACK16; default: UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 26379ee01..6245e0d78 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -84,14 +84,19 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_A8B8G8R8_UNORM_PACK32, VK_FORMAT_A8B8G8R8_UINT_PACK32, VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VK_FORMAT_A8B8G8R8_SINT_PACK32, VK_FORMAT_A8B8G8R8_SRGB_PACK32, VK_FORMAT_B5G6R5_UNORM_PACK16, VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VK_FORMAT_A2B10G10R10_UINT_PACK32, VK_FORMAT_A1R5G5B5_UNORM_PACK16, VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32G32B32A32_UINT, VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32_UINT, + VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_UINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R16G16B16A16_UNORM, @@ -103,8 +108,11 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_R8G8B8A8_SRGB, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8_SNORM, + VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_SNORM, + VK_FORMAT_R8_SINT, VK_FORMAT_R8_UINT, VK_FORMAT_B10G11R11_UFLOAT_PACK32, VK_FORMAT_R32_SFLOAT, @@ -124,6 +132,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_BC2_UNORM_BLOCK, VK_FORMAT_BC3_UNORM_BLOCK, VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC4_SNORM_BLOCK, VK_FORMAT_BC5_UNORM_BLOCK, VK_FORMAT_BC5_SNORM_BLOCK, VK_FORMAT_BC7_UNORM_BLOCK, diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index ae5c21baa..529744f2d 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -122,6 +122,11 @@ public: return properties.limits.maxPushConstantsSize; } + /// Returns the maximum size for shared memory. + u32 GetMaxComputeSharedMemorySize() const { + return properties.limits.maxComputeSharedMemorySize; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 97429cc59..cd7d7a4e4 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -685,13 +685,19 @@ private: } t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); - const u32 smem_size = specialization.shared_memory_size; + u32 smem_size = specialization.shared_memory_size * 4; if (smem_size == 0) { // Avoid declaring an empty array. return; } - const auto element_count = static_cast<u32>(Common::AlignUp(smem_size, 4) / 4); - const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count)); + const u32 limit = device.GetMaxComputeSharedMemorySize(); + if (smem_size > limit) { + LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", + smem_size, limit); + smem_size = limit; + } + + const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); Name(type_pointer, "SharedMemory"); @@ -700,9 +706,9 @@ private: } void DeclareInternalFlags() { - constexpr std::array names = {"zero", "sign", "carry", "overflow"}; + static constexpr std::array names{"zero", "sign", "carry", "overflow"}; + for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { - const auto flag_code = static_cast<InternalFlag>(flag); const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); } @@ -2798,7 +2804,6 @@ private: std::map<GlobalMemoryBase, Id> global_buffers; std::map<u32, TexelBuffer> uniform_texels; std::map<u32, SampledImage> sampled_images; - std::map<u32, TexelBuffer> storage_texels; std::map<u32, StorageImage> images; std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 9bc18c21a..d102e6d27 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -235,7 +235,7 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { UNIMPLEMENTED_IF(params.IsBuffer()); - if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { + if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); } @@ -385,7 +385,7 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; - if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { + if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. std::swap(swizzle[0], swizzle[2]); } @@ -397,11 +397,11 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); const bool is_first = x_source == SwizzleSource::R; switch (params.pixel_format) { - case VideoCore::Surface::PixelFormat::Z24S8: - case VideoCore::Surface::PixelFormat::Z32FS8: + case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT: + case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT: aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; break; - case VideoCore::Surface::PixelFormat::S8Z24: + case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM: aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; break; default: diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index a041519b7..73155966f 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -98,12 +98,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); - const Node value = [&]() { - const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); + const Node value = [&] { + Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); if (opcode->get().GetId() != OpCode::Id::IADD3_R) { return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); } - const Node shifted = [&]() { + const Node shifted = [&] { switch (instr.iadd3.mode) { case Tegra::Shader::IAdd3Mode::RightShift: // TODO(tech4me): According to diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 07778dc3e..e75ca4fdb 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -31,11 +31,11 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, std::size_t component) { const TextureFormat format{descriptor.format}; switch (format) { - case TextureFormat::R16_G16_B16_A16: - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R32_G32_B32: - case TextureFormat::R32_G32: - case TextureFormat::R16_G16: + case TextureFormat::R16G16B16A16: + case TextureFormat::R32G32B32A32: + case TextureFormat::R32G32B32: + case TextureFormat::R32G32: + case TextureFormat::R16G16: case TextureFormat::R32: case TextureFormat::R16: case TextureFormat::R8: @@ -97,7 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, break; case TextureFormat::B5G6R5: case TextureFormat::B6G5R5: - case TextureFormat::BF10GF11RF11: + case TextureFormat::B10G11R11: if (component == 0) { return descriptor.b_type; } @@ -108,9 +108,9 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, return descriptor.r_type; } break; - case TextureFormat::G8R24: - case TextureFormat::G24R8: - case TextureFormat::G8R8: + case TextureFormat::R24G8: + case TextureFormat::R8G24: + case TextureFormat::R8G8: case TextureFormat::G4R4: if (component == 0) { return descriptor.g_type; @@ -137,15 +137,15 @@ bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { u32 GetComponentSize(TextureFormat format, std::size_t component) { switch (format) { - case TextureFormat::R32_G32_B32_A32: + case TextureFormat::R32G32B32A32: return 32; - case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R16G16B16A16: return 16; - case TextureFormat::R32_G32_B32: + case TextureFormat::R32G32B32: return component <= 2 ? 32 : 0; - case TextureFormat::R32_G32: + case TextureFormat::R32G32: return component <= 1 ? 32 : 0; - case TextureFormat::R16_G16: + case TextureFormat::R16G16: return component <= 1 ? 16 : 0; case TextureFormat::R32: return component == 0 ? 32 : 0; @@ -192,7 +192,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 6; } return 0; - case TextureFormat::BF10GF11RF11: + case TextureFormat::B10G11R11: if (component == 1 || component == 2) { return 11; } @@ -200,7 +200,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 10; } return 0; - case TextureFormat::G8R24: + case TextureFormat::R24G8: if (component == 0) { return 8; } @@ -208,7 +208,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 24; } return 0; - case TextureFormat::G24R8: + case TextureFormat::R8G24: if (component == 0) { return 8; } @@ -216,7 +216,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 24; } return 0; - case TextureFormat::G8R8: + case TextureFormat::R8G8: return (component == 0 || component == 1) ? 8 : 0; case TextureFormat::G4R4: return (component == 0 || component == 1) ? 4 : 0; @@ -231,25 +231,25 @@ std::size_t GetImageComponentMask(TextureFormat format) { constexpr u8 B = 0b0100; constexpr u8 A = 0b1000; switch (format) { - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R32G32B32A32: + case TextureFormat::R16G16B16A16: case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: case TextureFormat::A4B4G4R4: case TextureFormat::A5B5G5R1: case TextureFormat::A1B5G5R5: return std::size_t{R | G | B | A}; - case TextureFormat::R32_G32_B32: + case TextureFormat::R32G32B32: case TextureFormat::R32_B24G8: case TextureFormat::B5G6R5: case TextureFormat::B6G5R5: - case TextureFormat::BF10GF11RF11: + case TextureFormat::B10G11R11: return std::size_t{R | G | B}; - case TextureFormat::R32_G32: - case TextureFormat::R16_G16: - case TextureFormat::G8R24: - case TextureFormat::G24R8: - case TextureFormat::G8R8: + case TextureFormat::R32G32: + case TextureFormat::R16G16: + case TextureFormat::R24G8: + case TextureFormat::R8G24: + case TextureFormat::R8G8: case TextureFormat::G4R4: return std::size_t{R | G}; case TextureFormat::R32: diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index c0a8f233f..29a7cfbfe 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -75,8 +75,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Node value = [this, instr] { switch (instr.sys20) { case SystemVariable::LaneId: - LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete"); - return Immediate(0U); + return Operation(OperationCode::ThreadId); case SystemVariable::InvocationId: return Operation(OperationCode::InvocationId); case SystemVariable::Ydirection: diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 64ba60ea2..1c0957277 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -91,29 +91,28 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { return pc; } -Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, - Tegra::Shader::VideoType type, u64 byte_height) { +Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, + u64 byte_height) { if (!is_chunk) { return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); } - const Node zero = Immediate(0); switch (type) { - case Tegra::Shader::VideoType::Size16_Low: + case VideoType::Size16_Low: return BitfieldExtract(op, 0, 16); - case Tegra::Shader::VideoType::Size16_High: + case VideoType::Size16_High: return BitfieldExtract(op, 16, 16); - case Tegra::Shader::VideoType::Size32: + case VideoType::Size32: // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. UNIMPLEMENTED(); - return zero; - case Tegra::Shader::VideoType::Invalid: + return Immediate(0); + case VideoType::Invalid: UNREACHABLE_MSG("Invalid instruction encoding"); - return zero; + return Immediate(0); default: UNREACHABLE(); - return zero; + return Immediate(0); } } diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index c83dc6615..233b8fa42 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -81,20 +81,21 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { SetTemporary(bb, 0, product); product = GetTemporary(0); - const Node original_c = op_c; + Node original_c = op_c; const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error - op_c = [&]() { + op_c = [&] { switch (set_mode) { case Tegra::Shader::XmadMode::None: return original_c; case Tegra::Shader::XmadMode::CLo: - return BitfieldExtract(original_c, 0, 16); + return BitfieldExtract(std::move(original_c), 0, 16); case Tegra::Shader::XmadMode::CHi: - return BitfieldExtract(original_c, 16, 16); + return BitfieldExtract(std::move(original_c), 16, 16); case Tegra::Shader::XmadMode::CBcc: { - const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, - original_b, Immediate(16)); - return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b); + Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, + original_b, Immediate(16)); + return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), + std::move(shifted_b)); } case Tegra::Shader::XmadMode::CSfu: { const Node comp_a = diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index e322c3402..29d794b34 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -112,9 +112,9 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff } Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { - const Node node = MakeNode<InternalFlagNode>(flag); + Node node = MakeNode<InternalFlagNode>(flag); if (negated) { - return Operation(OperationCode::LogicalNegate, node); + return Operation(OperationCode::LogicalNegate, std::move(node)); } return node; } diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index bbe93903c..1688267bb 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -74,117 +74,131 @@ bool SurfaceTargetIsArray(SurfaceTarget target) { PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { switch (format) { - case Tegra::DepthFormat::S8_Z24_UNORM: - return PixelFormat::S8Z24; - case Tegra::DepthFormat::Z24_S8_UNORM: - return PixelFormat::Z24S8; - case Tegra::DepthFormat::Z32_FLOAT: - return PixelFormat::Z32F; - case Tegra::DepthFormat::Z16_UNORM: - return PixelFormat::Z16; - case Tegra::DepthFormat::Z32_S8_X24_FLOAT: - return PixelFormat::Z32FS8; + case Tegra::DepthFormat::S8_UINT_Z24_UNORM: + return PixelFormat::S8_UINT_D24_UNORM; + case Tegra::DepthFormat::D24S8_UNORM: + return PixelFormat::D24_UNORM_S8_UINT; + case Tegra::DepthFormat::D32_FLOAT: + return PixelFormat::D32_FLOAT; + case Tegra::DepthFormat::D16_UNORM: + return PixelFormat::D16_UNORM; + case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT: + return PixelFormat::D32_FLOAT_S8_UINT; default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - return PixelFormat::S8Z24; + UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format)); + return PixelFormat::S8_UINT_D24_UNORM; } } PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { switch (format) { - case Tegra::RenderTargetFormat::RGBA8_SRGB: - return PixelFormat::RGBA8_SRGB; - case Tegra::RenderTargetFormat::RGBA8_UNORM: - return PixelFormat::ABGR8U; - case Tegra::RenderTargetFormat::RGBA8_SNORM: - return PixelFormat::ABGR8S; - case Tegra::RenderTargetFormat::RGBA8_UINT: - return PixelFormat::ABGR8UI; - case Tegra::RenderTargetFormat::BGRA8_SRGB: - return PixelFormat::BGRA8_SRGB; - case Tegra::RenderTargetFormat::BGRA8_UNORM: - return PixelFormat::BGRA8; - case Tegra::RenderTargetFormat::RGB10_A2_UNORM: - return PixelFormat::A2B10G10R10U; - case Tegra::RenderTargetFormat::RGBA16_FLOAT: - return PixelFormat::RGBA16F; - case Tegra::RenderTargetFormat::RGBA16_UNORM: - return PixelFormat::RGBA16U; - case Tegra::RenderTargetFormat::RGBA16_SNORM: - return PixelFormat::RGBA16S; - case Tegra::RenderTargetFormat::RGBA16_UINT: - return PixelFormat::RGBA16UI; - case Tegra::RenderTargetFormat::RGBA32_FLOAT: - return PixelFormat::RGBA32F; - case Tegra::RenderTargetFormat::RG32_FLOAT: - return PixelFormat::RG32F; - case Tegra::RenderTargetFormat::R11G11B10_FLOAT: - return PixelFormat::R11FG11FB10F; - case Tegra::RenderTargetFormat::B5G6R5_UNORM: - return PixelFormat::B5G6R5U; - case Tegra::RenderTargetFormat::BGR5A1_UNORM: - return PixelFormat::A1B5G5R5U; - case Tegra::RenderTargetFormat::RGBA32_UINT: - return PixelFormat::RGBA32UI; - case Tegra::RenderTargetFormat::R8_UNORM: - return PixelFormat::R8U; - case Tegra::RenderTargetFormat::R8_UINT: - return PixelFormat::R8UI; - case Tegra::RenderTargetFormat::RG16_FLOAT: - return PixelFormat::RG16F; - case Tegra::RenderTargetFormat::RG16_UINT: - return PixelFormat::RG16UI; - case Tegra::RenderTargetFormat::RG16_SINT: - return PixelFormat::RG16I; - case Tegra::RenderTargetFormat::RG16_UNORM: - return PixelFormat::RG16; - case Tegra::RenderTargetFormat::RG16_SNORM: - return PixelFormat::RG16S; - case Tegra::RenderTargetFormat::RG8_UNORM: - return PixelFormat::RG8U; - case Tegra::RenderTargetFormat::RG8_SNORM: - return PixelFormat::RG8S; - case Tegra::RenderTargetFormat::RG8_UINT: - return PixelFormat::RG8UI; - case Tegra::RenderTargetFormat::R16_FLOAT: - return PixelFormat::R16F; + case Tegra::RenderTargetFormat::R32B32G32A32_FLOAT: + return PixelFormat::R32G32B32A32_FLOAT; + case Tegra::RenderTargetFormat::R32G32B32A32_SINT: + return PixelFormat::R32G32B32A32_SINT; + case Tegra::RenderTargetFormat::R32G32B32A32_UINT: + return PixelFormat::R32G32B32A32_UINT; + case Tegra::RenderTargetFormat::R16G16B16A16_UNORM: + return PixelFormat::R16G16B16A16_UNORM; + case Tegra::RenderTargetFormat::R16G16B16A16_SNORM: + return PixelFormat::R16G16B16A16_SNORM; + case Tegra::RenderTargetFormat::R16G16B16A16_SINT: + return PixelFormat::R16G16B16A16_SINT; + case Tegra::RenderTargetFormat::R16G16B16A16_UINT: + return PixelFormat::R16G16B16A16_UINT; + case Tegra::RenderTargetFormat::R16G16B16A16_FLOAT: + return PixelFormat::R16G16B16A16_FLOAT; + case Tegra::RenderTargetFormat::R32G32_FLOAT: + return PixelFormat::R32G32_FLOAT; + case Tegra::RenderTargetFormat::R32G32_SINT: + return PixelFormat::R32G32_SINT; + case Tegra::RenderTargetFormat::R32G32_UINT: + return PixelFormat::R32G32_UINT; + case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT: + return PixelFormat::R16G16B16X16_FLOAT; + case Tegra::RenderTargetFormat::B8G8R8A8_UNORM: + return PixelFormat::B8G8R8A8_UNORM; + case Tegra::RenderTargetFormat::B8G8R8A8_SRGB: + return PixelFormat::B8G8R8A8_SRGB; + case Tegra::RenderTargetFormat::A2B10G10R10_UNORM: + return PixelFormat::A2B10G10R10_UNORM; + case Tegra::RenderTargetFormat::A2B10G10R10_UINT: + return PixelFormat::A2B10G10R10_UINT; + case Tegra::RenderTargetFormat::A8B8G8R8_UNORM: + return PixelFormat::A8B8G8R8_UNORM; + case Tegra::RenderTargetFormat::A8B8G8R8_SRGB: + return PixelFormat::A8B8G8R8_SRGB; + case Tegra::RenderTargetFormat::A8B8G8R8_SNORM: + return PixelFormat::A8B8G8R8_SNORM; + case Tegra::RenderTargetFormat::A8B8G8R8_SINT: + return PixelFormat::A8B8G8R8_SINT; + case Tegra::RenderTargetFormat::A8B8G8R8_UINT: + return PixelFormat::A8B8G8R8_UINT; + case Tegra::RenderTargetFormat::R16G16_UNORM: + return PixelFormat::R16G16_UNORM; + case Tegra::RenderTargetFormat::R16G16_SNORM: + return PixelFormat::R16G16_SNORM; + case Tegra::RenderTargetFormat::R16G16_SINT: + return PixelFormat::R16G16_SINT; + case Tegra::RenderTargetFormat::R16G16_UINT: + return PixelFormat::R16G16_UINT; + case Tegra::RenderTargetFormat::R16G16_FLOAT: + return PixelFormat::R16G16_FLOAT; + case Tegra::RenderTargetFormat::B10G11R11_FLOAT: + return PixelFormat::B10G11R11_FLOAT; + case Tegra::RenderTargetFormat::R32_SINT: + return PixelFormat::R32_SINT; + case Tegra::RenderTargetFormat::R32_UINT: + return PixelFormat::R32_UINT; + case Tegra::RenderTargetFormat::R32_FLOAT: + return PixelFormat::R32_FLOAT; + case Tegra::RenderTargetFormat::R5G6B5_UNORM: + return PixelFormat::R5G6B5_UNORM; + case Tegra::RenderTargetFormat::A1R5G5B5_UNORM: + return PixelFormat::A1R5G5B5_UNORM; + case Tegra::RenderTargetFormat::R8G8_UNORM: + return PixelFormat::R8G8_UNORM; + case Tegra::RenderTargetFormat::R8G8_SNORM: + return PixelFormat::R8G8_SNORM; + case Tegra::RenderTargetFormat::R8G8_SINT: + return PixelFormat::R8G8_SINT; + case Tegra::RenderTargetFormat::R8G8_UINT: + return PixelFormat::R8G8_UINT; case Tegra::RenderTargetFormat::R16_UNORM: - return PixelFormat::R16U; + return PixelFormat::R16_UNORM; case Tegra::RenderTargetFormat::R16_SNORM: - return PixelFormat::R16S; - case Tegra::RenderTargetFormat::R16_UINT: - return PixelFormat::R16UI; + return PixelFormat::R16_SNORM; case Tegra::RenderTargetFormat::R16_SINT: - return PixelFormat::R16I; - case Tegra::RenderTargetFormat::R32_FLOAT: - return PixelFormat::R32F; - case Tegra::RenderTargetFormat::R32_SINT: - return PixelFormat::R32I; - case Tegra::RenderTargetFormat::R32_UINT: - return PixelFormat::R32UI; - case Tegra::RenderTargetFormat::RG32_UINT: - return PixelFormat::RG32UI; - case Tegra::RenderTargetFormat::RGBX16_FLOAT: - return PixelFormat::RGBX16F; + return PixelFormat::R16_SINT; + case Tegra::RenderTargetFormat::R16_UINT: + return PixelFormat::R16_UINT; + case Tegra::RenderTargetFormat::R16_FLOAT: + return PixelFormat::R16_FLOAT; + case Tegra::RenderTargetFormat::R8_UNORM: + return PixelFormat::R8_UNORM; + case Tegra::RenderTargetFormat::R8_SNORM: + return PixelFormat::R8_SNORM; + case Tegra::RenderTargetFormat::R8_SINT: + return PixelFormat::R8_SINT; + case Tegra::RenderTargetFormat::R8_UINT: + return PixelFormat::R8_UINT; default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - return PixelFormat::RGBA8_SRGB; + UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<int>(format)); + return PixelFormat::A8B8G8R8_UNORM; } } PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::ABGR8U; - case Tegra::FramebufferConfig::PixelFormat::RGB565: - return PixelFormat::B5G6R5U; - case Tegra::FramebufferConfig::PixelFormat::BGRA8: - return PixelFormat::BGRA8; + case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM: + return PixelFormat::A8B8G8R8_UNORM; + case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM: + return PixelFormat::R5G6B5_UNORM; + case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM: + return PixelFormat::B8G8R8A8_UNORM; default: UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format)); - return PixelFormat::ABGR8U; + return PixelFormat::A8B8G8R8_UNORM; } } @@ -212,27 +226,27 @@ SurfaceType GetFormatType(PixelFormat pixel_format) { bool IsPixelFormatASTC(PixelFormat format) { switch (format) { - case PixelFormat::ASTC_2D_4X4: - case PixelFormat::ASTC_2D_5X4: - case PixelFormat::ASTC_2D_5X5: - case PixelFormat::ASTC_2D_8X8: - case PixelFormat::ASTC_2D_8X5: + case PixelFormat::ASTC_2D_4X4_UNORM: + case PixelFormat::ASTC_2D_5X4_UNORM: + case PixelFormat::ASTC_2D_5X5_UNORM: + case PixelFormat::ASTC_2D_8X8_UNORM: + case PixelFormat::ASTC_2D_8X5_UNORM: case PixelFormat::ASTC_2D_4X4_SRGB: case PixelFormat::ASTC_2D_5X4_SRGB: case PixelFormat::ASTC_2D_5X5_SRGB: case PixelFormat::ASTC_2D_8X8_SRGB: case PixelFormat::ASTC_2D_8X5_SRGB: - case PixelFormat::ASTC_2D_10X8: + case PixelFormat::ASTC_2D_10X8_UNORM: case PixelFormat::ASTC_2D_10X8_SRGB: - case PixelFormat::ASTC_2D_6X6: + case PixelFormat::ASTC_2D_6X6_UNORM: case PixelFormat::ASTC_2D_6X6_SRGB: - case PixelFormat::ASTC_2D_10X10: + case PixelFormat::ASTC_2D_10X10_UNORM: case PixelFormat::ASTC_2D_10X10_SRGB: - case PixelFormat::ASTC_2D_12X12: + case PixelFormat::ASTC_2D_12X12_UNORM: case PixelFormat::ASTC_2D_12X12_SRGB: - case PixelFormat::ASTC_2D_8X6: + case PixelFormat::ASTC_2D_8X6_UNORM: case PixelFormat::ASTC_2D_8X6_SRGB: - case PixelFormat::ASTC_2D_6X5: + case PixelFormat::ASTC_2D_6X5_UNORM: case PixelFormat::ASTC_2D_6X5_SRGB: return true; default: @@ -242,12 +256,12 @@ bool IsPixelFormatASTC(PixelFormat format) { bool IsPixelFormatSRGB(PixelFormat format) { switch (format) { - case PixelFormat::RGBA8_SRGB: - case PixelFormat::BGRA8_SRGB: - case PixelFormat::DXT1_SRGB: - case PixelFormat::DXT23_SRGB: - case PixelFormat::DXT45_SRGB: - case PixelFormat::BC7U_SRGB: + case PixelFormat::A8B8G8R8_SRGB: + case PixelFormat::B8G8R8A8_SRGB: + case PixelFormat::BC1_RGBA_SRGB: + case PixelFormat::BC2_SRGB: + case PixelFormat::BC3_SRGB: + case PixelFormat::BC7_SRGB: case PixelFormat::ASTC_2D_4X4_SRGB: case PixelFormat::ASTC_2D_8X8_SRGB: case PixelFormat::ASTC_2D_8X5_SRGB: @@ -269,25 +283,4 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; } -bool IsFormatBCn(PixelFormat format) { - switch (format) { - case PixelFormat::DXT1: - case PixelFormat::DXT23: - case PixelFormat::DXT45: - case PixelFormat::DXN1: - case PixelFormat::DXN2SNORM: - case PixelFormat::DXN2UNORM: - case PixelFormat::BC7U: - case PixelFormat::BC6H_UF16: - case PixelFormat::BC6H_SF16: - case PixelFormat::DXT1_SRGB: - case PixelFormat::DXT23_SRGB: - case PixelFormat::DXT45_SRGB: - case PixelFormat::BC7U_SRGB: - return true; - default: - return false; - } -} - } // namespace VideoCore::Surface diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 6da6a1b97..cfd12fa61 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -15,94 +15,105 @@ namespace VideoCore::Surface { enum class PixelFormat { - ABGR8U = 0, - ABGR8S = 1, - ABGR8UI = 2, - B5G6R5U = 3, - A2B10G10R10U = 4, - A1B5G5R5U = 5, - R8U = 6, - R8UI = 7, - RGBA16F = 8, - RGBA16U = 9, - RGBA16S = 10, - RGBA16UI = 11, - R11FG11FB10F = 12, - RGBA32UI = 13, - DXT1 = 14, - DXT23 = 15, - DXT45 = 16, - DXN1 = 17, // This is also known as BC4 - DXN2UNORM = 18, - DXN2SNORM = 19, - BC7U = 20, - BC6H_UF16 = 21, - BC6H_SF16 = 22, - ASTC_2D_4X4 = 23, - BGRA8 = 24, - RGBA32F = 25, - RG32F = 26, - R32F = 27, - R16F = 28, - R16U = 29, - R16S = 30, - R16UI = 31, - R16I = 32, - RG16 = 33, - RG16F = 34, - RG16UI = 35, - RG16I = 36, - RG16S = 37, - RGB32F = 38, - RGBA8_SRGB = 39, - RG8U = 40, - RG8S = 41, - RG8UI = 42, - RG32UI = 43, - RGBX16F = 44, - R32UI = 45, - R32I = 46, - ASTC_2D_8X8 = 47, - ASTC_2D_8X5 = 48, - ASTC_2D_5X4 = 49, - BGRA8_SRGB = 50, - DXT1_SRGB = 51, - DXT23_SRGB = 52, - DXT45_SRGB = 53, - BC7U_SRGB = 54, - R4G4B4A4U = 55, - ASTC_2D_4X4_SRGB = 56, - ASTC_2D_8X8_SRGB = 57, - ASTC_2D_8X5_SRGB = 58, - ASTC_2D_5X4_SRGB = 59, - ASTC_2D_5X5 = 60, - ASTC_2D_5X5_SRGB = 61, - ASTC_2D_10X8 = 62, - ASTC_2D_10X8_SRGB = 63, - ASTC_2D_6X6 = 64, - ASTC_2D_6X6_SRGB = 65, - ASTC_2D_10X10 = 66, - ASTC_2D_10X10_SRGB = 67, - ASTC_2D_12X12 = 68, - ASTC_2D_12X12_SRGB = 69, - ASTC_2D_8X6 = 70, - ASTC_2D_8X6_SRGB = 71, - ASTC_2D_6X5 = 72, - ASTC_2D_6X5_SRGB = 73, - E5B9G9R9F = 74, + A8B8G8R8_UNORM, + A8B8G8R8_SNORM, + A8B8G8R8_SINT, + A8B8G8R8_UINT, + R5G6B5_UNORM, + B5G6R5_UNORM, + A1R5G5B5_UNORM, + A2B10G10R10_UNORM, + A2B10G10R10_UINT, + A1B5G5R5_UNORM, + R8_UNORM, + R8_SNORM, + R8_SINT, + R8_UINT, + R16G16B16A16_FLOAT, + R16G16B16A16_UNORM, + R16G16B16A16_SNORM, + R16G16B16A16_SINT, + R16G16B16A16_UINT, + B10G11R11_FLOAT, + R32G32B32A32_UINT, + BC1_RGBA_UNORM, + BC2_UNORM, + BC3_UNORM, + BC4_UNORM, + BC4_SNORM, + BC5_UNORM, + BC5_SNORM, + BC7_UNORM, + BC6H_UFLOAT, + BC6H_SFLOAT, + ASTC_2D_4X4_UNORM, + B8G8R8A8_UNORM, + R32G32B32A32_FLOAT, + R32G32B32A32_SINT, + R32G32_FLOAT, + R32G32_SINT, + R32_FLOAT, + R16_FLOAT, + R16_UNORM, + R16_SNORM, + R16_UINT, + R16_SINT, + R16G16_UNORM, + R16G16_FLOAT, + R16G16_UINT, + R16G16_SINT, + R16G16_SNORM, + R32G32B32_FLOAT, + A8B8G8R8_SRGB, + R8G8_UNORM, + R8G8_SNORM, + R8G8_SINT, + R8G8_UINT, + R32G32_UINT, + R16G16B16X16_FLOAT, + R32_UINT, + R32_SINT, + ASTC_2D_8X8_UNORM, + ASTC_2D_8X5_UNORM, + ASTC_2D_5X4_UNORM, + B8G8R8A8_SRGB, + BC1_RGBA_SRGB, + BC2_SRGB, + BC3_SRGB, + BC7_SRGB, + A4B4G4R4_UNORM, + ASTC_2D_4X4_SRGB, + ASTC_2D_8X8_SRGB, + ASTC_2D_8X5_SRGB, + ASTC_2D_5X4_SRGB, + ASTC_2D_5X5_UNORM, + ASTC_2D_5X5_SRGB, + ASTC_2D_10X8_UNORM, + ASTC_2D_10X8_SRGB, + ASTC_2D_6X6_UNORM, + ASTC_2D_6X6_SRGB, + ASTC_2D_10X10_UNORM, + ASTC_2D_10X10_SRGB, + ASTC_2D_12X12_UNORM, + ASTC_2D_12X12_SRGB, + ASTC_2D_8X6_UNORM, + ASTC_2D_8X6_SRGB, + ASTC_2D_6X5_UNORM, + ASTC_2D_6X5_SRGB, + E5B9G9R9_FLOAT, MaxColorFormat, // Depth formats - Z32F = 75, - Z16 = 76, + D32_FLOAT = MaxColorFormat, + D16_UNORM, MaxDepthFormat, // DepthStencil formats - Z24S8 = 77, - S8Z24 = 78, - Z32FS8 = 79, + D24_UNORM_S8_UINT = MaxDepthFormat, + S8_UINT_D24_UNORM, + D32_FLOAT_S8_UINT, MaxDepthStencilFormat, @@ -130,86 +141,97 @@ enum class SurfaceTarget { }; constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ - 0, // ABGR8U - 0, // ABGR8S - 0, // ABGR8UI - 0, // B5G6R5U - 0, // A2B10G10R10U - 0, // A1B5G5R5U - 0, // R8U - 0, // R8UI - 0, // RGBA16F - 0, // RGBA16U - 0, // RGBA16S - 0, // RGBA16UI - 0, // R11FG11FB10F - 0, // RGBA32UI - 2, // DXT1 - 2, // DXT23 - 2, // DXT45 - 2, // DXN1 - 2, // DXN2UNORM - 2, // DXN2SNORM - 2, // BC7U - 2, // BC6H_UF16 - 2, // BC6H_SF16 - 2, // ASTC_2D_4X4 - 0, // BGRA8 - 0, // RGBA32F - 0, // RG32F - 0, // R32F - 0, // R16F - 0, // R16U - 0, // R16S - 0, // R16UI - 0, // R16I - 0, // RG16 - 0, // RG16F - 0, // RG16UI - 0, // RG16I - 0, // RG16S - 0, // RGB32F - 0, // RGBA8_SRGB - 0, // RG8U - 0, // RG8S - 0, // RG8UI - 0, // RG32UI - 0, // RGBX16F - 0, // R32UI - 0, // R32I - 2, // ASTC_2D_8X8 - 2, // ASTC_2D_8X5 - 2, // ASTC_2D_5X4 - 0, // BGRA8_SRGB - 2, // DXT1_SRGB - 2, // DXT23_SRGB - 2, // DXT45_SRGB - 2, // BC7U_SRGB - 0, // R4G4B4A4U + 0, // A8B8G8R8_UNORM + 0, // A8B8G8R8_SNORM + 0, // A8B8G8R8_SINT + 0, // A8B8G8R8_UINT + 0, // R5G6B5_UNORM + 0, // B5G6R5_UNORM + 0, // A1R5G5B5_UNORM + 0, // A2B10G10R10_UNORM + 0, // A2B10G10R10_UINT + 0, // A1B5G5R5_UNORM + 0, // R8_UNORM + 0, // R8_SNORM + 0, // R8_SINT + 0, // R8_UINT + 0, // R16G16B16A16_FLOAT + 0, // R16G16B16A16_UNORM + 0, // R16G16B16A16_SNORM + 0, // R16G16B16A16_SINT + 0, // R16G16B16A16_UINT + 0, // B10G11R11_FLOAT + 0, // R32G32B32A32_UINT + 2, // BC1_RGBA_UNORM + 2, // BC2_UNORM + 2, // BC3_UNORM + 2, // BC4_UNORM + 2, // BC4_SNORM + 2, // BC5_UNORM + 2, // BC5_SNORM + 2, // BC7_UNORM + 2, // BC6H_UFLOAT + 2, // BC6H_SFLOAT + 2, // ASTC_2D_4X4_UNORM + 0, // B8G8R8A8_UNORM + 0, // R32G32B32A32_FLOAT + 0, // R32G32B32A32_SINT + 0, // R32G32_FLOAT + 0, // R32G32_SINT + 0, // R32_FLOAT + 0, // R16_FLOAT + 0, // R16_UNORM + 0, // R16_SNORM + 0, // R16_UINT + 0, // R16_SINT + 0, // R16G16_UNORM + 0, // R16G16_FLOAT + 0, // R16G16_UINT + 0, // R16G16_SINT + 0, // R16G16_SNORM + 0, // R32G32B32_FLOAT + 0, // A8B8G8R8_SRGB + 0, // R8G8_UNORM + 0, // R8G8_SNORM + 0, // R8G8_SINT + 0, // R8G8_UINT + 0, // R32G32_UINT + 0, // R16G16B16X16_FLOAT + 0, // R32_UINT + 0, // R32_SINT + 2, // ASTC_2D_8X8_UNORM + 2, // ASTC_2D_8X5_UNORM + 2, // ASTC_2D_5X4_UNORM + 0, // B8G8R8A8_SRGB + 2, // BC1_RGBA_SRGB + 2, // BC2_SRGB + 2, // BC3_SRGB + 2, // BC7_SRGB + 0, // A4B4G4R4_UNORM 2, // ASTC_2D_4X4_SRGB 2, // ASTC_2D_8X8_SRGB 2, // ASTC_2D_8X5_SRGB 2, // ASTC_2D_5X4_SRGB - 2, // ASTC_2D_5X5 + 2, // ASTC_2D_5X5_UNORM 2, // ASTC_2D_5X5_SRGB - 2, // ASTC_2D_10X8 + 2, // ASTC_2D_10X8_UNORM 2, // ASTC_2D_10X8_SRGB - 2, // ASTC_2D_6X6 + 2, // ASTC_2D_6X6_UNORM 2, // ASTC_2D_6X6_SRGB - 2, // ASTC_2D_10X10 + 2, // ASTC_2D_10X10_UNORM 2, // ASTC_2D_10X10_SRGB - 2, // ASTC_2D_12X12 + 2, // ASTC_2D_12X12_UNORM 2, // ASTC_2D_12X12_SRGB - 2, // ASTC_2D_8X6 + 2, // ASTC_2D_8X6_UNORM 2, // ASTC_2D_8X6_SRGB - 2, // ASTC_2D_6X5 + 2, // ASTC_2D_6X5_UNORM 2, // ASTC_2D_6X5_SRGB - 0, // E5B9G9R9F - 0, // Z32F - 0, // Z16 - 0, // Z24S8 - 0, // S8Z24 - 0, // Z32FS8 + 0, // E5B9G9R9_FLOAT + 0, // D32_FLOAT + 0, // D16_UNORM + 0, // D24_UNORM_S8_UINT + 0, // S8_UINT_D24_UNORM + 0, // D32_FLOAT_S8_UINT }}; /** @@ -229,86 +251,97 @@ inline constexpr u32 GetCompressionFactor(PixelFormat format) { } constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16S - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG8UI - 1, // RG32UI - 1, // RGBX16F - 1, // R32UI - 1, // R32I - 8, // ASTC_2D_8X8 - 8, // ASTC_2D_8X5 - 5, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 1, // R4G4B4A4U + 1, // A8B8G8R8_UNORM + 1, // A8B8G8R8_SNORM + 1, // A8B8G8R8_SINT + 1, // A8B8G8R8_UINT + 1, // R5G6B5_UNORM + 1, // B5G6R5_UNORM + 1, // A1R5G5B5_UNORM + 1, // A2B10G10R10_UNORM + 1, // A2B10G10R10_UINT + 1, // A1B5G5R5_UNORM + 1, // R8_UNORM + 1, // R8_SNORM + 1, // R8_SINT + 1, // R8_UINT + 1, // R16G16B16A16_FLOAT + 1, // R16G16B16A16_UNORM + 1, // R16G16B16A16_SNORM + 1, // R16G16B16A16_SINT + 1, // R16G16B16A16_UINT + 1, // B10G11R11_FLOAT + 1, // R32G32B32A32_UINT + 4, // BC1_RGBA_UNORM + 4, // BC2_UNORM + 4, // BC3_UNORM + 4, // BC4_UNORM + 4, // BC4_SNORM + 4, // BC5_UNORM + 4, // BC5_SNORM + 4, // BC7_UNORM + 4, // BC6H_UFLOAT + 4, // BC6H_SFLOAT + 4, // ASTC_2D_4X4_UNORM + 1, // B8G8R8A8_UNORM + 1, // R32G32B32A32_FLOAT + 1, // R32G32B32A32_SINT + 1, // R32G32_FLOAT + 1, // R32G32_SINT + 1, // R32_FLOAT + 1, // R16_FLOAT + 1, // R16_UNORM + 1, // R16_SNORM + 1, // R16_UINT + 1, // R16_SINT + 1, // R16G16_UNORM + 1, // R16G16_FLOAT + 1, // R16G16_UINT + 1, // R16G16_SINT + 1, // R16G16_SNORM + 1, // R32G32B32_FLOAT + 1, // A8B8G8R8_SRGB + 1, // R8G8_UNORM + 1, // R8G8_SNORM + 1, // R8G8_SINT + 1, // R8G8_UINT + 1, // R32G32_UINT + 1, // R16G16B16X16_FLOAT + 1, // R32_UINT + 1, // R32_SINT + 8, // ASTC_2D_8X8_UNORM + 8, // ASTC_2D_8X5_UNORM + 5, // ASTC_2D_5X4_UNORM + 1, // B8G8R8A8_SRGB + 4, // BC1_RGBA_SRGB + 4, // BC2_SRGB + 4, // BC3_SRGB + 4, // BC7_SRGB + 1, // A4B4G4R4_UNORM 4, // ASTC_2D_4X4_SRGB 8, // ASTC_2D_8X8_SRGB 8, // ASTC_2D_8X5_SRGB 5, // ASTC_2D_5X4_SRGB - 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_UNORM 5, // ASTC_2D_5X5_SRGB - 10, // ASTC_2D_10X8 + 10, // ASTC_2D_10X8_UNORM 10, // ASTC_2D_10X8_SRGB - 6, // ASTC_2D_6X6 + 6, // ASTC_2D_6X6_UNORM 6, // ASTC_2D_6X6_SRGB - 10, // ASTC_2D_10X10 + 10, // ASTC_2D_10X10_UNORM 10, // ASTC_2D_10X10_SRGB - 12, // ASTC_2D_12X12 + 12, // ASTC_2D_12X12_UNORM 12, // ASTC_2D_12X12_SRGB - 8, // ASTC_2D_8X6 + 8, // ASTC_2D_8X6_UNORM 8, // ASTC_2D_8X6_SRGB - 6, // ASTC_2D_6X5 + 6, // ASTC_2D_6X5_UNORM 6, // ASTC_2D_6X5_SRGB - 1, // E5B9G9R9F - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 + 1, // E5B9G9R9_FLOAT + 1, // D32_FLOAT + 1, // D16_UNORM + 1, // D24_UNORM_S8_UINT + 1, // S8_UINT_D24_UNORM + 1, // D32_FLOAT_S8_UINT }}; static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { @@ -320,86 +353,97 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { } constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16S - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG8UI - 1, // RG32UI - 1, // RGBX16F - 1, // R32UI - 1, // R32I - 8, // ASTC_2D_8X8 - 5, // ASTC_2D_8X5 - 4, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 1, // R4G4B4A4U + 1, // A8B8G8R8_UNORM + 1, // A8B8G8R8_SNORM + 1, // A8B8G8R8_SINT + 1, // A8B8G8R8_UINT + 1, // R5G6B5_UNORM + 1, // B5G6R5_UNORM + 1, // A1R5G5B5_UNORM + 1, // A2B10G10R10_UNORM + 1, // A2B10G10R10_UINT + 1, // A1B5G5R5_UNORM + 1, // R8_UNORM + 1, // R8_SNORM + 1, // R8_SINT + 1, // R8_UINT + 1, // R16G16B16A16_FLOAT + 1, // R16G16B16A16_UNORM + 1, // R16G16B16A16_SNORM + 1, // R16G16B16A16_SINT + 1, // R16G16B16A16_UINT + 1, // B10G11R11_FLOAT + 1, // R32G32B32A32_UINT + 4, // BC1_RGBA_UNORM + 4, // BC2_UNORM + 4, // BC3_UNORM + 4, // BC4_UNORM + 4, // BC4_SNORM + 4, // BC5_UNORM + 4, // BC5_SNORM + 4, // BC7_UNORM + 4, // BC6H_UFLOAT + 4, // BC6H_SFLOAT + 4, // ASTC_2D_4X4_UNORM + 1, // B8G8R8A8_UNORM + 1, // R32G32B32A32_FLOAT + 1, // R32G32B32A32_SINT + 1, // R32G32_FLOAT + 1, // R32G32_SINT + 1, // R32_FLOAT + 1, // R16_FLOAT + 1, // R16_UNORM + 1, // R16_SNORM + 1, // R16_UINT + 1, // R16_SINT + 1, // R16G16_UNORM + 1, // R16G16_FLOAT + 1, // R16G16_UINT + 1, // R16G16_SINT + 1, // R16G16_SNORM + 1, // R32G32B32_FLOAT + 1, // A8B8G8R8_SRGB + 1, // R8G8_UNORM + 1, // R8G8_SNORM + 1, // R8G8_SINT + 1, // R8G8_UINT + 1, // R32G32_UINT + 1, // R16G16B16X16_FLOAT + 1, // R32_UINT + 1, // R32_SINT + 8, // ASTC_2D_8X8_UNORM + 5, // ASTC_2D_8X5_UNORM + 4, // ASTC_2D_5X4_UNORM + 1, // B8G8R8A8_SRGB + 4, // BC1_RGBA_SRGB + 4, // BC2_SRGB + 4, // BC3_SRGB + 4, // BC7_SRGB + 1, // A4B4G4R4_UNORM 4, // ASTC_2D_4X4_SRGB 8, // ASTC_2D_8X8_SRGB 5, // ASTC_2D_8X5_SRGB 4, // ASTC_2D_5X4_SRGB - 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_UNORM 5, // ASTC_2D_5X5_SRGB - 8, // ASTC_2D_10X8 + 8, // ASTC_2D_10X8_UNORM 8, // ASTC_2D_10X8_SRGB - 6, // ASTC_2D_6X6 + 6, // ASTC_2D_6X6_UNORM 6, // ASTC_2D_6X6_SRGB - 10, // ASTC_2D_10X10 + 10, // ASTC_2D_10X10_UNORM 10, // ASTC_2D_10X10_SRGB - 12, // ASTC_2D_12X12 + 12, // ASTC_2D_12X12_UNORM 12, // ASTC_2D_12X12_SRGB - 6, // ASTC_2D_8X6 + 6, // ASTC_2D_8X6_UNORM 6, // ASTC_2D_8X6_SRGB - 5, // ASTC_2D_6X5 + 5, // ASTC_2D_6X5_UNORM 5, // ASTC_2D_6X5_SRGB - 1, // E5B9G9R9F - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 + 1, // E5B9G9R9_FLOAT + 1, // D32_FLOAT + 1, // D16_UNORM + 1, // D24_UNORM_S8_UINT + 1, // S8_UINT_D24_UNORM + 1, // D32_FLOAT_S8_UINT }}; static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { @@ -411,86 +455,97 @@ static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { } constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ - 32, // ABGR8U - 32, // ABGR8S - 32, // ABGR8UI - 16, // B5G6R5U - 32, // A2B10G10R10U - 16, // A1B5G5R5U - 8, // R8U - 8, // R8UI - 64, // RGBA16F - 64, // RGBA16U - 64, // RGBA16S - 64, // RGBA16UI - 32, // R11FG11FB10F - 128, // RGBA32UI - 64, // DXT1 - 128, // DXT23 - 128, // DXT45 - 64, // DXN1 - 128, // DXN2UNORM - 128, // DXN2SNORM - 128, // BC7U - 128, // BC6H_UF16 - 128, // BC6H_SF16 - 128, // ASTC_2D_4X4 - 32, // BGRA8 - 128, // RGBA32F - 64, // RG32F - 32, // R32F - 16, // R16F - 16, // R16U - 16, // R16S - 16, // R16UI - 16, // R16I - 32, // RG16 - 32, // RG16F - 32, // RG16UI - 32, // RG16I - 32, // RG16S - 96, // RGB32F - 32, // RGBA8_SRGB - 16, // RG8U - 16, // RG8S - 16, // RG8UI - 64, // RG32UI - 64, // RGBX16F - 32, // R32UI - 32, // R32I - 128, // ASTC_2D_8X8 - 128, // ASTC_2D_8X5 - 128, // ASTC_2D_5X4 - 32, // BGRA8_SRGB - 64, // DXT1_SRGB - 128, // DXT23_SRGB - 128, // DXT45_SRGB - 128, // BC7U - 16, // R4G4B4A4U + 32, // A8B8G8R8_UNORM + 32, // A8B8G8R8_SNORM + 32, // A8B8G8R8_SINT + 32, // A8B8G8R8_UINT + 16, // R5G6B5_UNORM + 16, // B5G6R5_UNORM + 16, // A1R5G5B5_UNORM + 32, // A2B10G10R10_UNORM + 32, // A2B10G10R10_UINT + 16, // A1B5G5R5_UNORM + 8, // R8_UNORM + 8, // R8_SNORM + 8, // R8_SINT + 8, // R8_UINT + 64, // R16G16B16A16_FLOAT + 64, // R16G16B16A16_UNORM + 64, // R16G16B16A16_SNORM + 64, // R16G16B16A16_SINT + 64, // R16G16B16A16_UINT + 32, // B10G11R11_FLOAT + 128, // R32G32B32A32_UINT + 64, // BC1_RGBA_UNORM + 128, // BC2_UNORM + 128, // BC3_UNORM + 64, // BC4_UNORM + 64, // BC4_SNORM + 128, // BC5_UNORM + 128, // BC5_SNORM + 128, // BC7_UNORM + 128, // BC6H_UFLOAT + 128, // BC6H_SFLOAT + 128, // ASTC_2D_4X4_UNORM + 32, // B8G8R8A8_UNORM + 128, // R32G32B32A32_FLOAT + 128, // R32G32B32A32_SINT + 64, // R32G32_FLOAT + 64, // R32G32_SINT + 32, // R32_FLOAT + 16, // R16_FLOAT + 16, // R16_UNORM + 16, // R16_SNORM + 16, // R16_UINT + 16, // R16_SINT + 32, // R16G16_UNORM + 32, // R16G16_FLOAT + 32, // R16G16_UINT + 32, // R16G16_SINT + 32, // R16G16_SNORM + 96, // R32G32B32_FLOAT + 32, // A8B8G8R8_SRGB + 16, // R8G8_UNORM + 16, // R8G8_SNORM + 16, // R8G8_SINT + 16, // R8G8_UINT + 64, // R32G32_UINT + 64, // R16G16B16X16_FLOAT + 32, // R32_UINT + 32, // R32_SINT + 128, // ASTC_2D_8X8_UNORM + 128, // ASTC_2D_8X5_UNORM + 128, // ASTC_2D_5X4_UNORM + 32, // B8G8R8A8_SRGB + 64, // BC1_RGBA_SRGB + 128, // BC2_SRGB + 128, // BC3_SRGB + 128, // BC7_UNORM + 16, // A4B4G4R4_UNORM 128, // ASTC_2D_4X4_SRGB 128, // ASTC_2D_8X8_SRGB 128, // ASTC_2D_8X5_SRGB 128, // ASTC_2D_5X4_SRGB - 128, // ASTC_2D_5X5 + 128, // ASTC_2D_5X5_UNORM 128, // ASTC_2D_5X5_SRGB - 128, // ASTC_2D_10X8 + 128, // ASTC_2D_10X8_UNORM 128, // ASTC_2D_10X8_SRGB - 128, // ASTC_2D_6X6 + 128, // ASTC_2D_6X6_UNORM 128, // ASTC_2D_6X6_SRGB - 128, // ASTC_2D_10X10 + 128, // ASTC_2D_10X10_UNORM 128, // ASTC_2D_10X10_SRGB - 128, // ASTC_2D_12X12 + 128, // ASTC_2D_12X12_UNORM 128, // ASTC_2D_12X12_SRGB - 128, // ASTC_2D_8X6 + 128, // ASTC_2D_8X6_UNORM 128, // ASTC_2D_8X6_SRGB - 128, // ASTC_2D_6X5 + 128, // ASTC_2D_6X5_UNORM 128, // ASTC_2D_6X5_SRGB - 32, // E5B9G9R9F - 32, // Z32F - 16, // Z16 - 32, // Z24S8 - 32, // S8Z24 - 64, // Z32FS8 + 32, // E5B9G9R9_FLOAT + 32, // D32_FLOAT + 16, // D16_UNORM + 32, // D24_UNORM_S8_UINT + 32, // S8_UINT_D24_UNORM + 64, // D32_FLOAT_S8_UINT }}; static constexpr u32 GetFormatBpp(PixelFormat format) { @@ -529,7 +584,4 @@ bool IsPixelFormatSRGB(PixelFormat format); std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); -/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN -bool IsFormatBCn(PixelFormat format); - } // namespace VideoCore::Surface diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index f476f03b0..7d5a75648 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -19,8 +19,6 @@ constexpr auto SNORM = ComponentType::SNORM; constexpr auto UNORM = ComponentType::UNORM; constexpr auto SINT = ComponentType::SINT; constexpr auto UINT = ComponentType::UINT; -constexpr auto SNORM_FORCE_FP16 = ComponentType::SNORM_FORCE_FP16; -constexpr auto UNORM_FORCE_FP16 = ComponentType::UNORM_FORCE_FP16; constexpr auto FLOAT = ComponentType::FLOAT; constexpr bool C = false; // Normal color constexpr bool S = true; // Srgb @@ -41,119 +39,126 @@ struct Table { ComponentType alpha_component; bool is_srgb; }; -constexpr std::array<Table, 78> DefinitionTable = {{ - {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, - {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, - {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, - {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA8_SRGB}, +constexpr std::array<Table, 86> DefinitionTable = {{ + {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM}, + {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM}, + {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT}, + {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT}, + {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB}, - {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5U}, + {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM}, - {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10U}, + {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM}, + {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT}, - {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5U}, + {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM}, - {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R4G4B4A4U}, + {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM}, - {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8U}, - {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8UI}, + {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM}, + {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM}, + {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT}, + {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT}, - {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, - {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, - {TextureFormat::G8R8, C, UINT, UINT, UINT, UINT, PixelFormat::RG8UI}, + {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM}, + {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM}, + {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT}, + {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT}, - {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S}, - {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, - {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, - {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, + {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM}, + {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM}, + {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT}, + {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT}, + {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT}, - {TextureFormat::R16_G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG16F}, - {TextureFormat::R16_G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG16}, - {TextureFormat::R16_G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG16S}, - {TextureFormat::R16_G16, C, UINT, UINT, UINT, UINT, PixelFormat::RG16UI}, - {TextureFormat::R16_G16, C, SINT, SINT, SINT, SINT, PixelFormat::RG16I}, + {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT}, + {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM}, + {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM}, + {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT}, + {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT}, - {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16F}, - {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16U}, - {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16S}, - {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16UI}, - {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16I}, + {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT}, + {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM}, + {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM}, + {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT}, + {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT}, - {TextureFormat::BF10GF11RF11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R11FG11FB10F}, + {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT}, - {TextureFormat::R32_G32_B32_A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA32F}, - {TextureFormat::R32_G32_B32_A32, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA32UI}, + {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT}, + {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT}, + {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT}, - {TextureFormat::R32_G32_B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGB32F}, + {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT}, - {TextureFormat::R32_G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG32F}, - {TextureFormat::R32_G32, C, UINT, UINT, UINT, UINT, PixelFormat::RG32UI}, + {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT}, + {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT}, + {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT}, - {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F}, - {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI}, - {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I}, + {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT}, + {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT}, + {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT}, - {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F}, + {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT}, - {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, - {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, - {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, - {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, - {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, + {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT}, + {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM}, + {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, + {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, + {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT}, - {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, - {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB}, + {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM}, + {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB}, - {TextureFormat::DXT23, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23}, - {TextureFormat::DXT23, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23_SRGB}, + {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM}, + {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB}, - {TextureFormat::DXT45, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45}, - {TextureFormat::DXT45, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45_SRGB}, + {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM}, + {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB}, - // TODO: Use a different pixel format for SNORM - {TextureFormat::DXN1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN1}, - {TextureFormat::DXN1, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN1}, + {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM}, + {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM}, - {TextureFormat::DXN2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN2UNORM}, - {TextureFormat::DXN2, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN2SNORM}, + {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM}, + {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM}, - {TextureFormat::BC7U, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U}, - {TextureFormat::BC7U, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U_SRGB}, + {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM}, + {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB}, - {TextureFormat::BC6H_SF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SF16}, - {TextureFormat::BC6H_UF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UF16}, + {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT}, + {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT}, - {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4}, + {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM}, {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB}, - {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4}, + {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM}, {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB}, - {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5}, + {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM}, {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB}, - {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8}, + {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM}, {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB}, - {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5}, + {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM}, {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB}, - {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8}, + {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM}, {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB}, - {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6}, + {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM}, {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB}, - {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10}, + {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM}, {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB}, - {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12}, + {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM}, {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB}, - {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6}, + {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM}, {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB}, - {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5}, + {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, }}; @@ -184,7 +189,7 @@ PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb static_cast<int>(format), is_srgb, static_cast<int>(red_component), static_cast<int>(green_component), static_cast<int>(blue_component), static_cast<int>(alpha_component)); - return PixelFormat::ABGR8U; + return PixelFormat::A8B8G8R8_UNORM; } void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component, diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 0caf3b4f0..dfcf36e0b 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -228,7 +228,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } - if (!is_converted && params.pixel_format != PixelFormat::S8Z24) { + if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) { return; } diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 921562c1f..9e5fe2374 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -83,12 +83,12 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta params.type = GetFormatType(params.pixel_format); if (entry.is_shadow && params.type == SurfaceType::ColorTexture) { switch (params.pixel_format) { - case PixelFormat::R16U: - case PixelFormat::R16F: - params.pixel_format = PixelFormat::Z16; + case PixelFormat::R16_UNORM: + case PixelFormat::R16_FLOAT: + params.pixel_format = PixelFormat::D16_UNORM; break; - case PixelFormat::R32F: - params.pixel_format = PixelFormat::Z32F; + case PixelFormat::R32_FLOAT: + params.pixel_format = PixelFormat::D32_FLOAT; break; default: UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", @@ -195,8 +195,8 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz SurfaceParams params; params.is_tiled = config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || - config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || + config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB; params.block_width = config.memory_layout.block_width; params.block_height = config.memory_layout.block_height; params.block_depth = config.memory_layout.block_depth; @@ -235,8 +235,8 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& config) { SurfaceParams params{}; params.is_tiled = !config.linear; - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || - config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || + config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB; params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0, params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0, params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index cdcddb225..96c4e4cc2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -373,9 +373,9 @@ protected: siblings_table[static_cast<std::size_t>(b)] = a; }; std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); - make_siblings(PixelFormat::Z16, PixelFormat::R16U); - make_siblings(PixelFormat::Z32F, PixelFormat::R32F); - make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); + make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM); + make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT); + make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT); sampled_textures.reserve(64); } @@ -1031,7 +1031,7 @@ private: params.pitch = 4; params.num_levels = 1; params.emulated_levels = 1; - params.pixel_format = VideoCore::Surface::PixelFormat::R8U; + params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM; params.type = VideoCore::Surface::SurfaceType::ColorTexture; auto surface = CreateSurface(0ULL, params); invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp index f3efa7eb0..962921483 100644 --- a/src/video_core/textures/convert.cpp +++ b/src/video_core/textures/convert.cpp @@ -35,7 +35,7 @@ void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { S8Z24 s8z24_pixel{}; Z24S8 z24s8_pixel{}; constexpr auto bpp{ - VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)}; + VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)}; for (std::size_t y = 0; y < height; ++y) { for (std::size_t x = 0; x < width; ++x) { const std::size_t offset{bpp * (y * width + x)}; @@ -73,7 +73,7 @@ void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, in_data, width, height, depth, block_width, block_height); std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); - } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { + } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); } } @@ -85,7 +85,7 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h static_cast<u32>(pixel_format)); UNREACHABLE(); - } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { + } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); } } diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 98beabef1..474ae620a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -184,53 +184,6 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, } } -u32 BytesPerPixel(TextureFormat format) { - switch (format) { - case TextureFormat::DXT1: - case TextureFormat::DXN1: - // In this case a 'pixel' actually refers to a 4x4 tile. - return 8; - case TextureFormat::DXT23: - case TextureFormat::DXT45: - case TextureFormat::DXN2: - case TextureFormat::BC7U: - case TextureFormat::BC6H_UF16: - case TextureFormat::BC6H_SF16: - // In this case a 'pixel' actually refers to a 4x4 tile. - return 16; - case TextureFormat::R32_G32_B32: - return 12; - case TextureFormat::ASTC_2D_4X4: - case TextureFormat::ASTC_2D_5X4: - case TextureFormat::ASTC_2D_8X8: - case TextureFormat::ASTC_2D_8X5: - case TextureFormat::ASTC_2D_10X8: - case TextureFormat::ASTC_2D_5X5: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::BF10GF11RF11: - case TextureFormat::R32: - case TextureFormat::R16_G16: - return 4; - case TextureFormat::A1B5G5R5: - case TextureFormat::B5G6R5: - case TextureFormat::G8R8: - case TextureFormat::R16: - return 2; - case TextureFormat::R8: - return 1; - case TextureFormat::R16_G16_B16_A16: - return 8; - case TextureFormat::R32_G32_B32_A32: - return 16; - case TextureFormat::R32_G32: - return 8; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - return 1; - } -} - void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, u32 width_spacing) { @@ -348,48 +301,6 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 } } -std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, - u32 height) { - std::vector<u8> rgba_data; - - // TODO(Subv): Implement. - switch (format) { - case TextureFormat::DXT1: - case TextureFormat::DXT23: - case TextureFormat::DXT45: - case TextureFormat::DXN1: - case TextureFormat::DXN2: - case TextureFormat::BC7U: - case TextureFormat::BC6H_UF16: - case TextureFormat::BC6H_SF16: - case TextureFormat::ASTC_2D_4X4: - case TextureFormat::ASTC_2D_8X8: - case TextureFormat::ASTC_2D_5X5: - case TextureFormat::ASTC_2D_10X8: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A1B5G5R5: - case TextureFormat::B5G6R5: - case TextureFormat::R8: - case TextureFormat::G8R8: - case TextureFormat::BF10GF11RF11: - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R32_G32: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R16_G16: - case TextureFormat::R32_G32_B32: - // TODO(Subv): For the time being just forward the same data without any decoding. - rgba_data = texture_data; - break; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - break; - } - - return rgba_data; -} - std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth) { if (tiled) { diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 232b696b3..d6fe35d37 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -38,10 +38,6 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); -/// Decodes an unswizzled texture into a A8R8G8B8 texture. -std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, - u32 height); - /// This function calculates the correct size of a texture depending if it's tiled or not. std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index eba05aced..0574fef12 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -12,10 +12,10 @@ namespace Tegra::Texture { enum class TextureFormat : u32 { - R32_G32_B32_A32 = 0x01, - R32_G32_B32 = 0x02, - R16_G16_B16_A16 = 0x03, - R32_G32 = 0x04, + R32G32B32A32 = 0x01, + R32G32B32 = 0x02, + R16G16B16A16 = 0x03, + R32G32 = 0x04, R32_B24G8 = 0x05, ETC2_RGB = 0x06, X8B8G8R8 = 0x07, @@ -23,19 +23,19 @@ enum class TextureFormat : u32 { A2B10G10R10 = 0x09, ETC2_RGB_PTA = 0x0a, ETC2_RGBA = 0x0b, - R16_G16 = 0x0c, - G8R24 = 0x0d, - G24R8 = 0x0e, + R16G16 = 0x0c, + R24G8 = 0x0d, + R8G24 = 0x0e, R32 = 0x0f, - BC6H_SF16 = 0x10, - BC6H_UF16 = 0x11, + BC6H_SFLOAT = 0x10, + BC6H_UFLOAT = 0x11, A4B4G4R4 = 0x12, A5B5G5R1 = 0x13, A1B5G5R5 = 0x14, B5G6R5 = 0x15, B6G5R5 = 0x16, - BC7U = 0x17, - G8R8 = 0x18, + BC7 = 0x17, + R8G8 = 0x18, EAC = 0x19, EACX2 = 0x1a, R16 = 0x1b, @@ -43,23 +43,23 @@ enum class TextureFormat : u32 { R8 = 0x1d, G4R4 = 0x1e, R1 = 0x1f, - E5B9G9R9_SHAREDEXP = 0x20, - BF10GF11RF11 = 0x21, + E5B9G9R9 = 0x20, + B10G11R11 = 0x21, G8B8G8R8 = 0x22, B8G8R8G8 = 0x23, - DXT1 = 0x24, - DXT23 = 0x25, - DXT45 = 0x26, - DXN1 = 0x27, - DXN2 = 0x28, - S8Z24 = 0x29, + BC1_RGBA = 0x24, + BC2 = 0x25, + BC3 = 0x26, + BC4 = 0x27, + BC5 = 0x28, + S8D24 = 0x29, X8Z24 = 0x2a, - Z24S8 = 0x2b, + D24S8 = 0x2b, X4V4Z24__COV4R4V = 0x2c, X4V4Z24__COV8R8V = 0x2d, V8Z24__COV4R12V = 0x2e, - ZF32 = 0x2f, - ZF32_X24S8 = 0x30, + D32 = 0x2f, + D32S8 = 0x30, X8Z24_X20V4S8__COV4R4V = 0x31, X8Z24_X20V4S8__COV8R8V = 0x32, ZF32_X20V4X8__COV4R4V = 0x33, @@ -69,7 +69,7 @@ enum class TextureFormat : u32 { X8Z24_X16V8S8__COV4R12V = 0x37, ZF32_X16V8X8__COV4R12V = 0x38, ZF32_X16V8S8__COV4R12V = 0x39, - Z16 = 0x3a, + D16 = 0x3a, V8Z24__COV8R24V = 0x3b, X8Z24_X16V8S8__COV8R24V = 0x3c, ZF32_X16V8X8__COV8R24V = 0x3d, @@ -375,7 +375,4 @@ struct FullTextureInfo { TSCEntry tsc; }; -/// Returns the number of bytes per pixel of the input texture format. -u32 BytesPerPixel(TextureFormat format); - } // namespace Tegra::Texture |