diff options
Diffstat (limited to '')
73 files changed, 1219 insertions, 1131 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 5e00d839f..31edeb63d 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -82,6 +82,9 @@ set(HASH_FILES "${VIDEO_CORE}/shader/decode/video.cpp" "${VIDEO_CORE}/shader/decode/xmad.cpp" "${VIDEO_CORE}/shader/decode.cpp" + "${VIDEO_CORE}/shader/node.h" + "${VIDEO_CORE}/shader/node_helper.cpp" + "${VIDEO_CORE}/shader/node_helper.h" "${VIDEO_CORE}/shader/shader_ir.cpp" "${VIDEO_CORE}/shader/shader_ir.h" "${VIDEO_CORE}/shader/track.cpp" diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers -Subproject 15e5c4db7500b936ae758236f2e72fc1aec2202 +Subproject d05c8df88da98ec1ab3bc600d7f5783b4060895 diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index cb514a0d2..198b3fe07 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -56,6 +56,9 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/decode/video.cpp" "${VIDEO_CORE}/shader/decode/xmad.cpp" "${VIDEO_CORE}/shader/decode.cpp" + "${VIDEO_CORE}/shader/node.h" + "${VIDEO_CORE}/shader/node_helper.cpp" + "${VIDEO_CORE}/shader/node_helper.h" "${VIDEO_CORE}/shader/shader_ir.cpp" "${VIDEO_CORE}/shader/shader_ir.h" "${VIDEO_CORE}/shader/track.cpp" diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 6bf512e12..a2e2e976e 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -5,6 +5,8 @@ add_library(core STATIC arm/exclusive_monitor.h arm/unicorn/arm_unicorn.cpp arm/unicorn/arm_unicorn.h + constants.cpp + constants.h core.cpp core.h core_cpu.cpp diff --git a/src/core/constants.cpp b/src/core/constants.cpp new file mode 100644 index 000000000..dccb3e03c --- /dev/null +++ b/src/core/constants.cpp @@ -0,0 +1,17 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/constants.h" + +namespace Core::Constants { +const std::array<u8, 107> ACCOUNT_BACKUP_JPEG{{ + 0xff, 0xd8, 0xff, 0xdb, 0x00, 0x43, 0x00, 0x03, 0x02, 0x02, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, + 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x06, 0x04, 0x04, 0x04, 0x04, 0x04, 0x08, 0x06, 0x06, 0x05, + 0x06, 0x09, 0x08, 0x0a, 0x0a, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x0c, 0x0f, 0x0c, 0x0a, 0x0b, 0x0e, + 0x0b, 0x09, 0x09, 0x0d, 0x11, 0x0d, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x10, 0x0a, 0x0c, 0x12, 0x13, + 0x12, 0x10, 0x13, 0x0f, 0x10, 0x10, 0x10, 0xff, 0xc9, 0x00, 0x0b, 0x08, 0x00, 0x01, 0x00, 0x01, + 0x01, 0x01, 0x11, 0x00, 0xff, 0xcc, 0x00, 0x06, 0x00, 0x10, 0x10, 0x05, 0xff, 0xda, 0x00, 0x08, + 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, 0xd2, 0xcf, 0x20, 0xff, 0xd9, +}}; +} diff --git a/src/core/constants.h b/src/core/constants.h new file mode 100644 index 000000000..6d0ec022a --- /dev/null +++ b/src/core/constants.h @@ -0,0 +1,17 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +// This is to consolidate system-wide constants that are used by multiple components of yuzu. +// This is especially to prevent the case of something in frontend duplicating a constexpr array or +// directly including some service header for the sole purpose of data. +namespace Core::Constants { + +// ACC Service - Blank JPEG used as user icon in absentia of real one. +extern const std::array<u8, 107> ACCOUNT_BACKUP_JPEG; + +} // namespace Core::Constants diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index bf3b7eef3..f027fafa3 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -10,7 +10,6 @@ #include <list> #include <string> #include <vector> -#include <boost/container/static_vector.hpp> #include "common/common_types.h" #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/handle_table.h" diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index 86bf53d08..cb66e344b 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -10,6 +10,7 @@ #include "common/logging/log.h" #include "common/string_util.h" #include "common/swap.h" +#include "core/constants.h" #include "core/core_timing.h" #include "core/hle/ipc_helpers.h" #include "core/hle/service/acc/acc.h" @@ -21,19 +22,6 @@ namespace Service::Account { -// Smallest JPEG https://github.com/mathiasbynens/small/blob/master/jpeg.jpg -// used as a backup should the one on disk not exist -constexpr u32 backup_jpeg_size = 107; -constexpr std::array<u8, backup_jpeg_size> backup_jpeg{{ - 0xff, 0xd8, 0xff, 0xdb, 0x00, 0x43, 0x00, 0x03, 0x02, 0x02, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, - 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x06, 0x04, 0x04, 0x04, 0x04, 0x04, 0x08, 0x06, 0x06, 0x05, - 0x06, 0x09, 0x08, 0x0a, 0x0a, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x0c, 0x0f, 0x0c, 0x0a, 0x0b, 0x0e, - 0x0b, 0x09, 0x09, 0x0d, 0x11, 0x0d, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x10, 0x0a, 0x0c, 0x12, 0x13, - 0x12, 0x10, 0x13, 0x0f, 0x10, 0x10, 0x10, 0xff, 0xc9, 0x00, 0x0b, 0x08, 0x00, 0x01, 0x00, 0x01, - 0x01, 0x01, 0x11, 0x00, 0xff, 0xcc, 0x00, 0x06, 0x00, 0x10, 0x10, 0x05, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, 0xd2, 0xcf, 0x20, 0xff, 0xd9, -}}; - static std::string GetImagePath(Common::UUID uuid) { return FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + "/system/save/8000000000000010/su/avators/" + uuid.FormatSwitch() + ".jpg"; @@ -101,8 +89,8 @@ private: if (!image.IsOpen()) { LOG_WARNING(Service_ACC, "Failed to load user provided image! Falling back to built-in backup..."); - ctx.WriteBuffer(backup_jpeg); - rb.Push<u32>(backup_jpeg_size); + ctx.WriteBuffer(Core::Constants::ACCOUNT_BACKUP_JPEG); + rb.Push<u32>(Core::Constants::ACCOUNT_BACKUP_JPEG.size()); return; } @@ -124,7 +112,7 @@ private: if (!image.IsOpen()) { LOG_WARNING(Service_ACC, "Failed to load user provided image! Falling back to built-in backup..."); - rb.Push<u32>(backup_jpeg_size); + rb.Push<u32>(Core::Constants::ACCOUNT_BACKUP_JPEG.size()); } else { rb.Push<u32>(SanitizeJPEGSize(image.GetSize())); } diff --git a/src/core/hle/service/ncm/ncm.cpp b/src/core/hle/service/ncm/ncm.cpp index 5d31f638f..b405a4b66 100644 --- a/src/core/hle/service/ncm/ncm.cpp +++ b/src/core/hle/service/ncm/ncm.cpp @@ -4,15 +4,89 @@ #include <memory> +#include "core/file_sys/romfs_factory.h" +#include "core/hle/ipc_helpers.h" #include "core/hle/service/ncm/ncm.h" #include "core/hle/service/service.h" #include "core/hle/service/sm/sm.h" namespace Service::NCM { -class LocationResolver final : public ServiceFramework<LocationResolver> { +class ILocationResolver final : public ServiceFramework<ILocationResolver> { public: - explicit LocationResolver() : ServiceFramework{"lr"} { + explicit ILocationResolver(FileSys::StorageId id) + : ServiceFramework{"ILocationResolver"}, storage(id) { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "ResolveProgramPath"}, + {1, nullptr, "RedirectProgramPath"}, + {2, nullptr, "ResolveApplicationControlPath"}, + {3, nullptr, "ResolveApplicationHtmlDocumentPath"}, + {4, nullptr, "ResolveDataPath"}, + {5, nullptr, "RedirectApplicationControlPath"}, + {6, nullptr, "RedirectApplicationHtmlDocumentPath"}, + {7, nullptr, "ResolveApplicationLegalInformationPath"}, + {8, nullptr, "RedirectApplicationLegalInformationPath"}, + {9, nullptr, "Refresh"}, + {10, nullptr, "RedirectProgramPath2"}, + {11, nullptr, "Refresh2"}, + {12, nullptr, "DeleteProgramPath"}, + {13, nullptr, "DeleteApplicationControlPath"}, + {14, nullptr, "DeleteApplicationHtmlDocumentPath"}, + {15, nullptr, "DeleteApplicationLegalInformationPath"}, + {16, nullptr, ""}, + {17, nullptr, ""}, + {18, nullptr, ""}, + {19, nullptr, ""}, + }; + // clang-format on + + RegisterHandlers(functions); + } + +private: + FileSys::StorageId storage; +}; + +class IRegisteredLocationResolver final : public ServiceFramework<IRegisteredLocationResolver> { +public: + explicit IRegisteredLocationResolver() : ServiceFramework{"IRegisteredLocationResolver"} { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "ResolveProgramPath"}, + {1, nullptr, "RegisterProgramPath"}, + {2, nullptr, "UnregisterProgramPath"}, + {3, nullptr, "RedirectProgramPath"}, + {4, nullptr, "ResolveHtmlDocumentPath"}, + {5, nullptr, "RegisterHtmlDocumentPath"}, + {6, nullptr, "UnregisterHtmlDocumentPath"}, + {7, nullptr, "RedirectHtmlDocumentPath"}, + {8, nullptr, ""}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +class IAddOnContentLocationResolver final : public ServiceFramework<IAddOnContentLocationResolver> { +public: + explicit IAddOnContentLocationResolver() : ServiceFramework{"IAddOnContentLocationResolver"} { + // clang-format off + static const FunctionInfo functions[] = { + {0, nullptr, "ResolveAddOnContentPath"}, + {1, nullptr, "RegisterAddOnContentStorage"}, + {2, nullptr, "UnregisterAllAddOnContentPath"}, + }; + // clang-format on + + RegisterHandlers(functions); + } +}; + +class LR final : public ServiceFramework<LR> { +public: + explicit LR() : ServiceFramework{"lr"} { // clang-format off static const FunctionInfo functions[] = { {0, nullptr, "OpenLocationResolver"}, @@ -52,7 +126,7 @@ public: }; void InstallInterfaces(SM::ServiceManager& sm) { - std::make_shared<LocationResolver>()->InstallAsService(sm); + std::make_shared<LR>()->InstallAsService(sm); std::make_shared<NCM>()->InstallAsService(sm); } diff --git a/src/core/hle/service/ns/ns.h b/src/core/hle/service/ns/ns.h index 0f4bab4cb..0e8256cb4 100644 --- a/src/core/hle/service/ns/ns.h +++ b/src/core/hle/service/ns/ns.h @@ -11,13 +11,13 @@ namespace Service::NS { class IAccountProxyInterface final : public ServiceFramework<IAccountProxyInterface> { public: explicit IAccountProxyInterface(); - ~IAccountProxyInterface(); + ~IAccountProxyInterface() override; }; class IApplicationManagerInterface final : public ServiceFramework<IApplicationManagerInterface> { public: explicit IApplicationManagerInterface(); - ~IApplicationManagerInterface(); + ~IApplicationManagerInterface() override; ResultVal<u8> GetApplicationDesiredLanguage(u32 supported_languages); ResultVal<u64> ConvertApplicationLanguageToLanguageCode(u8 application_language); @@ -31,43 +31,43 @@ private: class IApplicationVersionInterface final : public ServiceFramework<IApplicationVersionInterface> { public: explicit IApplicationVersionInterface(); - ~IApplicationVersionInterface(); + ~IApplicationVersionInterface() override; }; class IContentManagerInterface final : public ServiceFramework<IContentManagerInterface> { public: explicit IContentManagerInterface(); - ~IContentManagerInterface(); + ~IContentManagerInterface() override; }; class IDocumentInterface final : public ServiceFramework<IDocumentInterface> { public: explicit IDocumentInterface(); - ~IDocumentInterface(); + ~IDocumentInterface() override; }; class IDownloadTaskInterface final : public ServiceFramework<IDownloadTaskInterface> { public: explicit IDownloadTaskInterface(); - ~IDownloadTaskInterface(); + ~IDownloadTaskInterface() override; }; class IECommerceInterface final : public ServiceFramework<IECommerceInterface> { public: explicit IECommerceInterface(); - ~IECommerceInterface(); + ~IECommerceInterface() override; }; class IFactoryResetInterface final : public ServiceFramework<IFactoryResetInterface> { public: explicit IFactoryResetInterface(); - ~IFactoryResetInterface(); + ~IFactoryResetInterface() override; }; class NS final : public ServiceFramework<NS> { public: explicit NS(const char* name); - ~NS(); + ~NS() override; std::shared_ptr<IApplicationManagerInterface> GetApplicationManagerInterface() const; diff --git a/src/core/settings.cpp b/src/core/settings.cpp index c1365879b..6d32ebea3 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -90,7 +90,6 @@ void LogSettings() { LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); - LogSetting("Renderer_UseCompatibilityProfile", Settings::values.use_compatibility_profile); LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); LogSetting("Renderer_UseAsynchronousGpuEmulation", diff --git a/src/core/settings.h b/src/core/settings.h index 5ff3634aa..b84390745 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -390,7 +390,6 @@ struct Values { float resolution_factor; bool use_frame_limit; u16 frame_limit; - bool use_compatibility_profile; bool use_disk_shader_cache; bool use_accurate_gpu_emulation; bool use_asynchronous_gpu_emulation; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1e010e4da..2d4caa08d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -42,8 +42,6 @@ add_library(video_core STATIC renderer_opengl/gl_device.h renderer_opengl/gl_global_cache.cpp renderer_opengl/gl_global_cache.h - renderer_opengl/gl_primitive_assembler.cpp - renderer_opengl/gl_primitive_assembler.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_rasterizer_cache.cpp @@ -102,6 +100,9 @@ add_library(video_core STATIC shader/decode/xmad.cpp shader/decode/other.cpp shader/decode.cpp + shader/node_helper.cpp + shader/node_helper.h + shader/node.h shader/shader_ir.cpp shader/shader_ir.h shader/track.cpp diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 25652e794..48b86f3bd 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -71,16 +71,6 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s return uploaded_offset; } -std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) { - AlignBuffer(alignment); - u8* const uploaded_ptr = buffer_ptr; - const GLintptr uploaded_offset = buffer_offset; - - buffer_ptr += size; - buffer_offset += size; - return std::make_tuple(uploaded_ptr, uploaded_offset); -} - bool OGLBufferCache::Map(std::size_t max_size) { bool invalidate; std::tie(buffer_ptr, buffer_offset_base, invalidate) = diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f9247a40e..f2347581b 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -61,9 +61,6 @@ public: /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); - /// Reserves memory to be used by host's CPU. Returns mapped address and offset. - std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); - bool Map(std::size_t max_size); void Unmap(); diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp deleted file mode 100644 index c3e94d917..000000000 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <array> -#include "common/assert.h" -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_buffer_cache.h" -#include "video_core/renderer_opengl/gl_primitive_assembler.h" - -namespace OpenGL { - -constexpr u32 TRIANGLES_PER_QUAD = 6; -constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3}; - -PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {} - -PrimitiveAssembler::~PrimitiveAssembler() = default; - -std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const { - ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4"); - return (count / 4) * TRIANGLES_PER_QUAD * sizeof(GLuint); -} - -GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) { - const std::size_t size{CalculateQuadSize(count)}; - auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(size); - - for (u32 primitive = 0; primitive < count / 4; ++primitive) { - for (u32 i = 0; i < TRIANGLES_PER_QUAD; ++i) { - const u32 index = first + primitive * 4 + QUAD_MAP[i]; - std::memcpy(dst_pointer, &index, sizeof(index)); - dst_pointer += sizeof(index); - } - } - - return index_offset; -} - -GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) { - const std::size_t map_size{CalculateQuadSize(count)}; - auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); - - auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); - const u8* source{memory_manager.GetPointer(gpu_addr)}; - - for (u32 primitive = 0; primitive < count / 4; ++primitive) { - for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { - const u32 index = primitive * 4 + QUAD_MAP[i]; - const u8* src_offset = source + (index * index_size); - - std::memcpy(dst_pointer, src_offset, index_size); - dst_pointer += index_size; - } - } - - return index_offset; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h deleted file mode 100644 index 4e87ce4d6..000000000 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <glad/glad.h> - -#include "common/common_types.h" - -namespace OpenGL { - -class OGLBufferCache; - -class PrimitiveAssembler { -public: - explicit PrimitiveAssembler(OGLBufferCache& buffer_cache); - ~PrimitiveAssembler(); - - /// Calculates the size required by MakeQuadArray and MakeQuadIndexed. - std::size_t CalculateQuadSize(u32 count) const; - - GLintptr MakeQuadArray(u32 first, u32 count); - - GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count); - -private: - OGLBufferCache& buffer_cache; -}; - -} // namespace OpenGL
\ No newline at end of file diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f9b6dfeea..ca410287a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -246,29 +246,6 @@ DrawParameters RasterizerOpenGL::SetupDraw() { DrawParameters params{}; params.current_instance = gpu.state.current_instance; - if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { - MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly); - - params.use_indexed = true; - params.primitive_mode = GL_TRIANGLES; - - if (is_indexed) { - params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); - params.count = (regs.index_array.count / 4) * 6; - params.index_buffer_offset = primitive_assembler.MakeQuadIndexed( - regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(), - regs.index_array.count); - params.base_vertex = static_cast<GLint>(regs.vb_element_base); - } else { - // MakeQuadArray always generates u32 indexes - params.index_format = GL_UNSIGNED_INT; - params.count = (regs.vertex_buffer.count / 4) * 6; - params.index_buffer_offset = primitive_assembler.MakeQuadArray( - regs.vertex_buffer.first, regs.vertex_buffer.count); - } - return params; - } - params.use_indexed = is_indexed; params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); @@ -686,30 +663,19 @@ void RasterizerOpenGL::DrawArrays() { SyncCullMode(); SyncPrimitiveRestart(); SyncScissorTest(state); - // Alpha Testing is synced on shaders. SyncTransformFeedback(); SyncPointState(); - CheckAlphaTests(); SyncPolygonOffset(); - // TODO(bunnei): Sync framebuffer_scale uniform here - // TODO(bunnei): Sync scissorbox uniform(s) here + SyncAlphaTest(); // Draw the vertex batch const bool is_indexed = accelerate_draw == AccelDraw::Indexed; std::size_t buffer_size = CalculateVertexArraysSize(); - // Add space for index buffer (keeping in mind non-core primitives) - switch (regs.draw.topology) { - case Maxwell::PrimitiveTopology::Quads: - buffer_size = Common::AlignUp(buffer_size, 4) + - primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count); - break; - default: - if (is_indexed) { - buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); - } - break; + // Add space for index buffer + if (is_indexed) { + buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); } // Uniform space for the 5 shader stages @@ -1152,10 +1118,17 @@ void RasterizerOpenGL::SyncPolygonOffset() { state.polygon_offset.clamp = regs.polygon_offset_clamp; } -void RasterizerOpenGL::CheckAlphaTests() { +void RasterizerOpenGL::SyncAlphaTest() { const auto& regs = system.GPU().Maxwell3D().regs; UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1, "Alpha Testing is enabled with more than one rendertarget"); + + state.alpha_test.enabled = regs.alpha_test_enabled; + if (!state.alpha_test.enabled) { + return; + } + state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func); + state.alpha_test.ref = regs.alpha_test_ref; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d78094138..2817f65c9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -23,7 +23,6 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_global_cache.h" -#include "video_core/renderer_opengl/gl_primitive_assembler.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" @@ -167,8 +166,8 @@ private: /// Syncs the polygon offsets void SyncPolygonOffset(); - /// Check asserts for alpha testing. - void CheckAlphaTests(); + /// Syncs the alpha test state to match the guest state + void SyncAlphaTest(); /// Check for extension that are not strictly required /// but are needed for correct emulation @@ -197,7 +196,6 @@ private: static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; - PrimitiveAssembler primitive_assembler{buffer_cache}; BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 3b61bf77f..739477cc9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -45,7 +45,6 @@ struct TextureAoffi {}; using TextureArgument = std::pair<Type, Node>; using TextureIR = std::variant<TextureAoffi, TextureArgument>; -enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); @@ -124,8 +123,8 @@ bool IsPrecise(Operation operand) { return false; } -bool IsPrecise(Node node) { - if (const auto operation = std::get_if<OperationNode>(node)) { +bool IsPrecise(const Node& node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { return IsPrecise(*operation); } return false; @@ -247,6 +246,12 @@ private: code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); code.AddNewLine(); + code.AddLine("in gl_PerVertex {{"); + ++code.scope; + code.AddLine("vec4 gl_Position;"); + --code.scope; + code.AddLine("}} gl_in[];"); + DeclareVertexRedeclarations(); } @@ -349,7 +354,7 @@ private: } void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 generic_index{GetGenericAttributeIndex(index)}; + const u32 location{GetGenericAttributeIndex(index)}; std::string name{GetInputAttribute(index)}; if (stage == ShaderStage::Geometry) { @@ -358,19 +363,13 @@ private: std::string suffix; if (stage == ShaderStage::Fragment) { - const auto input_mode{header.ps.GetAttributeUse(generic_index)}; + const auto input_mode{header.ps.GetAttributeUse(location)}; if (skip_unused && input_mode == AttributeUse::Unused) { return; } suffix = GetInputFlags(input_mode); } - u32 location = generic_index; - if (stage != ShaderStage::Vertex) { - // If inputs are varyings, add an offset - location += GENERIC_VARYING_START_LOCATION; - } - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name); } @@ -395,7 +394,7 @@ private: } void DeclareOutputAttribute(Attribute::Index index) { - const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION}; + const u32 location{GetGenericAttributeIndex(index)}; code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); } @@ -498,15 +497,15 @@ private: } void VisitBlock(const NodeBlock& bb) { - for (const Node node : bb) { + for (const auto& node : bb) { if (const std::string expr = Visit(node); !expr.empty()) { code.AddLine(expr); } } } - std::string Visit(Node node) { - if (const auto operation = std::get_if<OperationNode>(node)) { + std::string Visit(const Node& node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { const auto operation_index = static_cast<std::size_t>(operation->GetCode()); if (operation_index >= operation_decompilers.size()) { UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); @@ -520,7 +519,7 @@ private: return (this->*decompiler)(*operation); } - if (const auto gpr = std::get_if<GprNode>(node)) { + if (const auto gpr = std::get_if<GprNode>(&*node)) { const u32 index = gpr->GetIndex(); if (index == Register::ZeroIndex) { return "0"; @@ -528,7 +527,7 @@ private: return GetRegister(index); } - if (const auto immediate = std::get_if<ImmediateNode>(node)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { const u32 value = immediate->GetValue(); if (value < 10) { // For eyecandy avoid using hex numbers on single digits @@ -537,7 +536,7 @@ private: return fmt::format("utof(0x{:x}u)", immediate->GetValue()); } - if (const auto predicate = std::get_if<PredicateNode>(node)) { + if (const auto predicate = std::get_if<PredicateNode>(&*node)) { const auto value = [&]() -> std::string { switch (const auto index = predicate->GetIndex(); index) { case Tegra::Shader::Pred::UnusedIndex: @@ -554,7 +553,7 @@ private: return value; } - if (const auto abuf = std::get_if<AbufNode>(node)) { + if (const auto abuf = std::get_if<AbufNode>(&*node)) { UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, "Physical attributes in geometry shaders are not implemented"); if (abuf->IsPhysicalBuffer()) { @@ -564,9 +563,9 @@ private: return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); } - if (const auto cbuf = std::get_if<CbufNode>(node)) { + if (const auto cbuf = std::get_if<CbufNode>(&*node)) { const Node offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if<ImmediateNode>(offset)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { // Direct access const u32 offset_imm = immediate->GetValue(); ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); @@ -602,22 +601,22 @@ private: UNREACHABLE_MSG("Unmanaged offset node type"); } - if (const auto gmem = std::get_if<GmemNode>(node)) { + if (const auto gmem = std::get_if<GmemNode>(&*node)) { const std::string real = Visit(gmem->GetRealAddress()); const std::string base = Visit(gmem->GetBaseAddress()); const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); } - if (const auto lmem = std::get_if<LmemNode>(node)) { + if (const auto lmem = std::get_if<LmemNode>(&*node)) { return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); } - if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) { + if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { return GetInternalFlag(internal_flag->GetFlag()); } - if (const auto conditional = std::get_if<ConditionalNode>(node)) { + if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { // It's invalid to call conditional on nested nodes, use an operation instead code.AddLine("if ({}) {{", Visit(conditional->GetCondition())); ++code.scope; @@ -629,7 +628,7 @@ private: return {}; } - if (const auto comment = std::get_if<CommentNode>(node)) { + if (const auto comment = std::get_if<CommentNode>(&*node)) { return "// " + comment->GetText(); } @@ -637,7 +636,7 @@ private: return {}; } - std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) { + std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { const auto GeometryPass = [&](std::string_view name) { if (stage == ShaderStage::Geometry && buffer) { // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games @@ -650,10 +649,14 @@ private: switch (attribute) { case Attribute::Index::Position: - if (stage != ShaderStage::Fragment) { - return GeometryPass("position") + GetSwizzle(element); - } else { + switch (stage) { + case ShaderStage::Geometry: + return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), + GetSwizzle(element)); + case ShaderStage::Fragment: return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); + default: + UNREACHABLE(); } case Attribute::Index::PointCoord: switch (element) { @@ -869,7 +872,7 @@ private: std::string expr = ", "; switch (type) { case Type::Int: - if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { // Inline the string as an immediate integer in GLSL (some extra arguments are // required to be constant) expr += std::to_string(static_cast<s32>(immediate->GetValue())); @@ -901,7 +904,7 @@ private: for (std::size_t index = 0; index < aoffi.size(); ++index) { const auto operand{aoffi.at(index)}; - if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { // Inline the string as an immediate integer in GLSL (AOFFI arguments are required // to be constant by the standard). expr += std::to_string(static_cast<s32>(immediate->GetValue())); @@ -922,23 +925,23 @@ private: } std::string Assign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; std::string target; - if (const auto gpr = std::get_if<GprNode>(dest)) { + if (const auto gpr = std::get_if<GprNode>(&*dest)) { if (gpr->GetIndex() == Register::ZeroIndex) { // Writing to Register::ZeroIndex is a no op return {}; } target = GetRegister(gpr->GetIndex()); - } else if (const auto abuf = std::get_if<AbufNode>(dest)) { + } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); target = [&]() -> std::string { switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { case Attribute::Index::Position: - return "position"s + GetSwizzle(abuf->GetElement()); + return "gl_Position"s + GetSwizzle(abuf->GetElement()); case Attribute::Index::PointSize: return "gl_PointSize"; case Attribute::Index::ClipDistances0123: @@ -954,9 +957,9 @@ private: return "0"; } }(); - } else if (const auto lmem = std::get_if<LmemNode>(dest)) { + } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); - } else if (const auto gmem = std::get_if<GmemNode>(dest)) { + } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { const std::string real = Visit(gmem->GetRealAddress()); const std::string base = Visit(gmem->GetBaseAddress()); const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); @@ -1233,12 +1236,12 @@ private: } std::string LogicalAssign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; std::string target; - if (const auto pred = std::get_if<PredicateNode>(dest)) { + if (const auto pred = std::get_if<PredicateNode>(&*dest)) { ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); const auto index = pred->GetIndex(); @@ -1249,7 +1252,7 @@ private: return {}; } target = GetPredicate(index); - } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) { + } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { target = GetInternalFlag(flag->GetFlag()); } @@ -1426,7 +1429,7 @@ private: } std::string Branch(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); code.AddLine("jmp_to = 0x{:x}u;", target->GetValue()); @@ -1435,7 +1438,7 @@ private: } std::string PushFlowStack(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()); @@ -1464,27 +1467,9 @@ private: UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - code.AddLine("if (alpha_test[0] != 0) {{"); - ++code.scope; - // We start on the register containing the alpha value in the first RT. - u32 current_reg = 3; - for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { - // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when - // multiple render targets are used. - if (header.ps.IsColorComponentOutputEnabled(render_target, 0) || - header.ps.IsColorComponentOutputEnabled(render_target, 1) || - header.ps.IsColorComponentOutputEnabled(render_target, 2) || - header.ps.IsColorComponentOutputEnabled(render_target, 3)) { - code.AddLine("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg)); - current_reg += 4; - } - } - --code.scope; - code.AddLine("}}"); - // Write the color outputs using the data in the shader registers, disabled // rendertargets/components are skipped in the register assignment. - current_reg = 0; + u32 current_reg = 0; for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { // TODO(Subv): Figure out how dual-source blending is configured in the Switch. for (u32 component = 0; component < 4; ++component) { @@ -1523,9 +1508,7 @@ private: // If a geometry shader is attached, it will always flip (it's the last stage before // fragment). For more info about flipping, refer to gl_shader_gen.cpp. - code.AddLine("position.xy *= viewport_flip.xy;"); - code.AddLine("gl_Position = position;"); - code.AddLine("position.w = 1.0;"); + code.AddLine("gl_Position.xy *= viewport_flip.xy;"); code.AddLine("EmitVertex();"); return {}; } @@ -1763,8 +1746,7 @@ private: } u32 GetNumPhysicalVaryings() const { - return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION, - Maxwell::NumVaryings); + return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); } const Device& device; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index d2bb705a9..9148629ec 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -23,12 +23,9 @@ ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setu out += GetCommonDeclarations(); out += R"( -layout (location = 0) out vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding - uvec4 alpha_test; }; )"; @@ -48,7 +45,6 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { out += R"( void main() { - position = vec4(0.0, 0.0, 0.0, 0.0); execute_vertex(); )"; @@ -59,19 +55,12 @@ void main() { out += R"( // Set Position Y direction - position.y *= utof(config_pack[2]); + gl_Position.y *= utof(config_pack[2]); // Check if the flip stage is VertexB // Config pack's second value is flip_stage if (config_pack[1] == 1) { // Viewport can be flipped, which is unsupported by glViewport - position.xy *= viewport_flip.xy; - } - gl_Position = position; - - // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0 - // For now, this is here to bring order in lieu of proper emulation - if (config_pack[1] == 1) { - position.w = 1.0; + gl_Position.xy *= viewport_flip.xy; } })"; @@ -85,13 +74,9 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se out += GetCommonDeclarations(); out += R"( -layout (location = 0) in vec4 gs_position[]; -layout (location = 0) out vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding - uvec4 alpha_test; }; )"; @@ -124,38 +109,11 @@ layout (location = 5) out vec4 FragColor5; layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; -layout (location = 0) in noperspective vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding - uvec4 alpha_test; }; -bool AlphaFunc(in float value) { - float ref = uintBitsToFloat(alpha_test[2]); - switch (alpha_test[1]) { - case 1: - return false; - case 2: - return value < ref; - case 3: - return value == ref; - case 4: - return value <= ref; - case 5: - return value > ref; - case 6: - return value != ref; - case 7: - return value >= ref; - case 8: - return true; - default: - return false; - } -} - )"; const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); ProgramResult program = diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 05ab01dcb..b05f90f20 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -48,17 +48,6 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shade viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f; - auto func{static_cast<u32>(regs.alpha_test_func)}; - // Normalize the gl variants of opCompare to be the same as the normal variants - const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never); - if (func >= op_gl_variant_base) { - func = func - op_gl_variant_base + 1U; - } - - alpha_test.enabled = regs.alpha_test_enabled; - alpha_test.func = func; - alpha_test.ref = regs.alpha_test_ref; - instance_id = state.current_instance; // Assign in which stage the position has to be flipped diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index cec18a832..6961e702a 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -27,14 +27,8 @@ struct MaxwellUniformData { GLuint flip_stage; GLfloat y_direction; }; - struct alignas(16) { - GLuint enabled; - GLuint func; - GLfloat ref; - GLuint padding; - } alpha_test; }; -static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect"); +static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect"); static_assert(sizeof(MaxwellUniformData) < 16384, "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 7425fbe5d..d86e137ac 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -156,6 +156,10 @@ OpenGLState::OpenGLState() { polygon_offset.factor = 0.0f; polygon_offset.units = 0.0f; polygon_offset.clamp = 0.0f; + + alpha_test.enabled = false; + alpha_test.func = GL_ALWAYS; + alpha_test.ref = 0.0f; } void OpenGLState::ApplyDefaultState() { @@ -461,6 +465,14 @@ void OpenGLState::ApplyPolygonOffset() const { } } +void OpenGLState::ApplyAlphaTest() const { + Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled); + if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref), + std::tie(alpha_test.func, alpha_test.ref))) { + glAlphaFunc(alpha_test.func, alpha_test.ref); + } +} + void OpenGLState::ApplyTextures() const { bool has_delta{}; std::size_t first{}; @@ -533,6 +545,7 @@ void OpenGLState::Apply() const { ApplyTextures(); ApplySamplers(); ApplyPolygonOffset(); + ApplyAlphaTest(); } void OpenGLState::EmulateViewportWithScissor() { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 41418a7b8..b0140495d 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -172,6 +172,12 @@ public: GLfloat clamp; } polygon_offset; + struct { + bool enabled; // GL_ALPHA_TEST + GLenum func; // GL_ALPHA_TEST_FUNC + GLfloat ref; // GL_ALPHA_TEST_REF + } alpha_test; + std::array<bool, 8> clip_distance; // GL_CLIP_DISTANCE OpenGLState(); @@ -215,6 +221,7 @@ public: void ApplySamplers() const; void ApplyDepthClamp() const; void ApplyPolygonOffset() const; + void ApplyAlphaTest() const; /// Set the initial OpenGL state static void ApplyDefaultState(); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index ed7b5cff0..ea77dd211 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -128,6 +128,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { return GL_TRIANGLE_STRIP; case Maxwell::PrimitiveTopology::TriangleFan: return GL_TRIANGLE_FAN; + case Maxwell::PrimitiveTopology::Quads: + return GL_QUADS; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology)); UNREACHABLE(); @@ -173,11 +175,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { return GL_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::Border: return GL_CLAMP_TO_BORDER; - case Tegra::Texture::WrapMode::ClampOGL: - // TODO(Subv): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use - // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to - // manually mix them. However the shader part of this is not yet implemented. - return GL_CLAMP_TO_BORDER; + case Tegra::Texture::WrapMode::Clamp: + return GL_CLAMP; case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: return GL_MIRROR_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::MirrorOnceBorder: diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 9fe1e3280..0bbbf6851 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -52,7 +52,7 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) { return vk::SamplerAddressMode::eClampToEdge; case Tegra::Texture::WrapMode::Border: return vk::SamplerAddressMode::eClampToBorder; - case Tegra::Texture::WrapMode::ClampOGL: + case Tegra::Texture::WrapMode::Clamp: // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use // eClampToBorder to get the border color of the texture, and then sample the edge to // manually mix them. However the shader part of this is not yet implemented. diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 00242ecbe..3b966ddc3 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -18,6 +18,7 @@ constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = { vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; +constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}}; } // namespace Alternatives @@ -51,15 +52,19 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { SetupFamilies(dldi, surface); SetupProperties(dldi); + SetupFeatures(dldi); } VKDevice::~VKDevice() = default; bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { - const auto queue_cis = GetDeviceQueueCreateInfos(); - vk::PhysicalDeviceFeatures device_features{}; + vk::PhysicalDeviceFeatures device_features; + device_features.vertexPipelineStoresAndAtomics = true; + device_features.independentBlend = true; + device_features.textureCompressionASTC_LDR = is_optimal_astc_supported; - const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; + const auto queue_cis = GetDeviceQueueCreateInfos(); + const std::vector<const char*> extensions = LoadExtensions(dldi); const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0, nullptr, static_cast<u32>(extensions.size()), extensions.data(), &device_features); @@ -90,7 +95,7 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, LOG_CRITICAL(Render_Vulkan, "Format={} with usage={} and type={} has no defined alternatives and host " "hardware does not support it", - static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), + vk::to_string(wanted_format), vk::to_string(wanted_usage), static_cast<u32>(format_type)); UNREACHABLE(); return wanted_format; @@ -118,6 +123,30 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, return wanted_format; } +bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, + const vk::DispatchLoaderDynamic& dldi) const { + if (!features.textureCompressionASTC_LDR) { + return false; + } + const auto format_feature_usage{ + vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | + vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | + vk::FormatFeatureFlagBits::eTransferDst}; + constexpr std::array<vk::Format, 9> astc_formats = { + vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, + vk::Format::eAstc5x5SrgbBlock, vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock}; + for (const auto format : astc_formats) { + const auto format_properties{physical.getFormatProperties(format, dldi)}; + if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { + return false; + } + } + return true; +} + bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, FormatType format_type) const { const auto it = format_properties.find(wanted_format); @@ -132,11 +161,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, vk::SurfaceKHR surface) { - const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME; - bool has_swapchain{}; for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { - has_swapchain |= prop.extensionName == swapchain_extension; + has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME); } if (!has_swapchain) { // The device doesn't support creating swapchains. @@ -160,8 +187,14 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev } // TODO(Rodrigo): Check if the device matches all requeriments. - const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); - if (props.limits.maxUniformBufferRange < 65536) { + const auto properties{physical.getProperties(dldi)}; + const auto limits{properties.limits}; + if (limits.maxUniformBufferRange < 65536) { + return false; + } + + const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)}; + if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) { return false; } @@ -169,6 +202,30 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev return true; } +std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { + std::vector<const char*> extensions; + extensions.reserve(2); + extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + + const auto Test = [&](const vk::ExtensionProperties& extension, + std::optional<std::reference_wrapper<bool>> status, const char* name, + u32 revision) { + if (extension.extensionName != std::string(name)) { + return; + } + extensions.push_back(name); + if (status) { + status->get() = true; + } + }; + + for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { + Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1); + } + + return extensions; +} + void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { std::optional<u32> graphics_family_, present_family_; @@ -196,10 +253,16 @@ void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); device_type = props.deviceType; uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); + max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange); +} + +void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { + const auto supported_features{physical.getFeatures(dldi)}; + is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); } std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { - static const float QUEUE_PRIORITY = 1.f; + static const float QUEUE_PRIORITY = 1.0f; std::set<u32> unique_queue_families = {graphics_family, present_family}; std::vector<vk::DeviceQueueCreateInfo> queue_cis; @@ -212,26 +275,43 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { + static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, + vk::Format::eB5G6R5UnormPack16, + vk::Format::eA2B10G10R10UnormPack32, + vk::Format::eR32G32B32A32Sfloat, + vk::Format::eR16G16Unorm, + vk::Format::eR16G16Snorm, + vk::Format::eR8G8B8A8Srgb, + vk::Format::eR8Unorm, + vk::Format::eB10G11R11UfloatPack32, + vk::Format::eR32Sfloat, + vk::Format::eR16Sfloat, + vk::Format::eR16G16B16A16Sfloat, + vk::Format::eD32Sfloat, + vk::Format::eD16Unorm, + vk::Format::eD16UnormS8Uint, + vk::Format::eD24UnormS8Uint, + vk::Format::eD32SfloatS8Uint, + vk::Format::eBc1RgbaUnormBlock, + vk::Format::eBc2UnormBlock, + vk::Format::eBc3UnormBlock, + vk::Format::eBc4UnormBlock, + vk::Format::eBc5UnormBlock, + vk::Format::eBc5SnormBlock, + vk::Format::eBc7UnormBlock, + vk::Format::eAstc4x4UnormBlock, + vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc8x8SrgbBlock, + vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc5x4SrgbBlock, + vk::Format::eAstc5x5UnormBlock, + vk::Format::eAstc5x5SrgbBlock, + vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock}; std::map<vk::Format, vk::FormatProperties> format_properties; - - const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) { + for (const auto format : formats) { format_properties.emplace(format, physical.getFormatProperties(format, dldi)); - }; - AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); - AddFormatQuery(vk::Format::eB5G6R5UnormPack16); - AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32); - AddFormatQuery(vk::Format::eR8G8B8A8Srgb); - AddFormatQuery(vk::Format::eR8Unorm); - AddFormatQuery(vk::Format::eD32Sfloat); - AddFormatQuery(vk::Format::eD16Unorm); - AddFormatQuery(vk::Format::eD16UnormS8Uint); - AddFormatQuery(vk::Format::eD24UnormS8Uint); - AddFormatQuery(vk::Format::eD32SfloatS8Uint); - AddFormatQuery(vk::Format::eBc1RgbaUnormBlock); - AddFormatQuery(vk::Format::eBc2UnormBlock); - AddFormatQuery(vk::Format::eBc3UnormBlock); - AddFormatQuery(vk::Format::eBc4UnormBlock); - + } return format_properties; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index e87c7a508..537825d8b 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -11,7 +11,7 @@ namespace Vulkan { -/// Format usage descriptor +/// Format usage descriptor. enum class FormatType { Linear, Optimal, Buffer }; /// Handles data specific to a physical device. @@ -34,12 +34,12 @@ public: vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, FormatType format_type) const; - /// Returns the dispatch loader with direct function pointers of the device + /// Returns the dispatch loader with direct function pointers of the device. const vk::DispatchLoaderDynamic& GetDispatchLoader() const { return dld; } - /// Returns the logical device + /// Returns the logical device. vk::Device GetLogical() const { return logical.get(); } @@ -69,30 +69,55 @@ public: return present_family; } - /// Returns if the device is integrated with the host CPU + /// Returns if the device is integrated with the host CPU. bool IsIntegrated() const { return device_type == vk::PhysicalDeviceType::eIntegratedGpu; } - /// Returns uniform buffer alignment requeriment + /// Returns uniform buffer alignment requeriment. u64 GetUniformBufferAlignment() const { return uniform_buffer_alignment; } + /// Returns the maximum range for storage buffers. + u64 GetMaxStorageBufferRange() const { + return max_storage_buffer_range; + } + + /// Returns true if ASTC is natively supported. + bool IsOptimalAstcSupported() const { + return is_optimal_astc_supported; + } + + /// Returns true if the device supports VK_EXT_scalar_block_layout. + bool IsExtScalarBlockLayoutSupported() const { + return ext_scalar_block_layout; + } + /// Checks if the physical device is suitable. static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, vk::SurfaceKHR surface); private: + /// Loads extensions into a vector and stores available ones in this object. + std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi); + /// Sets up queue families. void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); /// Sets up device properties. void SetupProperties(const vk::DispatchLoaderDynamic& dldi); + /// Sets up device features. + void SetupFeatures(const vk::DispatchLoaderDynamic& dldi); + /// Returns a list of queue initialization descriptors. std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; + /// Returns true if ASTC textures are natively supported. + bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, + const vk::DispatchLoaderDynamic& dldi) const; + /// Returns true if a format is supported. bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, FormatType format_type) const; @@ -101,16 +126,19 @@ private: static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); - const vk::PhysicalDevice physical; ///< Physical device - vk::DispatchLoaderDynamic dld; ///< Device function pointers - UniqueDevice logical; ///< Logical device - vk::Queue graphics_queue; ///< Main graphics queue - vk::Queue present_queue; ///< Main present queue - u32 graphics_family{}; ///< Main graphics queue family index - u32 present_family{}; ///< Main present queue family index - vk::PhysicalDeviceType device_type; ///< Physical device type - u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment - std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary + const vk::PhysicalDevice physical; ///< Physical device. + vk::DispatchLoaderDynamic dld; ///< Device function pointers. + UniqueDevice logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + vk::PhysicalDeviceType device_type; ///< Physical device type. + u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment. + u64 max_storage_buffer_range{}; ///< Max storage buffer size. + bool is_optimal_astc_supported{}; ///< Support for native ASTC. + bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout. + std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary. }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a5b25aeff..547883425 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -17,6 +17,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/shader/shader_ir.h" @@ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; using Operation = const OperationNode&; // TODO(Rodrigo): Use rasterizer's value -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; +constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000; +constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4; constexpr u32 STAGE_BINDING_STRIDE = 0x100; enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; @@ -87,8 +89,8 @@ bool IsPrecise(Operation operand) { class SPIRVDecompiler : public Sirit::Module { public: - explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage) - : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { + explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage) + : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} { AddCapability(spv::Capability::Shader); AddExtension("SPV_KHR_storage_buffer_storage_class"); AddExtension("SPV_KHR_variable_pointers"); @@ -195,7 +197,9 @@ public: entries.samplers.emplace_back(sampler); } for (const auto& attribute : ir.GetInputAttributes()) { - entries.attributes.insert(GetGenericAttributeLocation(attribute)); + if (IsGenericAttribute(attribute)) { + entries.attributes.insert(GetGenericAttributeLocation(attribute)); + } } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -210,7 +214,6 @@ private: std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); - static constexpr u32 CBUF_STRIDE = 16; void AllocateBindings() { const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; @@ -315,6 +318,7 @@ private: constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", "overflow"}; for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { + const auto flag_code = static_cast<InternalFlag>(flag); const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); } @@ -374,7 +378,9 @@ private: u32 binding = const_buffers_base_binding; for (const auto& entry : ir.GetConstantBuffers()) { const auto [index, size] = entry; - const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform); + const Id type = + device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo; + const Id id = OpVariable(type, spv::StorageClass::Uniform); AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); Decorate(id, spv::Decoration::Binding, binding++); @@ -475,13 +481,13 @@ private: } void VisitBasicBlock(const NodeBlock& bb) { - for (const Node node : bb) { + for (const auto& node : bb) { static_cast<void>(Visit(node)); } } - Id Visit(Node node) { - if (const auto operation = std::get_if<OperationNode>(node)) { + Id Visit(const Node& node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { const auto operation_index = static_cast<std::size_t>(operation->GetCode()); const auto decompiler = operation_decompilers[operation_index]; if (decompiler == nullptr) { @@ -489,17 +495,17 @@ private: } return (this->*decompiler)(*operation); - } else if (const auto gpr = std::get_if<GprNode>(node)) { + } else if (const auto gpr = std::get_if<GprNode>(&*node)) { const u32 index = gpr->GetIndex(); if (index == Register::ZeroIndex) { return Constant(t_float, 0.0f); } return Emit(OpLoad(t_float, registers.at(index))); - } else if (const auto immediate = std::get_if<ImmediateNode>(node)) { + } else if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { return BitcastTo<Type::Float>(Constant(t_uint, immediate->GetValue())); - } else if (const auto predicate = std::get_if<PredicateNode>(node)) { + } else if (const auto predicate = std::get_if<PredicateNode>(&*node)) { const auto value = [&]() -> Id { switch (const auto index = predicate->GetIndex(); index) { case Tegra::Shader::Pred::UnusedIndex: @@ -515,7 +521,7 @@ private: } return value; - } else if (const auto abuf = std::get_if<AbufNode>(node)) { + } else if (const auto abuf = std::get_if<AbufNode>(&*node)) { const auto attribute = abuf->GetIndex(); const auto element = abuf->GetElement(); @@ -565,40 +571,42 @@ private: } UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); - } else if (const auto cbuf = std::get_if<CbufNode>(node)) { - const Node offset = cbuf->GetOffset(); + } else if (const auto cbuf = std::get_if<CbufNode>(&*node)) { + const Node& offset = cbuf->GetOffset(); const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); - Id buffer_index{}; - Id buffer_element{}; - - if (const auto immediate = std::get_if<ImmediateNode>(offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT(offset_imm % 4 == 0); - buffer_index = Constant(t_uint, offset_imm / 16); - buffer_element = Constant(t_uint, (offset_imm / 4) % 4); - - } else if (std::holds_alternative<OperationNode>(*offset)) { - // Indirect access - // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which - // emits sub-optimal code on GLSL from my testing). - const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); - const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); - const Id final_offset = Emit( - OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); - buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); - buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); - + Id pointer{}; + if (device.IsExtScalarBlockLayoutSupported()) { + const Id buffer_offset = Emit(OpShiftRightLogical( + t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u))); + pointer = Emit( + OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset)); } else { - UNREACHABLE_MSG("Unmanaged offset node type"); + Id buffer_index{}; + Id buffer_element{}; + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { + // Direct access + const u32 offset_imm = immediate->GetValue(); + ASSERT(offset_imm % 4 == 0); + buffer_index = Constant(t_uint, offset_imm / 16); + buffer_element = Constant(t_uint, (offset_imm / 4) % 4); + } else if (std::holds_alternative<OperationNode>(*offset)) { + // Indirect access + const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); + const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); + const Id final_offset = Emit(OpUMod( + t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); + buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); + buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); + } else { + UNREACHABLE_MSG("Unmanaged offset node type"); + } + pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), + buffer_index, buffer_element)); } - - const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), - buffer_index, buffer_element)); return Emit(OpLoad(t_float, pointer)); - } else if (const auto gmem = std::get_if<GmemNode>(node)) { + } else if (const auto gmem = std::get_if<GmemNode>(&*node)) { const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); const Id real = BitcastTo<Type::Uint>(Visit(gmem->GetRealAddress())); const Id base = BitcastTo<Type::Uint>(Visit(gmem->GetBaseAddress())); @@ -608,11 +616,13 @@ private: return Emit(OpLoad(t_float, Emit(OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0u), offset)))); - } else if (const auto conditional = std::get_if<ConditionalNode>(node)) { + } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { // It's invalid to call conditional on nested nodes, use an operation instead const Id true_label = OpLabel(); const Id skip_label = OpLabel(); - Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label)); + const Id condition = Visit(conditional->GetCondition()); + Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone)); + Emit(OpBranchConditional(condition, true_label, skip_label)); Emit(true_label); VisitBasicBlock(conditional->GetCode()); @@ -621,7 +631,7 @@ private: Emit(skip_label); return {}; - } else if (const auto comment = std::get_if<CommentNode>(node)) { + } else if (const auto comment = std::get_if<CommentNode>(&*node)) { Name(Emit(OpUndef(t_void)), comment->GetText()); return {}; } @@ -689,18 +699,18 @@ private: } Id Assign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; Id target{}; - if (const auto gpr = std::get_if<GprNode>(dest)) { + if (const auto gpr = std::get_if<GprNode>(&*dest)) { if (gpr->GetIndex() == Register::ZeroIndex) { // Writing to Register::ZeroIndex is a no op return {}; } target = registers.at(gpr->GetIndex()); - } else if (const auto abuf = std::get_if<AbufNode>(dest)) { + } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { target = [&]() -> Id { switch (const auto attribute = abuf->GetIndex(); attribute) { case Attribute::Index::Position: @@ -725,7 +735,7 @@ private: } }(); - } else if (const auto lmem = std::get_if<LmemNode>(dest)) { + } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { Id address = BitcastTo<Type::Uint>(Visit(lmem->GetAddress())); address = Emit(OpUDiv(t_uint, address, Constant(t_uint, 4))); target = Emit(OpAccessChain(t_prv_float, local_memory, {address})); @@ -771,11 +781,11 @@ private: } Id LogicalAssign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; Id target{}; - if (const auto pred = std::get_if<PredicateNode>(dest)) { + if (const auto pred = std::get_if<PredicateNode>(&*dest)) { ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); const auto index = pred->GetIndex(); @@ -787,7 +797,7 @@ private: } target = predicates.at(index); - } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) { + } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { target = internal_flags.at(static_cast<u32>(flag->GetFlag())); } @@ -873,7 +883,7 @@ private: } else { u32 component_value = 0; if (meta->component) { - const auto component = std::get_if<ImmediateNode>(meta->component); + const auto component = std::get_if<ImmediateNode>(&*meta->component); ASSERT_MSG(component, "Component is not an immediate value"); component_value = component->GetValue(); } @@ -930,7 +940,7 @@ private: } Id Branch(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); Emit(OpStore(jmp_to, Constant(t_uint, target->GetValue()))); @@ -939,7 +949,7 @@ private: } Id PushFlowStack(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); ASSERT(target); const Id current = Emit(OpLoad(t_uint, flow_stack_top)); @@ -968,11 +978,11 @@ private: case ShaderStage::Vertex: { // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. - const Id position = AccessElement(t_float4, per_vertex, position_index); - Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2))); + const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u); + Id depth = Emit(OpLoad(t_float, z_pointer)); depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); - Emit(OpStore(AccessElement(t_out_float, position, 2), depth)); + Emit(OpStore(z_pointer, depth)); break; } case ShaderStage::Fragment: { @@ -1311,6 +1321,7 @@ private: &SPIRVDecompiler::WorkGroupId<2>, }; + const VKDevice& device; const ShaderIR& ir; const ShaderStage stage; const Tegra::Shader::Header header; @@ -1349,12 +1360,18 @@ private: const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); - const Id t_cbuf_array = - Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"), - spv::Decoration::ArrayStride, CBUF_STRIDE); - const Id t_cbuf_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct); + const Id t_cbuf_std140 = Decorate( + Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"), + spv::Decoration::ArrayStride, 16u); + const Id t_cbuf_scalar = Decorate( + Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"), + spv::Decoration::ArrayStride, 4u); + const Id t_cbuf_std140_struct = MemberDecorate( + Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); + const Id t_cbuf_scalar_struct = MemberDecorate( + Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); + const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); + const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); const Id t_gmem_array = @@ -1403,8 +1420,9 @@ private: std::map<u32, Id> labels; }; -DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { - auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage); +DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, + Maxwell::ShaderStage stage) { + auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage); decompiler->Decompile(); return {std::move(decompiler), decompiler->GetShaderEntries()}; } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 329d8fa38..f90541cc1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -20,10 +20,13 @@ namespace VideoCommon::Shader { class ShaderIR; } +namespace Vulkan { +class VKDevice; +} + namespace Vulkan::VKShader { using Maxwell = Tegra::Engines::Maxwell3D::Regs; - using SamplerEntry = VideoCommon::Shader::Sampler; constexpr u32 DESCRIPTOR_SET = 0; @@ -75,6 +78,7 @@ struct ShaderEntries { using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; -DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage); +DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, + Maxwell::ShaderStage stage); } // namespace Vulkan::VKShader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2da595c0d..a0554c97e 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index b4859bc1e..87d8fecaa 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 3a29c4a46..b06cbe441 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 5341e460f..7bcf38f23 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 3095f2fd4..f1875967c 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 9fd4b273e..c8c1a7f40 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 679ac0d4e..73880db0e 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index 1ae192c6a..e02bcd097 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index 0b12a0d08..8be1119df 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index b5ec9a6f5..4221f0c58 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index a1d04c6e5..29be25ca3 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index cc522f1de..f5013e44a 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index 9d2322a1d..2323052b0 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 755f2ec44..48ca7a4af 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -8,6 +8,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index fba44d714..d59d15bd8 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index a425f9eb7..c3bcf1ae9 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index a4cdaf74d..46e3d5905 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -4,6 +4,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index a6a1fb632..dd20775d7 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index e6a010a7d..80fc0ccfc 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -10,6 +10,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -169,7 +170,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node it_offset = Immediate(i * 4); const Node real_address = Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); - const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); SetTemporal(bb, i, gmem); } @@ -262,7 +263,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node it_offset = Immediate(i * 4); const Node real_address = Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); - const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); } @@ -298,9 +299,9 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB const Node base_address{ TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; - const auto cbuf = std::get_if<CbufNode>(base_address); + const auto cbuf = std::get_if<CbufNode>(&*base_address); ASSERT(cbuf != nullptr); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); ASSERT(cbuf_offset_imm != nullptr); const auto cbuf_offset = cbuf_offset_imm->GetValue(); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index a6c123573..6fc07f213 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp index 71844c42b..9290d22eb 100644 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index 387491bd3..febbfeb50 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index f8659e48e..e6c9d287e 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 44ae87ece..2ac16eeb0 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 5b033126d..4a356dbd4 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -291,8 +292,8 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, const Node sampler_register = GetRegister(reg); const Node base_sampler = TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); - const auto cbuf = std::get_if<CbufNode>(base_sampler); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + const auto cbuf = std::get_if<CbufNode>(&*base_sampler); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); ASSERT(cbuf_offset_imm != nullptr); const auto cbuf_offset = cbuf_offset_imm->GetValue(); const auto cbuf_index = cbuf->GetIndex(); @@ -388,8 +389,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { - const bool is_array = array; - const bool is_shadow = depth_compare; + const auto is_array = static_cast<bool>(array); + const auto is_shadow = static_cast<bool>(depth_compare); const bool is_bindless = bindless_reg.has_value(); UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index cb9ab72b1..97fc6f9b1 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 04a776398..93dee77d1 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h new file mode 100644 index 000000000..c002f90f9 --- /dev/null +++ b/src/video_core/shader/node.h @@ -0,0 +1,514 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <cstddef> +#include <memory> +#include <string> +#include <tuple> +#include <utility> +#include <variant> +#include <vector> + +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" + +namespace VideoCommon::Shader { + +enum class OperationCode { + Assign, /// (float& dest, float src) -> void + + Select, /// (MetaArithmetic, bool pred, float a, float b) -> float + + FAdd, /// (MetaArithmetic, float a, float b) -> float + FMul, /// (MetaArithmetic, float a, float b) -> float + FDiv, /// (MetaArithmetic, float a, float b) -> float + FFma, /// (MetaArithmetic, float a, float b, float c) -> float + FNegate, /// (MetaArithmetic, float a) -> float + FAbsolute, /// (MetaArithmetic, float a) -> float + FClamp, /// (MetaArithmetic, float value, float min, float max) -> float + FMin, /// (MetaArithmetic, float a, float b) -> float + FMax, /// (MetaArithmetic, float a, float b) -> float + FCos, /// (MetaArithmetic, float a) -> float + FSin, /// (MetaArithmetic, float a) -> float + FExp2, /// (MetaArithmetic, float a) -> float + FLog2, /// (MetaArithmetic, float a) -> float + FInverseSqrt, /// (MetaArithmetic, float a) -> float + FSqrt, /// (MetaArithmetic, float a) -> float + FRoundEven, /// (MetaArithmetic, float a) -> float + FFloor, /// (MetaArithmetic, float a) -> float + FCeil, /// (MetaArithmetic, float a) -> float + FTrunc, /// (MetaArithmetic, float a) -> float + FCastInteger, /// (MetaArithmetic, int a) -> float + FCastUInteger, /// (MetaArithmetic, uint a) -> float + + IAdd, /// (MetaArithmetic, int a, int b) -> int + IMul, /// (MetaArithmetic, int a, int b) -> int + IDiv, /// (MetaArithmetic, int a, int b) -> int + INegate, /// (MetaArithmetic, int a) -> int + IAbsolute, /// (MetaArithmetic, int a) -> int + IMin, /// (MetaArithmetic, int a, int b) -> int + IMax, /// (MetaArithmetic, int a, int b) -> int + ICastFloat, /// (MetaArithmetic, float a) -> int + ICastUnsigned, /// (MetaArithmetic, uint a) -> int + ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int + ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int + IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int + IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int + IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int + IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int + IBitwiseNot, /// (MetaArithmetic, int a) -> int + IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int + IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int + IBitCount, /// (MetaArithmetic, int) -> int + + UAdd, /// (MetaArithmetic, uint a, uint b) -> uint + UMul, /// (MetaArithmetic, uint a, uint b) -> uint + UDiv, /// (MetaArithmetic, uint a, uint b) -> uint + UMin, /// (MetaArithmetic, uint a, uint b) -> uint + UMax, /// (MetaArithmetic, uint a, uint b) -> uint + UCastFloat, /// (MetaArithmetic, float a) -> uint + UCastSigned, /// (MetaArithmetic, int a) -> uint + ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint + ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint + UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint + UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint + UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint + UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint + UBitwiseNot, /// (MetaArithmetic, uint a) -> uint + UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint + UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint + UBitCount, /// (MetaArithmetic, uint) -> uint + + HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 + HAbsolute, /// (f16vec2 a) -> f16vec2 + HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 + HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 + HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 + HMergeF32, /// (f16vec2 src) -> float + HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 + HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 + HPack2, /// (float a, float b) -> f16vec2 + + LogicalAssign, /// (bool& dst, bool src) -> void + LogicalAnd, /// (bool a, bool b) -> bool + LogicalOr, /// (bool a, bool b) -> bool + LogicalXor, /// (bool a, bool b) -> bool + LogicalNegate, /// (bool a) -> bool + LogicalPick2, /// (bool2 pair, uint index) -> bool + LogicalAll2, /// (bool2 a) -> bool + LogicalAny2, /// (bool2 a) -> bool + + LogicalFLessThan, /// (float a, float b) -> bool + LogicalFEqual, /// (float a, float b) -> bool + LogicalFLessEqual, /// (float a, float b) -> bool + LogicalFGreaterThan, /// (float a, float b) -> bool + LogicalFNotEqual, /// (float a, float b) -> bool + LogicalFGreaterEqual, /// (float a, float b) -> bool + LogicalFIsNan, /// (float a) -> bool + + LogicalILessThan, /// (int a, int b) -> bool + LogicalIEqual, /// (int a, int b) -> bool + LogicalILessEqual, /// (int a, int b) -> bool + LogicalIGreaterThan, /// (int a, int b) -> bool + LogicalINotEqual, /// (int a, int b) -> bool + LogicalIGreaterEqual, /// (int a, int b) -> bool + + LogicalULessThan, /// (uint a, uint b) -> bool + LogicalUEqual, /// (uint a, uint b) -> bool + LogicalULessEqual, /// (uint a, uint b) -> bool + LogicalUGreaterThan, /// (uint a, uint b) -> bool + LogicalUNotEqual, /// (uint a, uint b) -> bool + LogicalUGreaterEqual, /// (uint a, uint b) -> bool + + Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + + Texture, /// (MetaTexture, float[N] coords) -> float4 + TextureLod, /// (MetaTexture, float[N] coords) -> float4 + TextureGather, /// (MetaTexture, float[N] coords) -> float4 + TextureQueryDimensions, /// (MetaTexture, float a) -> float4 + TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 + TexelFetch, /// (MetaTexture, int[N], int) -> float4 + + Branch, /// (uint branch_target) -> void + PushFlowStack, /// (uint branch_target) -> void + PopFlowStack, /// () -> void + Exit, /// () -> void + Discard, /// () -> void + + EmitVertex, /// () -> void + EndPrimitive, /// () -> void + + YNegate, /// () -> float + LocalInvocationIdX, /// () -> uint + LocalInvocationIdY, /// () -> uint + LocalInvocationIdZ, /// () -> uint + WorkGroupIdX, /// () -> uint + WorkGroupIdY, /// () -> uint + WorkGroupIdZ, /// () -> uint + + Amount, +}; + +enum class InternalFlag { + Zero = 0, + Sign = 1, + Carry = 2, + Overflow = 3, + Amount = 4, +}; + +class OperationNode; +class ConditionalNode; +class GprNode; +class ImmediateNode; +class InternalFlagNode; +class PredicateNode; +class AbufNode; +class CbufNode; +class LmemNode; +class GmemNode; +class CommentNode; + +using NodeData = + std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, + PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; +using Node = std::shared_ptr<NodeData>; +using Node4 = std::array<Node, 4>; +using NodeBlock = std::vector<Node>; + +class Sampler { +public: + /// This constructor is for bound samplers + explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow) + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{false} {} + + /// This constructor is for bindless samplers + explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, + Tegra::Shader::TextureType type, bool is_array, bool is_shadow) + : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, + is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} + + /// This constructor is for serialization/deserialization + explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow, bool is_bindless) + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{is_bindless} {} + + std::size_t GetOffset() const { + return offset; + } + + std::size_t GetIndex() const { + return index; + } + + Tegra::Shader::TextureType GetType() const { + return type; + } + + bool IsArray() const { + return is_array; + } + + bool IsShadow() const { + return is_shadow; + } + + bool IsBindless() const { + return is_bindless; + } + + std::pair<u32, u32> GetBindlessCBuf() const { + return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; + } + + bool operator<(const Sampler& rhs) const { + return std::tie(index, offset, type, is_array, is_shadow, is_bindless) < + std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow, + rhs.is_bindless); + } + +private: + /// Offset in TSC memory from which to read the sampler object, as specified by the sampling + /// instruction. + std::size_t offset{}; + std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. + Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) + bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. + bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. +}; + +struct GlobalMemoryBase { + u32 cbuf_index{}; + u32 cbuf_offset{}; + + bool operator<(const GlobalMemoryBase& rhs) const { + return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); + } +}; + +/// Parameters describing an arithmetic operation +struct MetaArithmetic { + bool precise{}; ///< Whether the operation can be constraint or not +}; + +/// Parameters describing a texture sampler +struct MetaTexture { + const Sampler& sampler; + Node array; + Node depth_compare; + std::vector<Node> aoffi; + Node bias; + Node lod; + Node component{}; + u32 element{}; +}; + +/// Parameters that modify an operation but are not part of any particular operand +using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; + +/// Holds any kind of operation that can be done in the IR +class OperationNode final { +public: + explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} + + explicit OperationNode(OperationCode code, Meta meta) + : OperationNode(code, meta, std::vector<Node>{}) {} + + explicit OperationNode(OperationCode code, std::vector<Node> operands) + : OperationNode(code, Meta{}, std::move(operands)) {} + + explicit OperationNode(OperationCode code, Meta meta, std::vector<Node> operands) + : code{code}, meta{std::move(meta)}, operands{std::move(operands)} {} + + template <typename... Args> + explicit OperationNode(OperationCode code, Meta meta, Args&&... operands) + : code{code}, meta{std::move(meta)}, operands{operands...} {} + + OperationCode GetCode() const { + return code; + } + + const Meta& GetMeta() const { + return meta; + } + + std::size_t GetOperandsCount() const { + return operands.size(); + } + + const Node& operator[](std::size_t operand_index) const { + return operands.at(operand_index); + } + +private: + OperationCode code{}; + Meta meta{}; + std::vector<Node> operands; +}; + +/// Encloses inside any kind of node that returns a boolean conditionally-executed code +class ConditionalNode final { +public: + explicit ConditionalNode(Node condition, std::vector<Node>&& code) + : condition{std::move(condition)}, code{std::move(code)} {} + + const Node& GetCondition() const { + return condition; + } + + const std::vector<Node>& GetCode() const { + return code; + } + +private: + Node condition; ///< Condition to be satisfied + std::vector<Node> code; ///< Code to execute +}; + +/// A general purpose register +class GprNode final { +public: + explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {} + + u32 GetIndex() const { + return static_cast<u32>(index); + } + +private: + Tegra::Shader::Register index{}; +}; + +/// A 32-bits value that represents an immediate value +class ImmediateNode final { +public: + explicit constexpr ImmediateNode(u32 value) : value{value} {} + + u32 GetValue() const { + return value; + } + +private: + u32 value{}; +}; + +/// One of Maxwell's internal flags +class InternalFlagNode final { +public: + explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {} + + InternalFlag GetFlag() const { + return flag; + } + +private: + InternalFlag flag{}; +}; + +/// A predicate register, it can be negated without additional nodes +class PredicateNode final { +public: + explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated) + : index{index}, negated{negated} {} + + Tegra::Shader::Pred GetIndex() const { + return index; + } + + bool IsNegated() const { + return negated; + } + +private: + Tegra::Shader::Pred index{}; + bool negated{}; +}; + +/// Attribute buffer memory (known as attributes or varyings in GLSL terms) +class AbufNode final { +public: + // Initialize for standard attributes (index is explicit). + explicit AbufNode(Tegra::Shader::Attribute::Index index, u32 element, Node buffer = {}) + : buffer{std::move(buffer)}, index{index}, element{element} {} + + // Initialize for physical attributes (index is a variable value). + explicit AbufNode(Node physical_address, Node buffer = {}) + : physical_address{std::move(physical_address)}, buffer{std::move(buffer)} {} + + Tegra::Shader::Attribute::Index GetIndex() const { + return index; + } + + u32 GetElement() const { + return element; + } + + const Node& GetBuffer() const { + return buffer; + } + + bool IsPhysicalBuffer() const { + return static_cast<bool>(physical_address); + } + + const Node& GetPhysicalAddress() const { + return physical_address; + } + +private: + Node physical_address; + Node buffer; + Tegra::Shader::Attribute::Index index{}; + u32 element{}; +}; + +/// Constant buffer node, usually mapped to uniform buffers in GLSL +class CbufNode final { +public: + explicit CbufNode(u32 index, Node offset) : index{index}, offset{std::move(offset)} {} + + u32 GetIndex() const { + return index; + } + + const Node& GetOffset() const { + return offset; + } + +private: + u32 index{}; + Node offset; +}; + +/// Local memory node +class LmemNode final { +public: + explicit LmemNode(Node address) : address{std::move(address)} {} + + const Node& GetAddress() const { + return address; + } + +private: + Node address; +}; + +/// Global memory node +class GmemNode final { +public: + explicit GmemNode(Node real_address, Node base_address, const GlobalMemoryBase& descriptor) + : real_address{std::move(real_address)}, base_address{std::move(base_address)}, + descriptor{descriptor} {} + + const Node& GetRealAddress() const { + return real_address; + } + + const Node& GetBaseAddress() const { + return base_address; + } + + const GlobalMemoryBase& GetDescriptor() const { + return descriptor; + } + +private: + Node real_address; + Node base_address; + GlobalMemoryBase descriptor; +}; + +/// Commentary, can be dropped +class CommentNode final { +public: + explicit CommentNode(std::string text) : text{std::move(text)} {} + + const std::string& GetText() const { + return text; + } + +private: + std::string text; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp new file mode 100644 index 000000000..6fccbbba3 --- /dev/null +++ b/src/video_core/shader/node_helper.cpp @@ -0,0 +1,99 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> +#include <vector> + +#include "common/common_types.h" +#include "video_core/shader/node_helper.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +Node Conditional(Node condition, std::vector<Node> code) { + return MakeNode<ConditionalNode>(condition, std::move(code)); +} + +Node Comment(std::string text) { + return MakeNode<CommentNode>(std::move(text)); +} + +Node Immediate(u32 value) { + return MakeNode<ImmediateNode>(value); +} + +Node Immediate(s32 value) { + return Immediate(static_cast<u32>(value)); +} + +Node Immediate(f32 value) { + u32 integral; + std::memcpy(&integral, &value, sizeof(u32)); + return Immediate(integral); +} + +OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { + if (is_signed) { + return operation_code; + } + switch (operation_code) { + case OperationCode::FCastInteger: + return OperationCode::FCastUInteger; + case OperationCode::IAdd: + return OperationCode::UAdd; + case OperationCode::IMul: + return OperationCode::UMul; + case OperationCode::IDiv: + return OperationCode::UDiv; + case OperationCode::IMin: + return OperationCode::UMin; + case OperationCode::IMax: + return OperationCode::UMax; + case OperationCode::ICastFloat: + return OperationCode::UCastFloat; + case OperationCode::ICastUnsigned: + return OperationCode::UCastSigned; + case OperationCode::ILogicalShiftLeft: + return OperationCode::ULogicalShiftLeft; + case OperationCode::ILogicalShiftRight: + return OperationCode::ULogicalShiftRight; + case OperationCode::IArithmeticShiftRight: + return OperationCode::UArithmeticShiftRight; + case OperationCode::IBitwiseAnd: + return OperationCode::UBitwiseAnd; + case OperationCode::IBitwiseOr: + return OperationCode::UBitwiseOr; + case OperationCode::IBitwiseXor: + return OperationCode::UBitwiseXor; + case OperationCode::IBitwiseNot: + return OperationCode::UBitwiseNot; + case OperationCode::IBitfieldInsert: + return OperationCode::UBitfieldInsert; + case OperationCode::IBitCount: + return OperationCode::UBitCount; + case OperationCode::LogicalILessThan: + return OperationCode::LogicalULessThan; + case OperationCode::LogicalIEqual: + return OperationCode::LogicalUEqual; + case OperationCode::LogicalILessEqual: + return OperationCode::LogicalULessEqual; + case OperationCode::LogicalIGreaterThan: + return OperationCode::LogicalUGreaterThan; + case OperationCode::LogicalINotEqual: + return OperationCode::LogicalUNotEqual; + case OperationCode::LogicalIGreaterEqual: + return OperationCode::LogicalUGreaterEqual; + case OperationCode::INegate: + UNREACHABLE_MSG("Can't negate an unsigned integer"); + return {}; + case OperationCode::IAbsolute: + UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); + return {}; + default: + UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code)); + return {}; + } +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h new file mode 100644 index 000000000..0c2aa749b --- /dev/null +++ b/src/video_core/shader/node_helper.h @@ -0,0 +1,65 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <string> +#include <tuple> +#include <type_traits> +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "video_core/shader/node.h" + +namespace VideoCommon::Shader { + +/// This arithmetic operation cannot be constraint +inline constexpr MetaArithmetic PRECISE = {true}; +/// This arithmetic operation can be optimized away +inline constexpr MetaArithmetic NO_PRECISE = {false}; + +/// Creates a conditional node +Node Conditional(Node condition, std::vector<Node> code); + +/// Creates a commentary node +Node Comment(std::string text); + +/// Creates an u32 immediate +Node Immediate(u32 value); + +/// Creates a s32 immediate +Node Immediate(s32 value); + +/// Creates a f32 immediate +Node Immediate(f32 value); + +/// Converts an signed operation code to an unsigned operation code +OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); + +template <typename T, typename... Args> +Node MakeNode(Args&&... args) { + static_assert(std::is_convertible_v<T, NodeData>); + return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); +} + +template <typename... Args> +Node Operation(OperationCode code, Args&&... args) { + if constexpr (sizeof...(args) == 0) { + return MakeNode<OperationNode>(code); + } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>, + Meta>) { + return MakeNode<OperationNode>(code, std::forward<Args>(args)...); + } else { + return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...); + } +} + +template <typename... Args> +Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) { + return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...); +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 8a6ee5cf5..11b545cca 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -28,30 +29,11 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) ShaderIR::~ShaderIR() = default; -Node ShaderIR::StoreNode(NodeData&& node_data) { - auto store = std::make_unique<NodeData>(node_data); - const Node node = store.get(); - stored_nodes.push_back(std::move(store)); - return node; -} - -Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) { - return StoreNode(ConditionalNode(condition, std::move(code))); -} - -Node ShaderIR::Comment(std::string text) { - return StoreNode(CommentNode(std::move(text))); -} - -Node ShaderIR::Immediate(u32 value) { - return StoreNode(ImmediateNode(value)); -} - Node ShaderIR::GetRegister(Register reg) { if (reg != Register::ZeroIndex) { used_registers.insert(static_cast<u32>(reg)); } - return StoreNode(GprNode(reg)); + return MakeNode<GprNode>(reg); } Node ShaderIR::GetImmediate19(Instruction instr) { @@ -69,7 +51,7 @@ Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { const auto [entry, is_new] = used_cbufs.try_emplace(index); entry->second.MarkAsUsed(offset); - return StoreNode(CbufNode(index, Immediate(offset))); + return MakeNode<CbufNode>(index, Immediate(offset)); } Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { @@ -80,7 +62,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { entry->second.MarkAsUsedIndirect(); const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); - return StoreNode(CbufNode(index, final_offset)); + return MakeNode<CbufNode>(index, final_offset); } Node ShaderIR::GetPredicate(u64 pred_, bool negated) { @@ -89,7 +71,7 @@ Node ShaderIR::GetPredicate(u64 pred_, bool negated) { used_predicates.insert(pred); } - return StoreNode(PredicateNode(pred, negated)); + return MakeNode<PredicateNode>(pred, negated); } Node ShaderIR::GetPredicate(bool immediate) { @@ -98,12 +80,12 @@ Node ShaderIR::GetPredicate(bool immediate) { Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { used_input_attributes.emplace(index); - return StoreNode(AbufNode(index, static_cast<u32>(element), buffer)); + return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); } Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { uses_physical_attributes = true; - return StoreNode(AbufNode(GetRegister(physical_address), buffer)); + return MakeNode<AbufNode>(GetRegister(physical_address), buffer); } Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { @@ -115,11 +97,11 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff } used_output_attributes.insert(index); - return StoreNode(AbufNode(index, static_cast<u32>(element), buffer)); + return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); } Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { - const Node node = StoreNode(InternalFlagNode(flag)); + const Node node = MakeNode<InternalFlagNode>(flag); if (negated) { return Operation(OperationCode::LogicalNegate, node); } @@ -127,7 +109,7 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { } Node ShaderIR::GetLocalMemory(Node address) { - return StoreNode(LmemNode(address)); + return MakeNode<LmemNode>(address); } Node ShaderIR::GetTemporal(u32 id) { @@ -393,68 +375,4 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { Immediate(bits)); } -/*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code, - bool is_signed) { - if (is_signed) { - return operation_code; - } - switch (operation_code) { - case OperationCode::FCastInteger: - return OperationCode::FCastUInteger; - case OperationCode::IAdd: - return OperationCode::UAdd; - case OperationCode::IMul: - return OperationCode::UMul; - case OperationCode::IDiv: - return OperationCode::UDiv; - case OperationCode::IMin: - return OperationCode::UMin; - case OperationCode::IMax: - return OperationCode::UMax; - case OperationCode::ICastFloat: - return OperationCode::UCastFloat; - case OperationCode::ICastUnsigned: - return OperationCode::UCastSigned; - case OperationCode::ILogicalShiftLeft: - return OperationCode::ULogicalShiftLeft; - case OperationCode::ILogicalShiftRight: - return OperationCode::ULogicalShiftRight; - case OperationCode::IArithmeticShiftRight: - return OperationCode::UArithmeticShiftRight; - case OperationCode::IBitwiseAnd: - return OperationCode::UBitwiseAnd; - case OperationCode::IBitwiseOr: - return OperationCode::UBitwiseOr; - case OperationCode::IBitwiseXor: - return OperationCode::UBitwiseXor; - case OperationCode::IBitwiseNot: - return OperationCode::UBitwiseNot; - case OperationCode::IBitfieldInsert: - return OperationCode::UBitfieldInsert; - case OperationCode::IBitCount: - return OperationCode::UBitCount; - case OperationCode::LogicalILessThan: - return OperationCode::LogicalULessThan; - case OperationCode::LogicalIEqual: - return OperationCode::LogicalUEqual; - case OperationCode::LogicalILessEqual: - return OperationCode::LogicalULessEqual; - case OperationCode::LogicalIGreaterThan: - return OperationCode::LogicalUGreaterThan; - case OperationCode::LogicalINotEqual: - return OperationCode::LogicalUNotEqual; - case OperationCode::LogicalIGreaterEqual: - return OperationCode::LogicalUGreaterEqual; - case OperationCode::INegate: - UNREACHABLE_MSG("Can't negate an unsigned integer"); - return {}; - case OperationCode::IAbsolute: - UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); - return {}; - default: - UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code)); - return {}; - } -} - } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ff7472e30..edcf2288e 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -18,188 +18,14 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/shader/node.h" namespace VideoCommon::Shader { -class OperationNode; -class ConditionalNode; -class GprNode; -class ImmediateNode; -class InternalFlagNode; -class PredicateNode; -class AbufNode; ///< Attribute buffer -class CbufNode; ///< Constant buffer -class LmemNode; ///< Local memory -class GmemNode; ///< Global memory -class CommentNode; - using ProgramCode = std::vector<u64>; -using NodeData = - std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, - PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; -using Node = const NodeData*; -using Node4 = std::array<Node, 4>; -using NodeBlock = std::vector<Node>; - constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; -enum class OperationCode { - Assign, /// (float& dest, float src) -> void - - Select, /// (MetaArithmetic, bool pred, float a, float b) -> float - - FAdd, /// (MetaArithmetic, float a, float b) -> float - FMul, /// (MetaArithmetic, float a, float b) -> float - FDiv, /// (MetaArithmetic, float a, float b) -> float - FFma, /// (MetaArithmetic, float a, float b, float c) -> float - FNegate, /// (MetaArithmetic, float a) -> float - FAbsolute, /// (MetaArithmetic, float a) -> float - FClamp, /// (MetaArithmetic, float value, float min, float max) -> float - FMin, /// (MetaArithmetic, float a, float b) -> float - FMax, /// (MetaArithmetic, float a, float b) -> float - FCos, /// (MetaArithmetic, float a) -> float - FSin, /// (MetaArithmetic, float a) -> float - FExp2, /// (MetaArithmetic, float a) -> float - FLog2, /// (MetaArithmetic, float a) -> float - FInverseSqrt, /// (MetaArithmetic, float a) -> float - FSqrt, /// (MetaArithmetic, float a) -> float - FRoundEven, /// (MetaArithmetic, float a) -> float - FFloor, /// (MetaArithmetic, float a) -> float - FCeil, /// (MetaArithmetic, float a) -> float - FTrunc, /// (MetaArithmetic, float a) -> float - FCastInteger, /// (MetaArithmetic, int a) -> float - FCastUInteger, /// (MetaArithmetic, uint a) -> float - - IAdd, /// (MetaArithmetic, int a, int b) -> int - IMul, /// (MetaArithmetic, int a, int b) -> int - IDiv, /// (MetaArithmetic, int a, int b) -> int - INegate, /// (MetaArithmetic, int a) -> int - IAbsolute, /// (MetaArithmetic, int a) -> int - IMin, /// (MetaArithmetic, int a, int b) -> int - IMax, /// (MetaArithmetic, int a, int b) -> int - ICastFloat, /// (MetaArithmetic, float a) -> int - ICastUnsigned, /// (MetaArithmetic, uint a) -> int - ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int - ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int - IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int - IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int - IBitwiseNot, /// (MetaArithmetic, int a) -> int - IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int - IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int - IBitCount, /// (MetaArithmetic, int) -> int - - UAdd, /// (MetaArithmetic, uint a, uint b) -> uint - UMul, /// (MetaArithmetic, uint a, uint b) -> uint - UDiv, /// (MetaArithmetic, uint a, uint b) -> uint - UMin, /// (MetaArithmetic, uint a, uint b) -> uint - UMax, /// (MetaArithmetic, uint a, uint b) -> uint - UCastFloat, /// (MetaArithmetic, float a) -> uint - UCastSigned, /// (MetaArithmetic, int a) -> uint - ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint - ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseNot, /// (MetaArithmetic, uint a) -> uint - UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint - UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint - UBitCount, /// (MetaArithmetic, uint) -> uint - - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 - - LogicalAssign, /// (bool& dst, bool src) -> void - LogicalAnd, /// (bool a, bool b) -> bool - LogicalOr, /// (bool a, bool b) -> bool - LogicalXor, /// (bool a, bool b) -> bool - LogicalNegate, /// (bool a) -> bool - LogicalPick2, /// (bool2 pair, uint index) -> bool - LogicalAll2, /// (bool2 a) -> bool - LogicalAny2, /// (bool2 a) -> bool - - LogicalFLessThan, /// (float a, float b) -> bool - LogicalFEqual, /// (float a, float b) -> bool - LogicalFLessEqual, /// (float a, float b) -> bool - LogicalFGreaterThan, /// (float a, float b) -> bool - LogicalFNotEqual, /// (float a, float b) -> bool - LogicalFGreaterEqual, /// (float a, float b) -> bool - LogicalFIsNan, /// (float a) -> bool - - LogicalILessThan, /// (int a, int b) -> bool - LogicalIEqual, /// (int a, int b) -> bool - LogicalILessEqual, /// (int a, int b) -> bool - LogicalIGreaterThan, /// (int a, int b) -> bool - LogicalINotEqual, /// (int a, int b) -> bool - LogicalIGreaterEqual, /// (int a, int b) -> bool - - LogicalULessThan, /// (uint a, uint b) -> bool - LogicalUEqual, /// (uint a, uint b) -> bool - LogicalULessEqual, /// (uint a, uint b) -> bool - LogicalUGreaterThan, /// (uint a, uint b) -> bool - LogicalUNotEqual, /// (uint a, uint b) -> bool - LogicalUGreaterEqual, /// (uint a, uint b) -> bool - - Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - - Texture, /// (MetaTexture, float[N] coords) -> float4 - TextureLod, /// (MetaTexture, float[N] coords) -> float4 - TextureGather, /// (MetaTexture, float[N] coords) -> float4 - TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 - TexelFetch, /// (MetaTexture, int[N], int) -> float4 - - Branch, /// (uint branch_target) -> void - PushFlowStack, /// (uint branch_target) -> void - PopFlowStack, /// () -> void - Exit, /// () -> void - Discard, /// () -> void - - EmitVertex, /// () -> void - EndPrimitive, /// () -> void - - YNegate, /// () -> float - LocalInvocationIdX, /// () -> uint - LocalInvocationIdY, /// () -> uint - LocalInvocationIdZ, /// () -> uint - WorkGroupIdX, /// () -> uint - WorkGroupIdY, /// () -> uint - WorkGroupIdZ, /// () -> uint - - Amount, -}; - -enum class InternalFlag { - Zero = 0, - Sign = 1, - Carry = 2, - Overflow = 3, - Amount = 4, -}; - /// Describes the behaviour of code path of a given entry point and a return point. enum class ExitMethod { Undetermined, ///< Internal value. Only occur when analyzing JMP loop. @@ -208,71 +34,6 @@ enum class ExitMethod { AlwaysEnd, ///< All code paths reach a END instruction. }; -class Sampler { -public: - // Use this constructor for bounded Samplers - explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_bindless{false} {} - - // Use this constructor for bindless Samplers - explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, - Tegra::Shader::TextureType type, bool is_array, bool is_shadow) - : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, - is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} - - // Use this only for serialization/deserialization - explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_bindless) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_bindless{is_bindless} {} - - std::size_t GetOffset() const { - return offset; - } - - std::size_t GetIndex() const { - return index; - } - - Tegra::Shader::TextureType GetType() const { - return type; - } - - bool IsArray() const { - return is_array; - } - - bool IsShadow() const { - return is_shadow; - } - - bool IsBindless() const { - return is_bindless; - } - - std::pair<u32, u32> GetBindlessCBuf() const { - return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; - } - - bool operator<(const Sampler& rhs) const { - return std::tie(index, offset, type, is_array, is_shadow, is_bindless) < - std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow, - rhs.is_bindless); - } - -private: - /// Offset in TSC memory from which to read the sampler object, as specified by the sampling - /// instruction. - std::size_t offset{}; - std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. - Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. - bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. -}; - class ConstBuffer { public: explicit ConstBuffer(u32 max_offset, bool is_indirect) @@ -305,268 +66,11 @@ private: bool is_indirect{}; }; -struct GlobalMemoryBase { - u32 cbuf_index{}; - u32 cbuf_offset{}; - - bool operator<(const GlobalMemoryBase& rhs) const { - return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); - } -}; - struct GlobalMemoryUsage { bool is_read{}; bool is_written{}; }; -struct MetaArithmetic { - bool precise{}; -}; - -struct MetaTexture { - const Sampler& sampler; - Node array{}; - Node depth_compare{}; - std::vector<Node> aoffi; - Node bias{}; - Node lod{}; - Node component{}; - u32 element{}; -}; - -constexpr MetaArithmetic PRECISE = {true}; -constexpr MetaArithmetic NO_PRECISE = {false}; - -using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; - -/// Holds any kind of operation that can be done in the IR -class OperationNode final { -public: - explicit OperationNode(OperationCode code) : code{code} {} - - explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {} - - template <typename... T> - explicit OperationNode(OperationCode code, const T*... operands) - : OperationNode(code, {}, operands...) {} - - template <typename... T> - explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_) - : code{code}, meta{std::move(meta)}, operands{operands_...} {} - - explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands) - : code{code}, meta{meta}, operands{std::move(operands)} {} - - explicit OperationNode(OperationCode code, std::vector<Node>&& operands) - : code{code}, operands{std::move(operands)} {} - - OperationCode GetCode() const { - return code; - } - - const Meta& GetMeta() const { - return meta; - } - - std::size_t GetOperandsCount() const { - return operands.size(); - } - - Node operator[](std::size_t operand_index) const { - return operands.at(operand_index); - } - -private: - const OperationCode code; - const Meta meta; - std::vector<Node> operands; -}; - -/// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final { -public: - explicit ConditionalNode(Node condition, std::vector<Node>&& code) - : condition{condition}, code{std::move(code)} {} - - Node GetCondition() const { - return condition; - } - - const std::vector<Node>& GetCode() const { - return code; - } - -private: - const Node condition; ///< Condition to be satisfied - std::vector<Node> code; ///< Code to execute -}; - -/// A general purpose register -class GprNode final { -public: - explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {} - - u32 GetIndex() const { - return static_cast<u32>(index); - } - -private: - const Tegra::Shader::Register index; -}; - -/// A 32-bits value that represents an immediate value -class ImmediateNode final { -public: - explicit constexpr ImmediateNode(u32 value) : value{value} {} - - u32 GetValue() const { - return value; - } - -private: - const u32 value; -}; - -/// One of Maxwell's internal flags -class InternalFlagNode final { -public: - explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {} - - InternalFlag GetFlag() const { - return flag; - } - -private: - const InternalFlag flag; -}; - -/// A predicate register, it can be negated without additional nodes -class PredicateNode final { -public: - explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated) - : index{index}, negated{negated} {} - - Tegra::Shader::Pred GetIndex() const { - return index; - } - - bool IsNegated() const { - return negated; - } - -private: - const Tegra::Shader::Pred index; - const bool negated; -}; - -/// Attribute buffer memory (known as attributes or varyings in GLSL terms) -class AbufNode final { -public: - // Initialize for standard attributes (index is explicit). - explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, - Node buffer = {}) - : buffer{buffer}, index{index}, element{element} {} - - // Initialize for physical attributes (index is a variable value). - explicit constexpr AbufNode(Node physical_address, Node buffer = {}) - : physical_address{physical_address}, buffer{buffer} {} - - Tegra::Shader::Attribute::Index GetIndex() const { - return index; - } - - u32 GetElement() const { - return element; - } - - Node GetBuffer() const { - return buffer; - } - - bool IsPhysicalBuffer() const { - return physical_address != nullptr; - } - - Node GetPhysicalAddress() const { - return physical_address; - } - -private: - Node physical_address{}; - Node buffer{}; - Tegra::Shader::Attribute::Index index{}; - u32 element{}; -}; - -/// Constant buffer node, usually mapped to uniform buffers in GLSL -class CbufNode final { -public: - explicit constexpr CbufNode(u32 index, Node offset) : index{index}, offset{offset} {} - - u32 GetIndex() const { - return index; - } - - Node GetOffset() const { - return offset; - } - -private: - const u32 index; - const Node offset; -}; - -/// Local memory node -class LmemNode final { -public: - explicit constexpr LmemNode(Node address) : address{address} {} - - Node GetAddress() const { - return address; - } - -private: - const Node address; -}; - -/// Global memory node -class GmemNode final { -public: - explicit constexpr GmemNode(Node real_address, Node base_address, - const GlobalMemoryBase& descriptor) - : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {} - - Node GetRealAddress() const { - return real_address; - } - - Node GetBaseAddress() const { - return base_address; - } - - const GlobalMemoryBase& GetDescriptor() const { - return descriptor; - } - -private: - const Node real_address; - const Node base_address; - const GlobalMemoryBase descriptor; -}; - -/// Commentary, can be dropped -class CommentNode final { -public: - explicit CommentNode(std::string text) : text{std::move(text)} {} - - const std::string& GetText() const { - return text; - } - -private: - std::string text; -}; - class ShaderIR final { public: explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); @@ -663,26 +167,6 @@ private: u32 DecodeXmad(NodeBlock& bb, u32 pc); u32 DecodeOther(NodeBlock& bb, u32 pc); - /// Internalizes node's data and returns a managed pointer to a clone of that node - Node StoreNode(NodeData&& node_data); - - /// Creates a conditional node - Node Conditional(Node condition, std::vector<Node>&& code); - /// Creates a commentary - Node Comment(std::string text); - /// Creates an u32 immediate - Node Immediate(u32 value); - /// Creates a s32 immediate - Node Immediate(s32 value) { - return Immediate(static_cast<u32>(value)); - } - /// Creates a f32 immediate - Node Immediate(f32 value) { - u32 integral; - std::memcpy(&integral, &value, sizeof(u32)); - return Immediate(integral); - } - /// Generates a node for a passed register. Node GetRegister(Tegra::Shader::Register reg); /// Generates a node representing a 19-bit immediate value @@ -827,37 +311,6 @@ private: std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory( NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write); - template <typename... T> - Node Operation(OperationCode code, const T*... operands) { - return StoreNode(OperationNode(code, operands...)); - } - - template <typename... T> - Node Operation(OperationCode code, Meta&& meta, const T*... operands) { - return StoreNode(OperationNode(code, std::move(meta), operands...)); - } - - Node Operation(OperationCode code, std::vector<Node>&& operands) { - return StoreNode(OperationNode(code, std::move(operands))); - } - - Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) { - return StoreNode(OperationNode(code, std::move(meta), std::move(operands))); - } - - template <typename... T> - Node SignedOperation(OperationCode code, bool is_signed, const T*... operands) { - return StoreNode(OperationNode(SignedToUnsignedCode(code, is_signed), operands...)); - } - - template <typename... T> - Node SignedOperation(OperationCode code, bool is_signed, Meta&& meta, const T*... operands) { - return StoreNode( - OperationNode(SignedToUnsignedCode(code, is_signed), std::move(meta), operands...)); - } - - static OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); - const ProgramCode& program_code; const u32 main_offset; @@ -868,8 +321,6 @@ private: std::map<u32, NodeBlock> basic_blocks; NodeBlock global_code; - std::vector<std::unique_ptr<NodeData>> stored_nodes; - std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; std::set<Tegra::Shader::Attribute::Index> used_input_attributes; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 19ede1eb9..fc957d980 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -16,12 +16,12 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, OperationCode operation_code) { for (; cursor >= 0; --cursor) { const Node node = code.at(cursor); - if (const auto operation = std::get_if<OperationNode>(node)) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { if (operation->GetCode() == operation_code) { return {node, cursor}; } } - if (const auto conditional = std::get_if<ConditionalNode>(node)) { + if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { const auto& conditional_code = conditional->GetCode(); const auto [found, internal_cursor] = FindOperation( conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); @@ -35,11 +35,11 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, } // namespace Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { - if (const auto cbuf = std::get_if<CbufNode>(tracked)) { + if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { // Cbuf found, but it has to be immediate return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; } - if (const auto gpr = std::get_if<GprNode>(tracked)) { + if (const auto gpr = std::get_if<GprNode>(&*tracked)) { if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { return nullptr; } @@ -51,7 +51,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const } return TrackCbuf(source, code, new_cursor); } - if (const auto operation = std::get_if<OperationNode>(tracked)) { + if (const auto operation = std::get_if<OperationNode>(&*tracked)) { for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { // Cbuf found in operand @@ -60,7 +60,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const } return nullptr; } - if (const auto conditional = std::get_if<ConditionalNode>(tracked)) { + if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { const auto& conditional_code = conditional->GetCode(); return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); } @@ -75,7 +75,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, if (!found) { return {}; } - if (const auto immediate = std::get_if<ImmediateNode>(found)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*found)) { return immediate->GetValue(); } return {}; @@ -88,11 +88,11 @@ std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeB if (!found_node) { return {}; } - const auto operation = std::get_if<OperationNode>(found_node); + const auto operation = std::get_if<OperationNode>(&*found_node); ASSERT(operation); const auto& target = (*operation)[0]; - if (const auto gpr_target = std::get_if<GprNode>(target)) { + if (const auto gpr_target = std::get_if<GprNode>(&*target)) { if (gpr_target->GetIndex() == tracked->GetIndex()) { return {(*operation)[1], new_cursor}; } diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index bea0d5bc2..219bfd559 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -251,7 +251,7 @@ enum class WrapMode : u32 { Mirror = 1, ClampToEdge = 2, Border = 3, - ClampOGL = 4, + Clamp = 4, MirrorOnceClampToEdge = 5, MirrorOnceBorder = 6, MirrorOnceClampOGL = 7, diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 3ea7b55d0..3dc0e47d0 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -159,6 +159,15 @@ target_compile_definitions(yuzu PRIVATE # Disable implicit conversions from/to C strings -DQT_NO_CAST_FROM_ASCII -DQT_NO_CAST_TO_ASCII + + # Disable implicit type narrowing in signal/slot connect() calls. + -DQT_NO_NARROWING_CONVERSIONS_IN_CONNECT + + # Disable unsafe overloads of QProcess' start() function. + -DQT_NO_PROCESS_COMBINED_ARGUMENT_START + + # Disable implicit QString->QUrl conversions to enforce use of proper resolving functions. + -DQT_NO_URL_CAST_FROM_STRING ) if (YUZU_ENABLE_COMPATIBILITY_REPORTING) diff --git a/src/yuzu/applets/profile_select.cpp b/src/yuzu/applets/profile_select.cpp index 6b5c7ba61..6aff38735 100644 --- a/src/yuzu/applets/profile_select.cpp +++ b/src/yuzu/applets/profile_select.cpp @@ -12,21 +12,11 @@ #include <QVBoxLayout> #include "common/file_util.h" #include "common/string_util.h" +#include "core/constants.h" #include "core/hle/lock.h" #include "yuzu/applets/profile_select.h" #include "yuzu/main.h" -// Same backup JPEG used by acc IProfile::GetImage if no jpeg found -constexpr std::array<u8, 107> backup_jpeg{ - 0xff, 0xd8, 0xff, 0xdb, 0x00, 0x43, 0x00, 0x03, 0x02, 0x02, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, - 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x06, 0x04, 0x04, 0x04, 0x04, 0x04, 0x08, 0x06, 0x06, 0x05, - 0x06, 0x09, 0x08, 0x0a, 0x0a, 0x09, 0x08, 0x09, 0x09, 0x0a, 0x0c, 0x0f, 0x0c, 0x0a, 0x0b, 0x0e, - 0x0b, 0x09, 0x09, 0x0d, 0x11, 0x0d, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x10, 0x0a, 0x0c, 0x12, 0x13, - 0x12, 0x10, 0x13, 0x0f, 0x10, 0x10, 0x10, 0xff, 0xc9, 0x00, 0x0b, 0x08, 0x00, 0x01, 0x00, 0x01, - 0x01, 0x01, 0x11, 0x00, 0xff, 0xcc, 0x00, 0x06, 0x00, 0x10, 0x10, 0x05, 0xff, 0xda, 0x00, 0x08, - 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, 0xd2, 0xcf, 0x20, 0xff, 0xd9, -}; - QString FormatUserEntryText(const QString& username, Common::UUID uuid) { return QtProfileSelectionDialog::tr( "%1\n%2", "%1 is the profile username, %2 is the formatted UUID (e.g. " @@ -45,7 +35,8 @@ QPixmap GetIcon(Common::UUID uuid) { if (!icon) { icon.fill(Qt::black); - icon.loadFromData(backup_jpeg.data(), static_cast<u32>(backup_jpeg.size())); + icon.loadFromData(Core::Constants::ACCOUNT_BACKUP_JPEG.data(), + static_cast<u32>(Core::Constants::ACCOUNT_BACKUP_JPEG.size())); } return icon.scaled(64, 64, Qt::IgnoreAspectRatio, Qt::SmoothTransformation); diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index afec33b61..07a720494 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -381,12 +381,8 @@ void GRenderWindow::InitRenderTarget() { // WA_DontShowOnScreen, WA_DeleteOnClose QSurfaceFormat fmt; fmt.setVersion(4, 3); - if (Settings::values.use_compatibility_profile) { - fmt.setProfile(QSurfaceFormat::CompatibilityProfile); - fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); - } else { - fmt.setProfile(QSurfaceFormat::CoreProfile); - } + fmt.setProfile(QSurfaceFormat::CompatibilityProfile); + fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); // TODO: expose a setting for buffer value (ie default/single/double/triple) fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); shared_context = std::make_unique<QOpenGLContext>(); diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index b1942bedc..10e5c5c38 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -531,8 +531,6 @@ void Config::ReadRendererValues() { Settings::values.use_frame_limit = ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); - Settings::values.use_compatibility_profile = - ReadSetting(QStringLiteral("use_compatibility_profile"), true).toBool(); Settings::values.use_disk_shader_cache = ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); Settings::values.use_accurate_gpu_emulation = @@ -914,8 +912,6 @@ void Config::SaveRendererValues() { static_cast<double>(Settings::values.resolution_factor), 1.0); WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); - WriteSetting(QStringLiteral("use_compatibility_profile"), - Settings::values.use_compatibility_profile, true); WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, true); WriteSetting(QStringLiteral("use_accurate_gpu_emulation"), diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 45e87248c..2b17b250c 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -75,8 +75,6 @@ void ConfigureGraphics::SetConfiguration() { ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit); ui->frame_limit->setEnabled(ui->toggle_frame_limit->isChecked()); ui->frame_limit->setValue(Settings::values.frame_limit); - ui->use_compatibility_profile->setEnabled(runtime_lock); - ui->use_compatibility_profile->setChecked(Settings::values.use_compatibility_profile); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); @@ -93,7 +91,6 @@ void ConfigureGraphics::ApplyConfiguration() { ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked(); Settings::values.frame_limit = ui->frame_limit->value(); - Settings::values.use_compatibility_profile = ui->use_compatibility_profile->isChecked(); Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); Settings::values.use_asynchronous_gpu_emulation = diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 0f6f6c003..15ab18ecd 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -50,13 +50,6 @@ </layout> </item> <item> - <widget class="QCheckBox" name="use_compatibility_profile"> - <property name="text"> - <string>Use OpenGL compatibility profile</string> - </property> - </widget> - </item> - <item> <widget class="QCheckBox" name="use_disk_shader_cache"> <property name="text"> <string>Use disk shader cache</string> diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 730956427..f3817bb87 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -349,8 +349,6 @@ void Config::ReadValues() { Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); Settings::values.frame_limit = static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); - Settings::values.use_compatibility_profile = - sdl2_config->GetBoolean("Renderer", "use_compatibility_profile", true); Settings::values.use_disk_shader_cache = sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); Settings::values.use_accurate_gpu_emulation = diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 904022137..e2d3df180 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp @@ -74,13 +74,9 @@ bool EmuWindow_SDL2_GL::SupportsRequiredGLExtensions() { } EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscreen) { - const SDL_GLprofile profile = Settings::values.use_compatibility_profile - ? SDL_GL_CONTEXT_PROFILE_COMPATIBILITY - : SDL_GL_CONTEXT_PROFILE_CORE; - SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, profile); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_COMPATIBILITY); SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1); SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8); SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8); |