diff options
Diffstat (limited to '')
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 4 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 47 |
4 files changed, 58 insertions, 1 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 85424a4c9..03d434b28 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -27,6 +27,8 @@ Device::Device() { shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); + has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && + GLAD_GL_NV_shader_thread_shuffle; has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = TestComponentIndexingBug(); @@ -36,6 +38,7 @@ Device::Device(std::nullptr_t) { uniform_buffer_alignment = 0; max_vertex_attributes = 16; max_varyings = 15; + has_warp_intrinsics = true; has_vertex_viewport_layer = true; has_variable_aoffi = true; has_component_indexing_bug = false; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index dc883722d..3ef7c6dd8 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -30,6 +30,10 @@ public: return max_varyings; } + bool HasWarpIntrinsics() const { + return has_warp_intrinsics; + } + bool HasVertexViewportLayer() const { return has_vertex_viewport_layer; } @@ -50,6 +54,7 @@ private: std::size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; + bool has_warp_intrinsics{}; bool has_vertex_viewport_layer{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1c90facc3..a32a7e984 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -212,7 +212,9 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn const auto texture_buffer_usage{variant.texture_buffer_usage}; std::string source = "#version 430 core\n" - "#extension GL_ARB_separate_shader_objects : enable\n"; + "#extension GL_ARB_separate_shader_objects : enable\n" + "#extension GL_NV_gpu_shader5 : enable\n" + "#extension GL_NV_shader_thread_group : enable\n"; if (entries.shader_viewport_layer_array) { source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d8f722c26..1bfdbcd61 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1735,6 +1735,48 @@ private: return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; } + std::string BallotThread(Operation operation) { + const std::string value = VisitOperand(operation, 0, Type::Bool); + if (!device.HasWarpIntrinsics()) { + LOG_ERROR(Render_OpenGL, + "Nvidia warp intrinsics are not available and its required by a shader"); + // Stub on non-Nvidia devices by simulating all threads voting the same as the active + // one. + return fmt::format("utof({} ? 0xFFFFFFFFU : 0U)", value); + } + return fmt::format("utof(ballotThreadNV({}))", value); + } + + std::string Vote(Operation operation, const char* func) { + const std::string value = VisitOperand(operation, 0, Type::Bool); + if (!device.HasWarpIntrinsics()) { + LOG_ERROR(Render_OpenGL, + "Nvidia vote intrinsics are not available and its required by a shader"); + // Stub with a warp size of one. + return value; + } + return fmt::format("{}({})", func, value); + } + + std::string VoteAll(Operation operation) { + return Vote(operation, "allThreadsNV"); + } + + std::string VoteAny(Operation operation) { + return Vote(operation, "anyThreadNV"); + } + + std::string VoteEqual(Operation operation) { + if (!device.HasWarpIntrinsics()) { + LOG_ERROR(Render_OpenGL, + "Nvidia vote intrinsics are not available and its required by a shader"); + // We must return true here since a stub for a theoretical warp size of 1 will always + // return an equal result for all its votes. + return "true"; + } + return Vote(operation, "allThreadsEqualNV"); + } + static constexpr std::array operation_decompilers = { &GLSLDecompiler::Assign, @@ -1885,6 +1927,11 @@ private: &GLSLDecompiler::WorkGroupId<0>, &GLSLDecompiler::WorkGroupId<1>, &GLSLDecompiler::WorkGroupId<2>, + + &GLSLDecompiler::BallotThread, + &GLSLDecompiler::VoteAll, + &GLSLDecompiler::VoteAny, + &GLSLDecompiler::VoteEqual, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |