diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 2 | ||||
-rw-r--r-- | src/video_core/gpu_thread.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 55 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 69 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 53 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 9 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 18 | ||||
-rw-r--r-- | src/video_core/shader/decode/other.cpp | 42 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 8 |
11 files changed, 200 insertions, 65 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index e83f25fa1..ffb3ec3e0 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1663,6 +1663,7 @@ private: INST("111000100100----", Id::BRA, Type::Flow, "BRA"), INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), INST("111000110100---", Id::BRK, Type::Flow, "BRK"), + INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), @@ -1686,7 +1687,6 @@ private: INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), - INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 1e2ff46b0..3f0939ec9 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -75,7 +75,7 @@ void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPus void ThreadManager::SubmitList(Tegra::CommandList&& entries) { const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; - const s64 synchronization_ticks{Core::Timing::usToCycles(9000)}; + const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); } diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 38497678a..65a88b06c 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -2,11 +2,14 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <cstddef> #include <glad/glad.h> #include "common/logging/log.h" +#include "common/scope_exit.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { @@ -24,6 +27,7 @@ Device::Device() { max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); + has_component_indexing_bug = TestComponentIndexingBug(); } Device::Device(std::nullptr_t) { @@ -31,10 +35,12 @@ Device::Device(std::nullptr_t) { max_vertex_attributes = 16; max_varyings = 15; has_variable_aoffi = true; + has_component_indexing_bug = false; } bool Device::TestVariableAoffi() { const GLchar* AOFFI_TEST = R"(#version 430 core +// This is a unit test, please ignore me on apitrace bug reports. uniform sampler2D tex; uniform ivec2 variable_offset; void main() { @@ -51,4 +57,53 @@ void main() { return supported; } +bool Device::TestComponentIndexingBug() { + constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}"; + const GLchar* COMPONENT_TEST = R"(#version 430 core +layout (std430, binding = 0) buffer OutputBuffer { + uint output_value; +}; +layout (std140, binding = 0) uniform InputBuffer { + uvec4 input_value[4096]; +}; +layout (location = 0) uniform uint idx; +void main() { + output_value = input_value[idx >> 2][idx & 3]; +})"; + const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)}; + SCOPE_EXIT({ glDeleteProgram(shader); }); + glUseProgram(shader); + + OGLVertexArray vao; + vao.Create(); + glBindVertexArray(vao.handle); + + constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432}; + OGLBuffer ubo; + ubo.Create(); + glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle); + + OGLBuffer ssbo; + ssbo.Create(); + glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT); + + for (GLuint index = 4; index < 8; ++index) { + glInvalidateBufferData(ssbo.handle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle); + + glProgramUniform1ui(shader, 0, index); + glDrawArrays(GL_POINTS, 0, 1); + + GLuint result; + glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result); + if (result != values.at(index)) { + LOG_INFO(Render_OpenGL, log_message, true); + return true; + } + } + LOG_INFO(Render_OpenGL, log_message, false); + return false; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index de8490682..8c8c93760 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -30,13 +30,19 @@ public: return has_variable_aoffi; } + bool HasComponentIndexingBug() const { + return has_component_indexing_bug; + } + private: static bool TestVariableAoffi(); + static bool TestComponentIndexingBug(); std::size_t uniform_buffer_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; bool has_variable_aoffi{}; + bool has_component_indexing_bug{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7ee1c99c0..ac8a9e6b7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -35,8 +35,8 @@ struct UnspecializedShader { namespace { /// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { - const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; +GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { + const auto& gpu{system.GPU().Maxwell3D()}; const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; return gpu.regs.code_address.CodeAddress() + shader_config.offset; } @@ -170,7 +170,8 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, Maxwell::ShaderProgram program_type, BaseBindings base_bindings, GLenum primitive_mode, bool hint_retrievable = false) { - std::string source = "#version 430 core\n"; + std::string source = "#version 430 core\n" + "#extension GL_ARB_separate_shader_objects : enable\n\n"; source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); for (const auto& cbuf : entries.const_buffers) { @@ -349,7 +350,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, Core::Frontend::EmuWindow& emu_window, const Device& device) - : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {} + : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device}, + disk_cache{system} {} void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { @@ -545,42 +547,45 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia } Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { - if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) { - return last_shaders[static_cast<u32>(program)]; + if (!system.GPU().Maxwell3D().dirty_flags.shaders) { + return last_shaders[static_cast<std::size_t>(program)]; } - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const GPUVAddr program_addr{GetShaderAddress(program)}; + auto& memory_manager{system.GPU().MemoryManager()}; + const GPUVAddr program_addr{GetShaderAddress(system, program)}; // Look up shader in the cache based on address - const auto& host_ptr{memory_manager.GetPointer(program_addr)}; + const auto host_ptr{memory_manager.GetPointer(program_addr)}; Shader shader{TryGet(host_ptr)}; + if (shader) { + return last_shaders[static_cast<std::size_t>(program)] = shader; + } - if (!shader) { - // No shader found - create a new one - ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; - ProgramCode program_code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)}; - program_code_b = GetShaderCode(memory_manager, program_addr_b, - memory_manager.GetPointer(program_addr_b)); - } - const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); - const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; - const auto found = precompiled_shaders.find(unique_identifier); - if (found != precompiled_shaders.end()) { - shader = - std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, - precompiled_programs, found->second, host_ptr); - } else { - shader = std::make_shared<CachedShader>( - device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, - std::move(program_code), std::move(program_code_b), host_ptr); - } - Register(shader); + // No shader found - create a new one + ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; + ProgramCode program_code_b; + if (program == Maxwell::ShaderProgram::VertexA) { + const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; + program_code_b = GetShaderCode(memory_manager, program_addr_b, + memory_manager.GetPointer(program_addr_b)); + } + + const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); + const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; + const auto found = precompiled_shaders.find(unique_identifier); + if (found != precompiled_shaders.end()) { + // Create a shader from the cache + shader = std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, + precompiled_programs, found->second, host_ptr); + } else { + // Create a shader from guest memory + shader = std::make_shared<CachedShader>( + device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, + std::move(program_code), std::move(program_code_b), host_ptr); } + Register(shader); - return last_shaders[static_cast<u32>(program)] = shader; + return last_shaders[static_cast<std::size_t>(program)] = shader; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 64e5a5594..09bd0761d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -137,6 +137,7 @@ private: CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats); + Core::System& system; Core::Frontend::EmuWindow& emu_window; const Device& device; ShaderDiskCacheOpenGL disk_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6d4658c8b..3b61bf77f 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -31,6 +31,8 @@ using Tegra::Shader::IpaInterpMode; using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; using Tegra::Shader::Register; + +using namespace std::string_literals; using namespace VideoCommon::Shader; using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -93,11 +95,9 @@ private: }; /// Generates code to use for a swizzle operation. -std::string GetSwizzle(u32 elem) { - ASSERT(elem <= 3); - std::string swizzle = "."; - swizzle += "xyzw"[elem]; - return swizzle; +constexpr const char* GetSwizzle(u32 element) { + constexpr std::array<const char*, 4> swizzle = {".x", ".y", ".z", ".w"}; + return swizzle.at(element); } /// Translate topology @@ -577,9 +577,26 @@ private: if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset)); - return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset); + code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset)); + + if (!device.HasComponentIndexingBug()) { + return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), + final_offset, final_offset); + } + + // AMD's proprietary GLSL compiler emits ill code for variable component access. + // To bypass this driver bug generate 4 ifs, one per each component. + const std::string pack = code.GenerateTemporary(); + code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), + final_offset); + + const std::string result = code.GenerateTemporary(); + code.AddLine("float {};", result); + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, + pack, GetSwizzle(swizzle)); + } + return result; } UNREACHABLE_MSG("Unmanaged offset node type"); @@ -636,7 +653,7 @@ private: if (stage != ShaderStage::Fragment) { return GeometryPass("position") + GetSwizzle(element); } else { - return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element); + return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); } case Attribute::Index::PointCoord: switch (element) { @@ -921,7 +938,7 @@ private: target = [&]() -> std::string { switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { case Attribute::Index::Position: - return "position" + GetSwizzle(abuf->GetElement()); + return "position"s + GetSwizzle(abuf->GetElement()); case Attribute::Index::PointSize: return "gl_PointSize"; case Attribute::Index::ClipDistances0123: @@ -1526,6 +1543,16 @@ private: return "uintBitsToFloat(config_pack[2])"; } + template <u32 element> + std::string LocalInvocationId(Operation) { + return "utof(gl_LocalInvocationID"s + GetSwizzle(element) + ')'; + } + + template <u32 element> + std::string WorkGroupId(Operation) { + return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; + } + static constexpr OperationDecompilersArray operation_decompilers = { &GLSLDecompiler::Assign, @@ -1665,6 +1692,12 @@ private: &GLSLDecompiler::EndPrimitive, &GLSLDecompiler::YNegate, + &GLSLDecompiler::LocalInvocationId<0>, + &GLSLDecompiler::LocalInvocationId<1>, + &GLSLDecompiler::LocalInvocationId<2>, + &GLSLDecompiler::WorkGroupId<0>, + &GLSLDecompiler::WorkGroupId<1>, + &GLSLDecompiler::WorkGroupId<2>, }; std::string GetRegister(u32 index) const { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 7ab0b4553..d2bb705a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -19,8 +19,7 @@ static constexpr u32 PROGRAM_OFFSET{10}; ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; - out += "// Shader Unique Id: VS" + id + "\n\n"; + std::string out = "// Shader Unique Id: VS" + id + "\n\n"; out += GetCommonDeclarations(); out += R"( @@ -82,8 +81,7 @@ void main() { ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; - out += "// Shader Unique Id: GS" + id + "\n\n"; + std::string out = "// Shader Unique Id: GS" + id + "\n\n"; out += GetCommonDeclarations(); out += R"( @@ -113,8 +111,7 @@ void main() { ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); - std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; - out += "// Shader Unique Id: FS" + id + "\n\n"; + std::string out = "// Shader Unique Id: FS" + id + "\n\n"; out += GetCommonDeclarations(); out += R"( diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b61a6d170..a5b25aeff 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1035,6 +1035,18 @@ private: return {}; } + template <u32 element> + Id LocalInvocationId(Operation) { + UNIMPLEMENTED(); + return {}; + } + + template <u32 element> + Id WorkGroupId(Operation) { + UNIMPLEMENTED(); + return {}; + } + Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, const std::string& name) { const Id id = OpVariable(type, storage); @@ -1291,6 +1303,12 @@ private: &SPIRVDecompiler::EndPrimitive, &SPIRVDecompiler::YNegate, + &SPIRVDecompiler::LocalInvocationId<0>, + &SPIRVDecompiler::LocalInvocationId<1>, + &SPIRVDecompiler::LocalInvocationId<2>, + &SPIRVDecompiler::WorkGroupId<0>, + &SPIRVDecompiler::WorkGroupId<1>, + &SPIRVDecompiler::WorkGroupId<2>, }; const ShaderIR& ir; diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index ca7af72e1..a6c123573 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -14,6 +14,7 @@ using Tegra::Shader::ConditionCode; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +using Tegra::Shader::SystemVariable; u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -59,20 +60,33 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::MOV_SYS: { - switch (instr.sys20) { - case Tegra::Shader::SystemVariable::InvocationInfo: { - LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); - SetRegister(bb, instr.gpr0, Immediate(0u)); - break; - } - case Tegra::Shader::SystemVariable::Ydirection: { - // Config pack's third value is Y_NEGATE's state. - SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value())); - } + const Node value = [&]() { + switch (instr.sys20) { + case SystemVariable::Ydirection: + return Operation(OperationCode::YNegate); + case SystemVariable::InvocationInfo: + LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); + return Immediate(0u); + case SystemVariable::TidX: + return Operation(OperationCode::LocalInvocationIdX); + case SystemVariable::TidY: + return Operation(OperationCode::LocalInvocationIdY); + case SystemVariable::TidZ: + return Operation(OperationCode::LocalInvocationIdZ); + case SystemVariable::CtaIdX: + return Operation(OperationCode::WorkGroupIdX); + case SystemVariable::CtaIdY: + return Operation(OperationCode::WorkGroupIdY); + case SystemVariable::CtaIdZ: + return Operation(OperationCode::WorkGroupIdZ); + default: + UNIMPLEMENTED_MSG("Unhandled system move: {}", + static_cast<u32>(instr.sys20.Value())); + return Immediate(0u); + } + }(); + SetRegister(bb, instr.gpr0, value); + break; } case OpCode::Id::BRA: { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 35f72bddb..ff7472e30 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -181,7 +181,13 @@ enum class OperationCode { EmitVertex, /// () -> void EndPrimitive, /// () -> void - YNegate, /// () -> float + YNegate, /// () -> float + LocalInvocationIdX, /// () -> uint + LocalInvocationIdY, /// () -> uint + LocalInvocationIdZ, /// () -> uint + WorkGroupIdX, /// () -> uint + WorkGroupIdY, /// () -> uint + WorkGroupIdZ, /// () -> uint Amount, }; |