diff options
Diffstat (limited to 'src/video_core')
32 files changed, 212 insertions, 110 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7bbc556da..e83f25fa1 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -530,6 +530,11 @@ union Instruction { BitField<48, 16, u64> opcode; union { + BitField<8, 8, Register> gpr; + BitField<20, 24, s64> offset; + } gmem; + + union { BitField<20, 16, u64> imm20_16; BitField<20, 19, u64> imm20_19; BitField<20, 32, s64> imm20_32; @@ -812,13 +817,11 @@ union Instruction { union { BitField<48, 3, UniformType> type; BitField<46, 2, u64> cache_mode; - BitField<20, 24, s64> immediate_offset; } ldg; union { BitField<48, 3, UniformType> type; BitField<46, 2, u64> cache_mode; - BitField<20, 24, s64> immediate_offset; } stg; union { @@ -828,6 +831,11 @@ union Instruction { } al2p; union { + BitField<53, 3, UniformType> type; + BitField<52, 1, u64> extended; + } generic; + + union { BitField<0, 3, u64> pred0; BitField<3, 3, u64> pred3; BitField<7, 1, u64> abs_a; @@ -1387,10 +1395,12 @@ public: LD_L, LD_S, LD_C, + LD, // Load from generic memory + LDG, // Load from global memory ST_A, ST_L, ST_S, - LDG, // Load from global memory + ST, // Store in generic memory STG, // Store in global memory AL2P, // Transforms attribute memory into physical memory TEX, @@ -1658,10 +1668,12 @@ private: INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), + INST("100-------------", Id::LD, Type::Memory, "LD"), + INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), - INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), + INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index dbd8049f5..f9b6dfeea 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -98,9 +98,11 @@ struct FramebufferCacheKey { } }; -RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) - : res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system}, - screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { +RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, + ScreenInfo& info) + : res_cache{*this}, shader_cache{*this, system, emu_window, device}, + global_cache{*this}, system{system}, screen_info{info}, + buffer_cache(*this, STREAM_BUFFER_SIZE) { OpenGLState::ApplyDefaultState(); shader_program_manager = std::make_unique<GLShader::ProgramManager>(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 71b9c5ead..d78094138 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -48,7 +48,8 @@ struct FramebufferCacheKey; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: - explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info); + explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, + ScreenInfo& info); ~RasterizerOpenGL() override; void DrawArrays() override; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f700dc89a..7ee1c99c0 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -2,10 +2,14 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <mutex> +#include <thread> #include <boost/functional/hash.hpp> #include "common/assert.h" #include "common/hash.h" +#include "common/scope_exit.h" #include "core/core.h" +#include "core/frontend/emu_window.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" @@ -344,8 +348,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, } ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, - const Device& device) - : RasterizerCache{rasterizer}, device{device}, disk_cache{system} {} + Core::Frontend::EmuWindow& emu_window, const Device& device) + : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {} void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { @@ -353,62 +357,107 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, if (!transferable) { return; } - const auto [raws, usages] = *transferable; + const auto [raws, shader_usages] = *transferable; auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); const auto supported_formats{GetSupportedFormats()}; - const auto unspecialized{ + const auto unspecialized_shaders{ GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)}; - if (stop_loading) + if (stop_loading) { return; + } // Track if precompiled cache was altered during loading to know if we have to serialize the // virtual precompiled cache file back to the hard drive bool precompiled_cache_altered = false; - // Build shaders - if (callback) - callback(VideoCore::LoadCallbackStage::Build, 0, usages.size()); - for (std::size_t i = 0; i < usages.size(); ++i) { - if (stop_loading) - return; + // Inform the frontend about shader build initialization + if (callback) { + callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); + } - const auto& usage{usages[i]}; - LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier, - i + 1, usages.size()); + std::mutex mutex; + std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex + std::atomic_bool compilation_failed = false; - const auto& unspec{unspecialized.at(usage.unique_identifier)}; - const auto dump_it = dumps.find(usage); + const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, + std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages, + const ShaderDumpsMap& dumps) { + context->MakeCurrent(); + SCOPE_EXIT({ return context->DoneCurrent(); }); - CachedProgram shader; - if (dump_it != dumps.end()) { - // If the shader is dumped, attempt to load it with - shader = GeneratePrecompiledProgram(dump_it->second, supported_formats); + for (std::size_t i = begin; i < end; ++i) { + if (stop_loading || compilation_failed) { + return; + } + const auto& usage{shader_usages[i]}; + LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})", + usage.unique_identifier, i, shader_usages.size()); + + const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; + const auto dump{dumps.find(usage)}; + + CachedProgram shader; + if (dump != dumps.end()) { + // If the shader is dumped, attempt to load it with + shader = GeneratePrecompiledProgram(dump->second, supported_formats); + if (!shader) { + compilation_failed = true; + return; + } + } if (!shader) { - // Invalidate the precompiled cache if a shader dumped shader was rejected - disk_cache.InvalidatePrecompiled(); - precompiled_cache_altered = true; - dumps.clear(); + shader = SpecializeShader(unspecialized.code, unspecialized.entries, + unspecialized.program_type, usage.bindings, + usage.primitive, true); } + + std::scoped_lock lock(mutex); + if (callback) { + callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, + shader_usages.size()); + } + + precompiled_programs.emplace(usage, std::move(shader)); } - if (!shader) { - shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type, - usage.bindings, usage.primitive, true); - } - precompiled_programs.insert({usage, std::move(shader)}); + }; + + const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)}; + const std::size_t bucket_size{shader_usages.size() / num_workers}; + std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); + std::vector<std::thread> threads(num_workers); + for (std::size_t i = 0; i < num_workers; ++i) { + const bool is_last_worker = i + 1 == num_workers; + const std::size_t start{bucket_size * i}; + const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; + + // On some platforms the shared context has to be created from the GUI thread + contexts[i] = emu_window.CreateSharedContext(); + threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); + } + for (auto& thread : threads) { + thread.join(); + } - if (callback) - callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size()); + if (compilation_failed) { + // Invalidate the precompiled cache if a shader dumped shader was rejected + disk_cache.InvalidatePrecompiled(); + dumps.clear(); + precompiled_cache_altered = true; + return; + } + if (stop_loading) { + return; } // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before // precompiling them - for (std::size_t i = 0; i < usages.size(); ++i) { - const auto& usage{usages[i]}; + for (std::size_t i = 0; i < shader_usages.size(); ++i) { + const auto& usage{shader_usages[i]}; if (dumps.find(usage) == dumps.end()) { - const auto& program = precompiled_programs.at(usage); + const auto& program{precompiled_programs.at(usage)}; disk_cache.SaveDump(usage, program->handle); precompiled_cache_altered = true; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 31b979987..64e5a5594 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -22,7 +22,11 @@ namespace Core { class System; -} // namespace Core +} + +namespace Core::Frontend { +class EmuWindow; +} namespace OpenGL { @@ -111,7 +115,7 @@ private: class ShaderCacheOpenGL final : public RasterizerCache<Shader> { public: explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, - const Device& device); + Core::Frontend::EmuWindow& emu_window, const Device& device); /// Loads disk cache for the current game void LoadDiskCache(const std::atomic_bool& stop_loading, @@ -133,13 +137,13 @@ private: CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats); + Core::Frontend::EmuWindow& emu_window; const Device& device; - - std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; - ShaderDiskCacheOpenGL disk_cache; + PrecompiledShaders precompiled_shaders; PrecompiledPrograms precompiled_programs; + std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index fba9c594a..ee4a45ca2 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -183,8 +183,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() { return {{raws, usages}}; } -std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> +std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> ShaderDiskCacheOpenGL::LoadPrecompiled() { if (!IsUsable()) return {}; @@ -208,8 +207,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { return *result; } -std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> +std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { // Read compressed file from disk and decompress to virtual precompiled cache file std::vector<u8> compressed(file.GetSize()); @@ -230,7 +228,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { } std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled; - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps; + ShaderDumpsMap dumps; while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { PrecompiledEntryKind kind{}; if (!LoadObjectFromPrecompiled(kind)) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 2da0a4a23..ecd72ba58 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -33,6 +33,11 @@ namespace OpenGL { using ProgramCode = std::vector<u64>; using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct ShaderDiskCacheUsage; +struct ShaderDiskCacheDump; + +using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; + /// Allocated bindings used by an OpenGL shader program struct BaseBindings { u32 cbuf{}; @@ -294,4 +299,4 @@ private: bool tried_to_load{}; }; -} // namespace OpenGL
\ No newline at end of file +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index d69cba9c3..3451d321d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -97,8 +97,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons return matrix; } -RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system) - : VideoCore::RendererBase{window}, system{system} {} +RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) + : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {} RendererOpenGL::~RendererOpenGL() = default; @@ -265,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() { } // Initialize sRGB Usage OpenGLState::ClearsRGBUsed(); - rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info); + rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 6cbf9d2cb..4aebf2321 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -45,7 +45,7 @@ struct ScreenInfo { class RendererOpenGL : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system); + explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); ~RendererOpenGL() override; /// Swap buffers (render frame) @@ -77,6 +77,7 @@ private: void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, const TextureInfo& texture); + Core::Frontend::EmuWindow& emu_window; Core::System& system; OpenGLState state; diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 3190e2d7c..b4859bc1e 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -152,4 +153,4 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 2098c1170..3a29c4a46 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index fbcd35b18..5341e460f 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -47,4 +48,4 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 0d139c0d2..3095f2fd4 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -49,4 +49,4 @@ u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 3ed5ccc5a..679ac0d4e 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -93,4 +93,4 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation } } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index 6a95dc928..1ae192c6a 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -46,4 +46,4 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index 601d66f1f..0b12a0d08 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -38,4 +38,4 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 0559cc8de..a1d04c6e5 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -56,4 +56,4 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index 1bd6755dd..cc522f1de 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -55,4 +55,4 @@ u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index 9285b8d05..9d2322a1d 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -53,4 +53,4 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 1dd94bf9d..755f2ec44 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -6,6 +6,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -64,4 +65,4 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index 6e59eb650..fba44d714 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -59,4 +59,4 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index a3bf17eba..a4cdaf74d 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -47,4 +46,4 @@ u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index aad836d24..a6a1fb632 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -50,4 +50,4 @@ u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 165c2b41b..e6a010a7d 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -146,12 +146,25 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::LD: case OpCode::Id::LDG: { + const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { + switch (opcode->get().GetId()) { + case OpCode::Id::LD: + UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); + return instr.generic.type; + case OpCode::Id::LDG: + return instr.ldg.type; + default: + UNREACHABLE(); + return {}; + } + }(); + const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), - static_cast<u32>(instr.ldg.immediate_offset.Value()), false); + TrackAndGetGlobalMemory(bb, instr, false); - const u32 count = GetUniformTypeElementsCount(instr.ldg.type); + const u32 count = GetUniformTypeElementsCount(type); for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = @@ -165,28 +178,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } - case OpCode::Id::STG: { - const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), - static_cast<u32>(instr.stg.immediate_offset.Value()), true); - - // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} - SetTemporal(bb, 0, real_address_base); - - const u32 count = GetUniformTypeElementsCount(instr.stg.type); - for (u32 i = 0; i < count; ++i) { - SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); - } - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); - const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); - - bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); - } - break; - } case OpCode::Id::ST_A: { UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, "Indirect attribute loads are not supported"); @@ -242,6 +233,41 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::ST: + case OpCode::Id::STG: { + const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { + switch (opcode->get().GetId()) { + case OpCode::Id::ST: + UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); + return instr.generic.type; + case OpCode::Id::STG: + return instr.stg.type; + default: + UNREACHABLE(); + return {}; + } + }(); + + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, instr, true); + + // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} + SetTemporal(bb, 0, real_address_base); + + const u32 count = GetUniformTypeElementsCount(type); + for (u32 i = 0; i < count; ++i) { + SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); + } + for (u32 i = 0; i < count; ++i) { + const Node it_offset = Immediate(i * 4); + const Node real_address = + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + + bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); + } + break; + } case OpCode::Id::AL2P: { // Ignore al2p.direction since we don't care about it. @@ -265,9 +291,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, - Node addr_register, - u32 immediate_offset, + Instruction instr, bool is_write) { + const auto addr_register{GetRegister(instr.gmem.gpr)}; + const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; + const Node base_address{ TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; const auto cbuf = std::get_if<CbufNode>(base_address); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index fa17c45b5..ca7af72e1 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp index 83c61680e..71844c42b 100644 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp @@ -64,4 +64,4 @@ u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index d0495995d..387491bd3 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -43,4 +43,4 @@ u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index f070e8912..f8659e48e 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -48,4 +48,4 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 951e85f44..44ae87ece 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -52,4 +52,4 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 956c01d9b..cb9ab72b1 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -108,4 +108,4 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, } } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 153ad1fd0..8a6ee5cf5 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -39,8 +39,8 @@ Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) { return StoreNode(ConditionalNode(condition, std::move(code))); } -Node ShaderIR::Comment(const std::string& text) { - return StoreNode(CommentNode(text)); +Node ShaderIR::Comment(std::string text) { + return StoreNode(CommentNode(std::move(text))); } Node ShaderIR::Immediate(u32 value) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0bf124252..35f72bddb 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -663,7 +663,7 @@ private: /// Creates a conditional node Node Conditional(Node condition, std::vector<Node>&& code); /// Creates a commentary - Node Comment(const std::string& text); + Node Comment(std::string text); /// Creates an u32 immediate Node Immediate(u32 value); /// Creates a s32 immediate @@ -818,10 +818,8 @@ private: std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor) const; - std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb, - Node addr_register, - u32 immediate_offset, - bool is_write); + std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory( + NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write); template <typename... T> Node Operation(OperationCode code, const T*... operands) { |