diff options
Diffstat (limited to 'src/video_core')
26 files changed, 1113 insertions, 145 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 49dc5abe0..930b605af 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -231,18 +231,6 @@ enum class AtomicOp : u64 { Or = 6, Xor = 7, Exch = 8, -}; - -enum class GlobalAtomicOp : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, SafeAdd = 10, }; @@ -1001,7 +989,7 @@ union Instruction { } stg; union { - BitField<52, 4, GlobalAtomicOp> operation; + BitField<52, 4, AtomicOp> operation; BitField<49, 3, GlobalAtomicType> type; BitField<28, 20, s64> offset; } atom; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index e8f763ce9..8acf2eda2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -7,6 +7,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/frontend/emu_window.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" @@ -16,14 +17,15 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" +#include "video_core/video_core.h" namespace Tegra { MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) - : system{system}, renderer{renderer}, is_async{is_async} { - auto& rasterizer{renderer.Rasterizer()}; +GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async) + : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { + auto& rasterizer{renderer->Rasterizer()}; memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); @@ -137,7 +139,7 @@ u64 GPU::GetTicks() const { } void GPU::FlushCommands() { - renderer.Rasterizer().FlushCommands(); + renderer->Rasterizer().FlushCommands(); } // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 64acb17df..ced9d7e28 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -25,8 +25,11 @@ inline u8* FromCacheAddr(CacheAddr cache_addr) { } namespace Core { -class System; +namespace Frontend { +class EmuWindow; } +class System; +} // namespace Core namespace VideoCore { class RendererBase; @@ -129,7 +132,8 @@ class MemoryManager; class GPU { public: - explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async); + explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, + bool is_async); virtual ~GPU(); @@ -174,6 +178,14 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + VideoCore::RendererBase& Renderer() { + return *renderer; + } + + const VideoCore::RendererBase& Renderer() const { + return *renderer; + } + // Waits for the GPU to finish working virtual void WaitIdle() const = 0; @@ -287,7 +299,7 @@ private: protected: std::unique_ptr<Tegra::DmaPusher> dma_pusher; Core::System& system; - VideoCore::RendererBase& renderer; + std::unique_ptr<VideoCore::RendererBase> renderer; private: std::unique_ptr<Tegra::MemoryManager> memory_manager; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 04222d060..925be8d7b 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -10,13 +10,16 @@ namespace VideoCommon { -GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) - : GPU(system, renderer, true), gpu_thread{system} {} +GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, + std::unique_ptr<Core::Frontend::GraphicsContext>&& context) + : GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)), + cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {} GPUAsynch::~GPUAsynch() = default; void GPUAsynch::Start() { - gpu_thread.StartThread(renderer, *dma_pusher); + cpu_context->MakeCurrent(); + gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher); } void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 1241ade1d..265c62758 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -7,6 +7,10 @@ #include "video_core/gpu.h" #include "video_core/gpu_thread.h" +namespace Core::Frontend { +class GraphicsContext; +} + namespace VideoCore { class RendererBase; } // namespace VideoCore @@ -16,7 +20,8 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU asynchronously class GPUAsynch final : public Tegra::GPU { public: - explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); + explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, + std::unique_ptr<Core::Frontend::GraphicsContext>&& context); ~GPUAsynch() override; void Start() override; @@ -32,6 +37,8 @@ protected: private: GPUThread::ThreadManager gpu_thread; + std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; + std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context; }; } // namespace VideoCommon diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index d48221077..bd5278a5c 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -7,12 +7,15 @@ namespace VideoCommon { -GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) - : GPU(system, renderer, false) {} +GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, + std::unique_ptr<Core::Frontend::GraphicsContext>&& context) + : GPU(system, std::move(renderer), false), context{std::move(context)} {} GPUSynch::~GPUSynch() = default; -void GPUSynch::Start() {} +void GPUSynch::Start() { + context->MakeCurrent(); +} void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { dma_pusher->Push(std::move(entries)); @@ -20,19 +23,19 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { } void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - renderer.SwapBuffers(framebuffer); + renderer->SwapBuffers(framebuffer); } void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { - renderer.Rasterizer().FlushRegion(addr, size); + renderer->Rasterizer().FlushRegion(addr, size); } void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) { - renderer.Rasterizer().InvalidateRegion(addr, size); + renderer->Rasterizer().InvalidateRegion(addr, size); } void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { - renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); + renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); } } // namespace VideoCommon diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index c71baee89..866a94c8c 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -6,6 +6,10 @@ #include "video_core/gpu.h" +namespace Core::Frontend { +class GraphicsContext; +} + namespace VideoCore { class RendererBase; } // namespace VideoCore @@ -15,7 +19,8 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU synchronously class GPUSynch final : public Tegra::GPU { public: - explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); + explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, + std::unique_ptr<Core::Frontend::GraphicsContext>&& context); ~GPUSynch() override; void Start() override; @@ -29,6 +34,9 @@ public: protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, [[maybe_unused]] u32 value) const override {} + +private: + std::unique_ptr<Core::Frontend::GraphicsContext> context; }; } // namespace VideoCommon diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index b1088af3d..270c7ae0d 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,7 +5,7 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/frontend/scope_acquire_context.h" +#include "core/frontend/emu_window.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" #include "video_core/gpu_thread.h" @@ -14,8 +14,8 @@ namespace VideoCommon::GPUThread { /// Runs the GPU thread -static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, - SynchState& state) { +static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, + Tegra::DmaPusher& dma_pusher, SynchState& state) { MicroProfileOnThreadCreate("GpuThread"); // Wait for first GPU command before acquiring the window context @@ -27,7 +27,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p return; } - Core::Frontend::ScopeAcquireContext acquire_context{renderer.GetRenderWindow()}; + auto current_context = context.Acquire(); CommandDataContainer next; while (state.is_running) { @@ -62,8 +62,11 @@ ThreadManager::~ThreadManager() { thread.join(); } -void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { - thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; +void ThreadManager::StartThread(VideoCore::RendererBase& renderer, + Core::Frontend::GraphicsContext& context, + Tegra::DmaPusher& dma_pusher) { + thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher), + std::ref(state)}; } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 882e2d9c7..be36c580e 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -10,7 +10,6 @@ #include <optional> #include <thread> #include <variant> - #include "common/threadsafe_queue.h" #include "video_core/gpu.h" @@ -20,6 +19,9 @@ class DmaPusher; } // namespace Tegra namespace Core { +namespace Frontend { +class GraphicsContext; +} class System; } // namespace Core @@ -99,7 +101,8 @@ public: ~ThreadManager(); /// Creates and starts the GPU thread. - void StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); + void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, + Tegra::DmaPusher& dma_pusher); /// Push GPU command entries to be processed void SubmitList(Tegra::CommandList&& entries); diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 5ec99a126..1d85219b6 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -46,7 +46,8 @@ public: /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer /// specific implementation) - virtual void TryPresent(int timeout_ms) = 0; + /// Returns true if a frame was drawn + virtual bool TryPresent(int timeout_ms) = 0; // Getter/setter functions: // ------------------------ diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e3d31c3eb..046ee55a5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -327,8 +327,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, std::size_t end) { - context->MakeCurrent(); - SCOPE_EXIT({ return context->DoneCurrent(); }); + const auto scope = context->Acquire(); for (std::size_t i = begin; i < end; ++i) { if (stop_loading) { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 8aa4a7ac9..c7d24cf14 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2114,6 +2114,10 @@ private: template <const std::string_view& opname, Type type> Expression Atomic(Operation operation) { + if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { + UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); + return {}; + } return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), Visit(operation[1]).As(type)), type}; @@ -2307,6 +2311,8 @@ private: ~Func() = delete; static constexpr std::string_view Add = "Add"; + static constexpr std::string_view Min = "Min"; + static constexpr std::string_view Max = "Max"; static constexpr std::string_view And = "And"; static constexpr std::string_view Or = "Or"; static constexpr std::string_view Xor = "Xor"; @@ -2457,7 +2463,21 @@ private: &GLSLDecompiler::AtomicImage<Func::Xor>, &GLSLDecompiler::AtomicImage<Func::Exchange>, + &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>, &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, + &GLSLDecompiler::Atomic<Func::Min, Type::Uint>, + &GLSLDecompiler::Atomic<Func::Max, Type::Uint>, + &GLSLDecompiler::Atomic<Func::And, Type::Uint>, + &GLSLDecompiler::Atomic<Func::Or, Type::Uint>, + &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>, + + &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>, + &GLSLDecompiler::Atomic<Func::Add, Type::Int>, + &GLSLDecompiler::Atomic<Func::Min, Type::Int>, + &GLSLDecompiler::Atomic<Func::Max, Type::Int>, + &GLSLDecompiler::Atomic<Func::And, Type::Int>, + &GLSLDecompiler::Atomic<Func::Or, Type::Int>, + &GLSLDecompiler::Atomic<Func::Xor, Type::Int>, &GLSLDecompiler::Branch, &GLSLDecompiler::BranchIndirect, diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index fca5e3ec0..f1a28cc21 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -30,8 +30,6 @@ namespace OpenGL { namespace { -// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have -// to wait on available presentation frames. constexpr std::size_t SWAP_CHAIN_SIZE = 3; struct Frame { @@ -214,7 +212,7 @@ public: std::deque<Frame*> present_queue; Frame* previous_frame{}; - FrameMailbox() : has_debug_tool{HasDebugTool()} { + FrameMailbox() { for (auto& frame : swap_chain) { free_queue.push(&frame); } @@ -285,13 +283,9 @@ public: std::unique_lock lock{swap_chain_lock}; present_queue.push_front(frame); present_cv.notify_one(); - - DebugNotifyNextFrame(); } Frame* TryGetPresentFrame(int timeout_ms) { - DebugWaitForNextFrame(); - std::unique_lock lock{swap_chain_lock}; // wait for new entries in the present_queue present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), @@ -317,38 +311,12 @@ public: previous_frame = frame; return frame; } - -private: - std::mutex debug_synch_mutex; - std::condition_variable debug_synch_condition; - std::atomic_int frame_for_debug{}; - const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step - - /// Signal that a new frame is available (called from GPU thread) - void DebugNotifyNextFrame() { - if (!has_debug_tool) { - return; - } - frame_for_debug++; - std::lock_guard lock{debug_synch_mutex}; - debug_synch_condition.notify_one(); - } - - /// Wait for a new frame to be available (called from presentation thread) - void DebugWaitForNextFrame() { - if (!has_debug_tool) { - return; - } - const int last_frame = frame_for_debug; - std::unique_lock lock{debug_synch_mutex}; - debug_synch_condition.wait(lock, - [this, last_frame] { return frame_for_debug > last_frame; }); - } }; -RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) +RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, + Core::Frontend::GraphicsContext& context) : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, - frame_mailbox{std::make_unique<FrameMailbox>()} {} + frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {} RendererOpenGL::~RendererOpenGL() = default; @@ -356,8 +324,6 @@ MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 12 MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128)); void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - render_window.PollEvents(); - if (!framebuffer) { return; } @@ -413,6 +379,13 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { m_current_frame++; rasterizer->TickFrame(); } + + render_window.PollEvents(); + if (has_debug_tool) { + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + Present(0); + context.SwapBuffers(); + } } void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { @@ -480,6 +453,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color } void RendererOpenGL::InitOpenGLObjects() { + frame_mailbox = std::make_unique<FrameMailbox>(); + glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); @@ -692,12 +667,21 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } -void RendererOpenGL::TryPresent(int timeout_ms) { +bool RendererOpenGL::TryPresent(int timeout_ms) { + if (has_debug_tool) { + LOG_DEBUG(Render_OpenGL, + "Skipping presentation because we are presenting on the main context"); + return false; + } + return Present(timeout_ms); +} + +bool RendererOpenGL::Present(int timeout_ms) { const auto& layout = render_window.GetFramebufferLayout(); auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms); if (!frame) { LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present"); - return; + return false; } // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a @@ -725,6 +709,7 @@ void RendererOpenGL::TryPresent(int timeout_ms) { glFlush(); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + return true; } void RendererOpenGL::RenderScreenshot() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 33073ce5b..50b647661 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -55,13 +55,14 @@ class FrameMailbox; class RendererOpenGL final : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); + explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, + Core::Frontend::GraphicsContext& context); ~RendererOpenGL() override; bool Init() override; void ShutDown() override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void TryPresent(int timeout_ms) override; + bool TryPresent(int timeout_ms) override; private: /// Initializes the OpenGL state and creates persistent objects. @@ -89,8 +90,11 @@ private: void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer); + bool Present(int timeout_ms); + Core::Frontend::EmuWindow& emu_window; Core::System& system; + Core::Frontend::GraphicsContext& context; StateTracker state_tracker{system}; @@ -115,6 +119,8 @@ private: /// Frame presentation mailbox std::unique_ptr<FrameMailbox> frame_mailbox; + + bool has_debug_tool = false; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 42bb01418..6953aaafe 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -141,8 +141,9 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.PollEvents(); } -void RendererVulkan::TryPresent(int /*timeout_ms*/) { +bool RendererVulkan::TryPresent(int /*timeout_ms*/) { // TODO (bunnei): ImplementMe + return true; } bool RendererVulkan::Init() { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 3da08d2e4..d14384e79 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -42,7 +42,7 @@ public: bool Init() override; void ShutDown() override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void TryPresent(int timeout_ms) override; + bool TryPresent(int timeout_ms) override; private: std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 51ecb5567..d67f08cf9 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1941,7 +1941,11 @@ private: return {}; } - Expression AtomicAdd(Operation operation) { + template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, + Type value_type = result_type> + Expression Atomic(Operation operation) { + const Id type_def = GetTypeDefinition(result_type); + Id pointer; if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { pointer = GetSharedMemoryPointer(*smem); @@ -1949,14 +1953,15 @@ private: pointer = GetGlobalMemoryPointer(*gmem); } else { UNREACHABLE(); - return {Constant(t_uint, 0), Type::Uint}; + return {Constant(type_def, 0), result_type}; } + const Id value = As(Visit(operation[1]), value_type); + const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); - const Id semantics = Constant(t_uint, 0U); + const Id semantics = Constant(type_def, 0); - const Id value = AsUint(Visit(operation[1])); - return {OpAtomicIAdd(t_uint, pointer, scope, semantics, value), Type::Uint}; + return {(this->*func)(type_def, pointer, scope, semantics, value), result_type}; } Expression Branch(Operation operation) { @@ -2545,7 +2550,21 @@ private: &SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageExchange, - &SPIRVDecompiler::AtomicAdd, + &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>, + + &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>, &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index c412b7f20..9b94dfff1 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -339,4 +339,412 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe return VK_SUCCESS; } +Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions, + InstanceDispatch& dld) noexcept { + VkApplicationInfo application_info; + application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + application_info.pNext = nullptr; + application_info.pApplicationName = "yuzu Emulator"; + application_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); + application_info.pEngineName = "yuzu Emulator"; + application_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); + application_info.apiVersion = VK_API_VERSION_1_1; + + VkInstanceCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.pApplicationInfo = &application_info; + ci.enabledLayerCount = layers.size(); + ci.ppEnabledLayerNames = layers.data(); + ci.enabledExtensionCount = extensions.size(); + ci.ppEnabledExtensionNames = extensions.data(); + + VkInstance instance; + if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { + // Failed to create the instance. + return {}; + } + if (!Proc(dld.vkDestroyInstance, dld, "vkDestroyInstance", instance)) { + // We successfully created an instance but the destroy function couldn't be loaded. + // This is a good moment to panic. + return {}; + } + + return Instance(instance, dld); +} + +std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() { + u32 num; + if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) { + return std::nullopt; + } + std::vector<VkPhysicalDevice> physical_devices(num); + if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) { + return std::nullopt; + } + return physical_devices; +} + +DebugCallback Instance::TryCreateDebugCallback( + PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { + VkDebugUtilsMessengerCreateInfoEXT ci; + ci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; + ci.pNext = nullptr; + ci.flags = 0; + ci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; + ci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + ci.pfnUserCallback = callback; + ci.pUserData = nullptr; + + VkDebugUtilsMessengerEXT messenger; + if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) { + return {}; + } + return DebugCallback(messenger, handle, *dld); +} + +std::vector<VkCheckpointDataNV> Queue::GetCheckpointDataNV(const DeviceDispatch& dld) const { + if (!dld.vkGetQueueCheckpointDataNV) { + return {}; + } + u32 num; + dld.vkGetQueueCheckpointDataNV(queue, &num, nullptr); + std::vector<VkCheckpointDataNV> checkpoints(num); + dld.vkGetQueueCheckpointDataNV(queue, &num, checkpoints.data()); + return checkpoints; +} + +void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { + Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); +} + +void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { + Check(dld->vkBindImageMemory(owner, handle, memory, offset)); +} + +DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { + const std::size_t num = ai.descriptorSetCount; + std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num); + switch (const VkResult result = dld->vkAllocateDescriptorSets(owner, &ai, sets.get())) { + case VK_SUCCESS: + return DescriptorSets(std::move(sets), num, owner, handle, *dld); + case VK_ERROR_OUT_OF_POOL_MEMORY: + return {}; + default: + throw Exception(result); + } +} + +CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { + VkCommandBufferAllocateInfo ai; + ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + ai.pNext = nullptr; + ai.commandPool = handle; + ai.level = level; + ai.commandBufferCount = static_cast<u32>(num_buffers); + + std::unique_ptr buffers = std::make_unique<VkCommandBuffer[]>(num_buffers); + switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) { + case VK_SUCCESS: + return CommandBuffers(std::move(buffers), num_buffers, owner, handle, *dld); + case VK_ERROR_OUT_OF_POOL_MEMORY: + return {}; + default: + throw Exception(result); + } +} + +std::vector<VkImage> SwapchainKHR::GetImages() const { + u32 num; + Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); + std::vector<VkImage> images(num); + Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, images.data())); + return images; +} + +Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, + Span<const char*> enabled_extensions, + const VkPhysicalDeviceFeatures2& enabled_features, + DeviceDispatch& dld) noexcept { + VkDeviceCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + ci.pNext = &enabled_features; + ci.flags = 0; + ci.queueCreateInfoCount = queues_ci.size(); + ci.pQueueCreateInfos = queues_ci.data(); + ci.enabledLayerCount = 0; + ci.ppEnabledLayerNames = nullptr; + ci.enabledExtensionCount = enabled_extensions.size(); + ci.ppEnabledExtensionNames = enabled_extensions.data(); + ci.pEnabledFeatures = nullptr; + + VkDevice device; + if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { + return {}; + } + Load(device, dld); + return Device(device, dld); +} + +Queue Device::GetQueue(u32 family_index) const noexcept { + VkQueue queue; + dld->vkGetDeviceQueue(handle, family_index, 0, &queue); + return Queue(queue, *dld); +} + +Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const { + VkBuffer object; + Check(dld->vkCreateBuffer(handle, &ci, nullptr, &object)); + return Buffer(object, handle, *dld); +} + +BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const { + VkBufferView object; + Check(dld->vkCreateBufferView(handle, &ci, nullptr, &object)); + return BufferView(object, handle, *dld); +} + +Image Device::CreateImage(const VkImageCreateInfo& ci) const { + VkImage object; + Check(dld->vkCreateImage(handle, &ci, nullptr, &object)); + return Image(object, handle, *dld); +} + +ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const { + VkImageView object; + Check(dld->vkCreateImageView(handle, &ci, nullptr, &object)); + return ImageView(object, handle, *dld); +} + +Semaphore Device::CreateSemaphore() const { + VkSemaphoreCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + + VkSemaphore object; + Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object)); + return Semaphore(object, handle, *dld); +} + +Fence Device::CreateFence(const VkFenceCreateInfo& ci) const { + VkFence object; + Check(dld->vkCreateFence(handle, &ci, nullptr, &object)); + return Fence(object, handle, *dld); +} + +DescriptorPool Device::CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const { + VkDescriptorPool object; + Check(dld->vkCreateDescriptorPool(handle, &ci, nullptr, &object)); + return DescriptorPool(object, handle, *dld); +} + +RenderPass Device::CreateRenderPass(const VkRenderPassCreateInfo& ci) const { + VkRenderPass object; + Check(dld->vkCreateRenderPass(handle, &ci, nullptr, &object)); + return RenderPass(object, handle, *dld); +} + +DescriptorSetLayout Device::CreateDescriptorSetLayout( + const VkDescriptorSetLayoutCreateInfo& ci) const { + VkDescriptorSetLayout object; + Check(dld->vkCreateDescriptorSetLayout(handle, &ci, nullptr, &object)); + return DescriptorSetLayout(object, handle, *dld); +} + +PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const { + VkPipelineLayout object; + Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object)); + return PipelineLayout(object, handle, *dld); +} + +Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const { + VkPipeline object; + Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object)); + return Pipeline(object, handle, *dld); +} + +Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const { + VkPipeline object; + Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object)); + return Pipeline(object, handle, *dld); +} + +Sampler Device::CreateSampler(const VkSamplerCreateInfo& ci) const { + VkSampler object; + Check(dld->vkCreateSampler(handle, &ci, nullptr, &object)); + return Sampler(object, handle, *dld); +} + +Framebuffer Device::CreateFramebuffer(const VkFramebufferCreateInfo& ci) const { + VkFramebuffer object; + Check(dld->vkCreateFramebuffer(handle, &ci, nullptr, &object)); + return Framebuffer(object, handle, *dld); +} + +CommandPool Device::CreateCommandPool(const VkCommandPoolCreateInfo& ci) const { + VkCommandPool object; + Check(dld->vkCreateCommandPool(handle, &ci, nullptr, &object)); + return CommandPool(object, handle, *dld); +} + +DescriptorUpdateTemplateKHR Device::CreateDescriptorUpdateTemplateKHR( + const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const { + VkDescriptorUpdateTemplateKHR object; + Check(dld->vkCreateDescriptorUpdateTemplateKHR(handle, &ci, nullptr, &object)); + return DescriptorUpdateTemplateKHR(object, handle, *dld); +} + +QueryPool Device::CreateQueryPool(const VkQueryPoolCreateInfo& ci) const { + VkQueryPool object; + Check(dld->vkCreateQueryPool(handle, &ci, nullptr, &object)); + return QueryPool(object, handle, *dld); +} + +ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) const { + VkShaderModule object; + Check(dld->vkCreateShaderModule(handle, &ci, nullptr, &object)); + return ShaderModule(object, handle, *dld); +} + +SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const { + VkSwapchainKHR object; + Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object)); + return SwapchainKHR(object, handle, *dld); +} + +DeviceMemory Device::TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept { + VkDeviceMemory memory; + if (dld->vkAllocateMemory(handle, &ai, nullptr, &memory) != VK_SUCCESS) { + return {}; + } + return DeviceMemory(memory, handle, *dld); +} + +DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const { + VkDeviceMemory memory; + Check(dld->vkAllocateMemory(handle, &ai, nullptr, &memory)); + return DeviceMemory(memory, handle, *dld); +} + +VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { + VkMemoryRequirements requirements; + dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); + return requirements; +} + +VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { + VkMemoryRequirements requirements; + dld->vkGetImageMemoryRequirements(handle, image, &requirements); + return requirements; +} + +void Device::UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes, + Span<VkCopyDescriptorSet> copies) const noexcept { + dld->vkUpdateDescriptorSets(handle, writes.size(), writes.data(), copies.size(), copies.data()); +} + +VkPhysicalDeviceProperties PhysicalDevice::GetProperties() const noexcept { + VkPhysicalDeviceProperties properties; + dld->vkGetPhysicalDeviceProperties(physical_device, &properties); + return properties; +} + +void PhysicalDevice::GetProperties2KHR(VkPhysicalDeviceProperties2KHR& properties) const noexcept { + dld->vkGetPhysicalDeviceProperties2KHR(physical_device, &properties); +} + +VkPhysicalDeviceFeatures PhysicalDevice::GetFeatures() const noexcept { + VkPhysicalDeviceFeatures2KHR features2; + features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; + features2.pNext = nullptr; + dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features2); + return features2.features; +} + +void PhysicalDevice::GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR& features) const noexcept { + dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features); +} + +VkFormatProperties PhysicalDevice::GetFormatProperties(VkFormat format) const noexcept { + VkFormatProperties properties; + dld->vkGetPhysicalDeviceFormatProperties(physical_device, format, &properties); + return properties; +} + +std::vector<VkExtensionProperties> PhysicalDevice::EnumerateDeviceExtensionProperties() const { + u32 num; + dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, nullptr); + std::vector<VkExtensionProperties> properties(num); + dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, properties.data()); + return properties; +} + +std::vector<VkQueueFamilyProperties> PhysicalDevice::GetQueueFamilyProperties() const { + u32 num; + dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, nullptr); + std::vector<VkQueueFamilyProperties> properties(num); + dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, properties.data()); + return properties; +} + +bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR surface) const { + VkBool32 supported; + Check(dld->vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, queue_family_index, surface, + &supported)); + return supported == VK_TRUE; +} + +VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const + noexcept { + VkSurfaceCapabilitiesKHR capabilities; + Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities)); + return capabilities; +} + +std::vector<VkSurfaceFormatKHR> PhysicalDevice::GetSurfaceFormatsKHR(VkSurfaceKHR surface) const { + u32 num; + Check(dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &num, nullptr)); + std::vector<VkSurfaceFormatKHR> formats(num); + Check( + dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &num, formats.data())); + return formats; +} + +std::vector<VkPresentModeKHR> PhysicalDevice::GetSurfacePresentModesKHR( + VkSurfaceKHR surface) const { + u32 num; + Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &num, nullptr)); + std::vector<VkPresentModeKHR> modes(num); + Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &num, + modes.data())); + return modes; +} + +VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noexcept { + VkPhysicalDeviceMemoryProperties properties; + dld->vkGetPhysicalDeviceMemoryProperties(physical_device, &properties); + return properties; +} + +std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties( + const InstanceDispatch& dld) { + u32 num; + if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, nullptr) != VK_SUCCESS) { + return std::nullopt; + } + std::vector<VkExtensionProperties> properties(num); + if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, properties.data()) != + VK_SUCCESS) { + return std::nullopt; + } + return properties; +} + } // namespace Vulkan::vk diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 686c2b9a1..fb3657819 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -542,4 +542,446 @@ using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>; using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>; using CommandBuffers = PoolAllocations<VkCommandBuffer, VkCommandPool>; +/// Vulkan instance owning handle. +class Instance : public Handle<VkInstance, NoOwner, InstanceDispatch> { + using Handle<VkInstance, NoOwner, InstanceDispatch>::Handle; + +public: + /// Creates a Vulkan instance. Use "operator bool" for error handling. + static Instance Create(Span<const char*> layers, Span<const char*> extensions, + InstanceDispatch& dld) noexcept; + + /// Enumerates physical devices. + /// @return Physical devices and an empty handle on failure. + std::optional<std::vector<VkPhysicalDevice>> EnumeratePhysicalDevices(); + + /// Tries to create a debug callback messenger. Returns an empty handle on failure. + DebugCallback TryCreateDebugCallback(PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept; +}; + +class Queue { +public: + /// Construct an empty queue handle. + constexpr Queue() noexcept = default; + + /// Construct a queue handle. + constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {} + + /// Returns the checkpoint data. + /// @note Returns an empty vector when the function pointer is not present. + std::vector<VkCheckpointDataNV> GetCheckpointDataNV(const DeviceDispatch& dld) const; + + void Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const { + Check(dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence)); + } + + VkResult Present(const VkPresentInfoKHR& present_info) const noexcept { + return dld->vkQueuePresentKHR(queue, &present_info); + } + +private: + VkQueue queue = nullptr; + const DeviceDispatch* dld = nullptr; +}; + +class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> { + using Handle<VkBuffer, VkDevice, DeviceDispatch>::Handle; + +public: + /// Attaches a memory allocation. + void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; +}; + +class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { + using Handle<VkImage, VkDevice, DeviceDispatch>::Handle; + +public: + /// Attaches a memory allocation. + void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; +}; + +class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { + using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; + +public: + u8* Map(VkDeviceSize offset, VkDeviceSize size) const { + void* data; + Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); + return static_cast<u8*>(data); + } + + void Unmap() const noexcept { + dld->vkUnmapMemory(owner, handle); + } +}; + +class Fence : public Handle<VkFence, VkDevice, DeviceDispatch> { + using Handle<VkFence, VkDevice, DeviceDispatch>::Handle; + +public: + VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept { + return dld->vkWaitForFences(owner, 1, &handle, true, timeout); + } + + VkResult GetStatus() const noexcept { + return dld->vkGetFenceStatus(owner, handle); + } + + void Reset() const { + Check(dld->vkResetFences(owner, 1, &handle)); + } +}; + +class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> { + using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle; + +public: + DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; +}; + +class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { + using Handle<VkCommandPool, VkDevice, DeviceDispatch>::Handle; + +public: + CommandBuffers Allocate(std::size_t num_buffers, + VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; +}; + +class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> { + using Handle<VkSwapchainKHR, VkDevice, DeviceDispatch>::Handle; + +public: + std::vector<VkImage> GetImages() const; +}; + +class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { + using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle; + +public: + static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, + Span<const char*> enabled_extensions, + const VkPhysicalDeviceFeatures2& enabled_features, + DeviceDispatch& dld) noexcept; + + Queue GetQueue(u32 family_index) const noexcept; + + Buffer CreateBuffer(const VkBufferCreateInfo& ci) const; + + BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const; + + Image CreateImage(const VkImageCreateInfo& ci) const; + + ImageView CreateImageView(const VkImageViewCreateInfo& ci) const; + + Semaphore CreateSemaphore() const; + + Fence CreateFence(const VkFenceCreateInfo& ci) const; + + DescriptorPool CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const; + + RenderPass CreateRenderPass(const VkRenderPassCreateInfo& ci) const; + + DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const; + + PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const; + + Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const; + + Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const; + + Sampler CreateSampler(const VkSamplerCreateInfo& ci) const; + + Framebuffer CreateFramebuffer(const VkFramebufferCreateInfo& ci) const; + + CommandPool CreateCommandPool(const VkCommandPoolCreateInfo& ci) const; + + DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplateKHR( + const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const; + + QueryPool CreateQueryPool(const VkQueryPoolCreateInfo& ci) const; + + ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; + + SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; + + DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept; + + DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; + + VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; + + VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; + + void UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes, + Span<VkCopyDescriptorSet> copies) const noexcept; + + void UpdateDescriptorSet(VkDescriptorSet set, VkDescriptorUpdateTemplateKHR update_template, + const void* data) const noexcept { + dld->vkUpdateDescriptorSetWithTemplateKHR(handle, set, update_template, data); + } + + VkResult AcquireNextImageKHR(VkSwapchainKHR swapchain, u64 timeout, VkSemaphore semaphore, + VkFence fence, u32* image_index) const noexcept { + return dld->vkAcquireNextImageKHR(handle, swapchain, timeout, semaphore, fence, + image_index); + } + + VkResult WaitIdle() const noexcept { + return dld->vkDeviceWaitIdle(handle); + } + + void ResetQueryPoolEXT(VkQueryPool query_pool, u32 first, u32 count) const noexcept { + dld->vkResetQueryPoolEXT(handle, query_pool, first, count); + } + + void GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size, + void* data, VkDeviceSize stride, VkQueryResultFlags flags) const { + Check(dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride, + flags)); + } + + template <typename T> + T GetQueryResult(VkQueryPool query_pool, u32 first, VkQueryResultFlags flags) const { + static_assert(std::is_trivially_copyable_v<T>); + T value; + GetQueryResults(query_pool, first, 1, sizeof(T), &value, sizeof(T), flags); + return value; + } +}; + +class PhysicalDevice { +public: + constexpr PhysicalDevice() noexcept = default; + + constexpr PhysicalDevice(VkPhysicalDevice physical_device, const InstanceDispatch& dld) noexcept + : physical_device{physical_device}, dld{&dld} {} + + constexpr operator VkPhysicalDevice() const noexcept { + return physical_device; + } + + VkPhysicalDeviceProperties GetProperties() const noexcept; + + void GetProperties2KHR(VkPhysicalDeviceProperties2KHR&) const noexcept; + + VkPhysicalDeviceFeatures GetFeatures() const noexcept; + + void GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR&) const noexcept; + + VkFormatProperties GetFormatProperties(VkFormat) const noexcept; + + std::vector<VkExtensionProperties> EnumerateDeviceExtensionProperties() const; + + std::vector<VkQueueFamilyProperties> GetQueueFamilyProperties() const; + + bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const; + + VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept; + + std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const; + + std::vector<VkPresentModeKHR> GetSurfacePresentModesKHR(VkSurfaceKHR) const; + + VkPhysicalDeviceMemoryProperties GetMemoryProperties() const noexcept; + +private: + VkPhysicalDevice physical_device = nullptr; + const InstanceDispatch* dld = nullptr; +}; + +class CommandBuffer { +public: + CommandBuffer() noexcept = default; + + explicit CommandBuffer(VkCommandBuffer handle, const DeviceDispatch& dld) noexcept + : handle{handle}, dld{&dld} {} + + const VkCommandBuffer* address() const noexcept { + return &handle; + } + + void Begin(const VkCommandBufferBeginInfo& begin_info) const { + Check(dld->vkBeginCommandBuffer(handle, &begin_info)); + } + + void End() const { + Check(dld->vkEndCommandBuffer(handle)); + } + + void BeginRenderPass(const VkRenderPassBeginInfo& renderpass_bi, + VkSubpassContents contents) const noexcept { + dld->vkCmdBeginRenderPass(handle, &renderpass_bi, contents); + } + + void EndRenderPass() const noexcept { + dld->vkCmdEndRenderPass(handle); + } + + void BeginQuery(VkQueryPool query_pool, u32 query, VkQueryControlFlags flags) const noexcept { + dld->vkCmdBeginQuery(handle, query_pool, query, flags); + } + + void EndQuery(VkQueryPool query_pool, u32 query) const noexcept { + dld->vkCmdEndQuery(handle, query_pool, query); + } + + void BindDescriptorSets(VkPipelineBindPoint bind_point, VkPipelineLayout layout, u32 first, + Span<VkDescriptorSet> sets, Span<u32> dynamic_offsets) const noexcept { + dld->vkCmdBindDescriptorSets(handle, bind_point, layout, first, sets.size(), sets.data(), + dynamic_offsets.size(), dynamic_offsets.data()); + } + + void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { + dld->vkCmdBindPipeline(handle, bind_point, pipeline); + } + + void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType index_type) const + noexcept { + dld->vkCmdBindIndexBuffer(handle, buffer, offset, index_type); + } + + void BindVertexBuffers(u32 first, u32 count, const VkBuffer* buffers, + const VkDeviceSize* offsets) const noexcept { + dld->vkCmdBindVertexBuffers(handle, first, count, buffers, offsets); + } + + void BindVertexBuffer(u32 binding, VkBuffer buffer, VkDeviceSize offset) const noexcept { + BindVertexBuffers(binding, 1, &buffer, &offset); + } + + void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, u32 first_instance) const + noexcept { + dld->vkCmdDraw(handle, vertex_count, instance_count, first_vertex, first_instance); + } + + void DrawIndexed(u32 index_count, u32 instance_count, u32 first_index, u32 vertex_offset, + u32 first_instance) const noexcept { + dld->vkCmdDrawIndexed(handle, index_count, instance_count, first_index, vertex_offset, + first_instance); + } + + void ClearAttachments(Span<VkClearAttachment> attachments, Span<VkClearRect> rects) const + noexcept { + dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(), + rects.data()); + } + + void BlitImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, + VkImageLayout dst_layout, Span<VkImageBlit> regions, VkFilter filter) const + noexcept { + dld->vkCmdBlitImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), + regions.data(), filter); + } + + void Dispatch(u32 x, u32 y, u32 z) const noexcept { + dld->vkCmdDispatch(handle, x, y, z); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, Span<VkMemoryBarrier> memory_barriers, + Span<VkBufferMemoryBarrier> buffer_barriers, + Span<VkImageMemoryBarrier> image_barriers) const noexcept { + dld->vkCmdPipelineBarrier(handle, src_stage_mask, dst_stage_mask, dependency_flags, + memory_barriers.size(), memory_barriers.data(), + buffer_barriers.size(), buffer_barriers.data(), + image_barriers.size(), image_barriers.data()); + } + + void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, + Span<VkBufferImageCopy> regions) const noexcept { + dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), + regions.data()); + } + + void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, Span<VkBufferCopy> regions) const + noexcept { + dld->vkCmdCopyBuffer(handle, src_buffer, dst_buffer, regions.size(), regions.data()); + } + + void CopyImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, + VkImageLayout dst_layout, Span<VkImageCopy> regions) const noexcept { + dld->vkCmdCopyImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), + regions.data()); + } + + void CopyImageToBuffer(VkImage src_image, VkImageLayout src_layout, VkBuffer dst_buffer, + Span<VkBufferImageCopy> regions) const noexcept { + dld->vkCmdCopyImageToBuffer(handle, src_image, src_layout, dst_buffer, regions.size(), + regions.data()); + } + + void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, u32 data) const + noexcept { + dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data); + } + + void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, u32 offset, u32 size, + const void* values) const noexcept { + dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); + } + + void SetCheckpointNV(const void* checkpoint_marker) const noexcept { + dld->vkCmdSetCheckpointNV(handle, checkpoint_marker); + } + + void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept { + dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); + } + + void SetScissor(u32 first, Span<VkRect2D> scissors) const noexcept { + dld->vkCmdSetScissor(handle, first, scissors.size(), scissors.data()); + } + + void SetBlendConstants(const float blend_constants[4]) const noexcept { + dld->vkCmdSetBlendConstants(handle, blend_constants); + } + + void SetStencilCompareMask(VkStencilFaceFlags face_mask, u32 compare_mask) const noexcept { + dld->vkCmdSetStencilCompareMask(handle, face_mask, compare_mask); + } + + void SetStencilReference(VkStencilFaceFlags face_mask, u32 reference) const noexcept { + dld->vkCmdSetStencilReference(handle, face_mask, reference); + } + + void SetStencilWriteMask(VkStencilFaceFlags face_mask, u32 write_mask) const noexcept { + dld->vkCmdSetStencilWriteMask(handle, face_mask, write_mask); + } + + void SetDepthBias(float constant_factor, float clamp, float slope_factor) const noexcept { + dld->vkCmdSetDepthBias(handle, constant_factor, clamp, slope_factor); + } + + void SetDepthBounds(float min_depth_bounds, float max_depth_bounds) const noexcept { + dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds); + } + + void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, + const VkDeviceSize* offsets, + const VkDeviceSize* sizes) const noexcept { + dld->vkCmdBindTransformFeedbackBuffersEXT(handle, first, count, buffers, offsets, sizes); + } + + void BeginTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count, + const VkBuffer* counter_buffers, + const VkDeviceSize* counter_buffer_offsets) const noexcept { + dld->vkCmdBeginTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count, + counter_buffers, counter_buffer_offsets); + } + + void EndTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count, + const VkBuffer* counter_buffers, + const VkDeviceSize* counter_buffer_offsets) const noexcept { + dld->vkCmdEndTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count, + counter_buffers, counter_buffer_offsets); + } + +private: + VkCommandBuffer handle; + const DeviceDispatch* dld; +}; + +std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties( + const InstanceDispatch& dld); + } // namespace Vulkan::vk diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 2fe787d6f..0f4c3103a 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -235,34 +235,30 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::LEA_IMM: case OpCode::Id::LEA_RZ: case OpCode::Id::LEA_HI: { - const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> { + auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::LEA_R2: { return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; } - case OpCode::Id::LEA_R1: { const bool neg = instr.lea.r1.neg != 0; return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), GetRegister(instr.gpr20), Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; } - case OpCode::Id::LEA_IMM: { const bool neg = instr.lea.imm.neg != 0; return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; } - case OpCode::Id::LEA_RZ: { const bool neg = instr.lea.rz.neg != 0; return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; } - case OpCode::Id::LEA_HI: default: UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); @@ -275,12 +271,9 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), "Unhandled LEA Predicate"); - const Node shifted_c = - Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c); - const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c); - const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc); - - SetRegister(bb, instr.gpr0, value); + Node value = Operation(OperationCode::ILogicalShiftLeft, std::move(op_a), std::move(op_c)); + value = Operation(OperationCode::IAdd, std::move(op_b), std::move(value)); + SetRegister(bb, instr.gpr0, std::move(value)); break; } diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 6ead42070..c72690b2b 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -138,18 +138,23 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - value = [&]() { + value = [&] { + if (instr.conversion.src_size != instr.conversion.dst_size) { + // Rounding operations only matter when the source and destination conversion size + // is the same. + return value; + } switch (instr.conversion.f2f.GetRoundingMode()) { case Tegra::Shader::F2fRoundingOp::None: return value; case Tegra::Shader::F2fRoundingOp::Round: - return Operation(OperationCode::FRoundEven, PRECISE, value); + return Operation(OperationCode::FRoundEven, value); case Tegra::Shader::F2fRoundingOp::Floor: - return Operation(OperationCode::FFloor, PRECISE, value); + return Operation(OperationCode::FFloor, value); case Tegra::Shader::F2fRoundingOp::Ceil: - return Operation(OperationCode::FCeil, PRECISE, value); + return Operation(OperationCode::FCeil, value); case Tegra::Shader::F2fRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, PRECISE, value); + return Operation(OperationCode::FTrunc, value); default: UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", static_cast<u32>(instr.conversion.f2f.rounding.Value())); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index b5fbc4d58..b8f63922f 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -19,7 +19,6 @@ namespace VideoCommon::Shader { using Tegra::Shader::AtomicOp; using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; -using Tegra::Shader::GlobalAtomicOp; using Tegra::Shader::GlobalAtomicType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -28,6 +27,31 @@ using Tegra::Shader::StoreType; namespace { +Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { + const OperationCode operation_code = [op] { + switch (op) { + case AtomicOp::Add: + return OperationCode::AtomicIAdd; + case AtomicOp::Min: + return OperationCode::AtomicIMin; + case AtomicOp::Max: + return OperationCode::AtomicIMax; + case AtomicOp::And: + return OperationCode::AtomicIAnd; + case AtomicOp::Or: + return OperationCode::AtomicIOr; + case AtomicOp::Xor: + return OperationCode::AtomicIXor; + case AtomicOp::Exch: + return OperationCode::AtomicIExchange; + default: + UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); + return OperationCode::AtomicIAdd; + } + }(); + return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data)); +} + bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { return uniform_type == Tegra::Shader::UniformType::UnsignedByte || uniform_type == Tegra::Shader::UniformType::UnsignedShort; @@ -363,10 +387,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::ATOM: { - UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", - static_cast<int>(instr.atom.operation.Value())); - UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", - static_cast<int>(instr.atom.type.Value())); + UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || + instr.atom.operation == AtomicOp::Dec || + instr.atom.operation == AtomicOp::SafeAdd, + "operation={}", static_cast<int>(instr.atom.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || + instr.atom.type == GlobalAtomicType::U64, + "type={}", static_cast<int>(instr.atom.type.Value())); const auto [real_address, base_address, descriptor] = TrackGlobalMemory(bb, instr, true, true); @@ -375,25 +402,29 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } + const bool is_signed = + instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); + Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem, + GetRegister(instr.gpr20)); SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::ATOMS: { - UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", - static_cast<int>(instr.atoms.operation.Value())); - UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}", - static_cast<int>(instr.atoms.type.Value())); - + UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || + instr.atoms.operation == AtomicOp::Dec, + "operation={}", static_cast<int>(instr.atoms.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || + instr.atoms.type == AtomicType::U64, + "type={}", static_cast<int>(instr.atoms.type.Value())); + const bool is_signed = + instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; const s32 offset = instr.atoms.GetImmediateOffset(); Node address = GetRegister(instr.gpr8); address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); - - Node memory = GetSharedMemory(std::move(address)); - Node data = GetRegister(instr.gpr20); - - Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); + Node value = + GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed, + GetSharedMemory(std::move(address)), GetRegister(instr.gpr20)); SetRegister(bb, instr.gpr0, std::move(value)); break; } diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index a1828546e..5fcc9da60 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,7 +162,21 @@ enum class OperationCode { AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - AtomicAdd, /// (memory, {u}int) -> {u}int + AtomicUExchange, /// (memory, uint) -> uint + AtomicUAdd, /// (memory, uint) -> uint + AtomicUMin, /// (memory, uint) -> uint + AtomicUMax, /// (memory, uint) -> uint + AtomicUAnd, /// (memory, uint) -> uint + AtomicUOr, /// (memory, uint) -> uint + AtomicUXor, /// (memory, uint) -> uint + + AtomicIExchange, /// (memory, int) -> int + AtomicIAdd, /// (memory, int) -> int + AtomicIMin, /// (memory, int) -> int + AtomicIMax, /// (memory, int) -> int + AtomicIAnd, /// (memory, int) -> int + AtomicIOr, /// (memory, int) -> int + AtomicIXor, /// (memory, int) -> int Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index 76c56abb5..7bf4ff387 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp @@ -86,6 +86,20 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) return OperationCode::LogicalUNotEqual; case OperationCode::LogicalIGreaterEqual: return OperationCode::LogicalUGreaterEqual; + case OperationCode::AtomicIExchange: + return OperationCode::AtomicUExchange; + case OperationCode::AtomicIAdd: + return OperationCode::AtomicUAdd; + case OperationCode::AtomicIMin: + return OperationCode::AtomicUMin; + case OperationCode::AtomicIMax: + return OperationCode::AtomicUMax; + case OperationCode::AtomicIAnd: + return OperationCode::AtomicUAnd; + case OperationCode::AtomicIOr: + return OperationCode::AtomicUOr; + case OperationCode::AtomicIXor: + return OperationCode::AtomicUXor; case OperationCode::INegate: UNREACHABLE_MSG("Can't negate an unsigned integer"); return {}; diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index a5f81a8a0..f60bdc60a 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -15,13 +15,13 @@ #endif #include "video_core/video_core.h" -namespace VideoCore { - -std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, - Core::System& system) { +namespace { +std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, + Core::System& system, + Core::Frontend::GraphicsContext& context) { switch (Settings::values.renderer_backend) { case Settings::RendererBackend::OpenGL: - return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system); + return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context); #ifdef HAS_VULKAN case Settings::RendererBackend::Vulkan: return std::make_unique<Vulkan::RendererVulkan>(emu_window, system); @@ -30,13 +30,23 @@ std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_wind return nullptr; } } +} // Anonymous namespace -std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) { - if (Settings::values.use_asynchronous_gpu_emulation) { - return std::make_unique<VideoCommon::GPUAsynch>(system, system.Renderer()); +namespace VideoCore { + +std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { + auto context = emu_window.CreateSharedContext(); + const auto scope = context->Acquire(); + auto renderer = CreateRenderer(emu_window, system, *context); + if (!renderer->Init()) { + return nullptr; } - return std::make_unique<VideoCommon::GPUSynch>(system, system.Renderer()); + if (Settings::values.use_asynchronous_gpu_emulation) { + return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer), + std::move(context)); + } + return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context)); } u16 GetResolutionScaleFactor(const RendererBase& renderer) { diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index b8e0ac372..f5c27125d 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -22,17 +22,8 @@ namespace VideoCore { class RendererBase; -/** - * Creates a renderer instance. - * - * @note The returned renderer instance is simply allocated. Its Init() - * function still needs to be called to fully complete its setup. - */ -std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, - Core::System& system); - /// Creates an emulated GPU instance using the given system context. -std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system); +std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); u16 GetResolutionScaleFactor(const RendererBase& renderer); |