summary refs log tree commit diff stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/shader_bytecode.h14
-rw-r--r--src/video_core/gpu.cpp10
-rw-r--r--src/video_core/gpu.h18
-rw-r--r--src/video_core/gpu_asynch.cpp9
-rw-r--r--src/video_core/gpu_asynch.h9
-rw-r--r--src/video_core/gpu_synch.cpp17
-rw-r--r--src/video_core/gpu_synch.h10
-rw-r--r--src/video_core/gpu_thread.cpp15
-rw-r--r--src/video_core/gpu_thread.h7
-rw-r--r--src/video_core/renderer_base.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp20
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp65
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h10
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp3
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp31
-rw-r--r--src/video_core/renderer_vulkan/wrapper.cpp408
-rw-r--r--src/video_core/renderer_vulkan/wrapper.h442
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp15
-rw-r--r--src/video_core/shader/decode/conversion.cpp15
-rw-r--r--src/video_core/shader/decode/memory.cpp63
-rw-r--r--src/video_core/shader/node.h16
-rw-r--r--src/video_core/shader/node_helper.cpp14
-rw-r--r--src/video_core/video_core.cpp28
-rw-r--r--src/video_core/video_core.h11
26 files changed, 1113 insertions, 145 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 49dc5abe0..930b605af 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -231,18 +231,6 @@ enum class AtomicOp : u64 {
Or = 6,
Xor = 7,
Exch = 8,
-};
-
-enum class GlobalAtomicOp : u64 {
- Add = 0,
- Min = 1,
- Max = 2,
- Inc = 3,
- Dec = 4,
- And = 5,
- Or = 6,
- Xor = 7,
- Exch = 8,
SafeAdd = 10,
};
@@ -1001,7 +989,7 @@ union Instruction {
} stg;
union {
- BitField<52, 4, GlobalAtomicOp> operation;
+ BitField<52, 4, AtomicOp> operation;
BitField<49, 3, GlobalAtomicType> type;
BitField<28, 20, s64> offset;
} atom;
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index e8f763ce9..8acf2eda2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -7,6 +7,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
@@ -16,14 +17,15 @@
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
+#include "video_core/video_core.h"
namespace Tegra {
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
- : system{system}, renderer{renderer}, is_async{is_async} {
- auto& rasterizer{renderer.Rasterizer()};
+GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async)
+ : system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
+ auto& rasterizer{renderer->Rasterizer()};
memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
@@ -137,7 +139,7 @@ u64 GPU::GetTicks() const {
}
void GPU::FlushCommands() {
- renderer.Rasterizer().FlushCommands();
+ renderer->Rasterizer().FlushCommands();
}
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 64acb17df..ced9d7e28 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -25,8 +25,11 @@ inline u8* FromCacheAddr(CacheAddr cache_addr) {
}
namespace Core {
-class System;
+namespace Frontend {
+class EmuWindow;
}
+class System;
+} // namespace Core
namespace VideoCore {
class RendererBase;
@@ -129,7 +132,8 @@ class MemoryManager;
class GPU {
public:
- explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async);
+ explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
+ bool is_async);
virtual ~GPU();
@@ -174,6 +178,14 @@ public:
/// Returns a reference to the GPU DMA pusher.
Tegra::DmaPusher& DmaPusher();
+ VideoCore::RendererBase& Renderer() {
+ return *renderer;
+ }
+
+ const VideoCore::RendererBase& Renderer() const {
+ return *renderer;
+ }
+
// Waits for the GPU to finish working
virtual void WaitIdle() const = 0;
@@ -287,7 +299,7 @@ private:
protected:
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
Core::System& system;
- VideoCore::RendererBase& renderer;
+ std::unique_ptr<VideoCore::RendererBase> renderer;
private:
std::unique_ptr<Tegra::MemoryManager> memory_manager;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 04222d060..925be8d7b 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -10,13 +10,16 @@
namespace VideoCommon {
-GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
- : GPU(system, renderer, true), gpu_thread{system} {}
+GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
+ std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
+ : GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)),
+ cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {}
GPUAsynch::~GPUAsynch() = default;
void GPUAsynch::Start() {
- gpu_thread.StartThread(renderer, *dma_pusher);
+ cpu_context->MakeCurrent();
+ gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
}
void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 1241ade1d..265c62758 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -7,6 +7,10 @@
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
+namespace Core::Frontend {
+class GraphicsContext;
+}
+
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
@@ -16,7 +20,8 @@ namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU asynchronously
class GPUAsynch final : public Tegra::GPU {
public:
- explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
+ explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
+ std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
~GPUAsynch() override;
void Start() override;
@@ -32,6 +37,8 @@ protected:
private:
GPUThread::ThreadManager gpu_thread;
+ std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
+ std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context;
};
} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index d48221077..bd5278a5c 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -7,12 +7,15 @@
namespace VideoCommon {
-GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
- : GPU(system, renderer, false) {}
+GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
+ std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
+ : GPU(system, std::move(renderer), false), context{std::move(context)} {}
GPUSynch::~GPUSynch() = default;
-void GPUSynch::Start() {}
+void GPUSynch::Start() {
+ context->MakeCurrent();
+}
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
dma_pusher->Push(std::move(entries));
@@ -20,19 +23,19 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
}
void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
- renderer.SwapBuffers(framebuffer);
+ renderer->SwapBuffers(framebuffer);
}
void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
- renderer.Rasterizer().FlushRegion(addr, size);
+ renderer->Rasterizer().FlushRegion(addr, size);
}
void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
- renderer.Rasterizer().InvalidateRegion(addr, size);
+ renderer->Rasterizer().InvalidateRegion(addr, size);
}
void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
- renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
+ renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
}
} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index c71baee89..866a94c8c 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -6,6 +6,10 @@
#include "video_core/gpu.h"
+namespace Core::Frontend {
+class GraphicsContext;
+}
+
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
@@ -15,7 +19,8 @@ namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU synchronously
class GPUSynch final : public Tegra::GPU {
public:
- explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
+ explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
+ std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
~GPUSynch() override;
void Start() override;
@@ -29,6 +34,9 @@ public:
protected:
void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
[[maybe_unused]] u32 value) const override {}
+
+private:
+ std::unique_ptr<Core::Frontend::GraphicsContext> context;
};
} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index b1088af3d..270c7ae0d 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,7 +5,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h"
-#include "core/frontend/scope_acquire_context.h"
+#include "core/frontend/emu_window.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
@@ -14,8 +14,8 @@
namespace VideoCommon::GPUThread {
/// Runs the GPU thread
-static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
- SynchState& state) {
+static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
+ Tegra::DmaPusher& dma_pusher, SynchState& state) {
MicroProfileOnThreadCreate("GpuThread");
// Wait for first GPU command before acquiring the window context
@@ -27,7 +27,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
return;
}
- Core::Frontend::ScopeAcquireContext acquire_context{renderer.GetRenderWindow()};
+ auto current_context = context.Acquire();
CommandDataContainer next;
while (state.is_running) {
@@ -62,8 +62,11 @@ ThreadManager::~ThreadManager() {
thread.join();
}
-void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
- thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
+void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
+ Core::Frontend::GraphicsContext& context,
+ Tegra::DmaPusher& dma_pusher) {
+ thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher),
+ std::ref(state)};
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 882e2d9c7..be36c580e 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -10,7 +10,6 @@
#include <optional>
#include <thread>
#include <variant>
-
#include "common/threadsafe_queue.h"
#include "video_core/gpu.h"
@@ -20,6 +19,9 @@ class DmaPusher;
} // namespace Tegra
namespace Core {
+namespace Frontend {
+class GraphicsContext;
+}
class System;
} // namespace Core
@@ -99,7 +101,8 @@ public:
~ThreadManager();
/// Creates and starts the GPU thread.
- void StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+ void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
+ Tegra::DmaPusher& dma_pusher);
/// Push GPU command entries to be processed
void SubmitList(Tegra::CommandList&& entries);
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 5ec99a126..1d85219b6 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -46,7 +46,8 @@ public:
/// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer
/// specific implementation)
- virtual void TryPresent(int timeout_ms) = 0;
+ /// Returns true if a frame was drawn
+ virtual bool TryPresent(int timeout_ms) = 0;
// Getter/setter functions:
// ------------------------
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index e3d31c3eb..046ee55a5 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -327,8 +327,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
std::size_t end) {
- context->MakeCurrent();
- SCOPE_EXIT({ return context->DoneCurrent(); });
+ const auto scope = context->Acquire();
for (std::size_t i = begin; i < end; ++i) {
if (stop_loading) {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 8aa4a7ac9..c7d24cf14 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2114,6 +2114,10 @@ private:
template <const std::string_view& opname, Type type>
Expression Atomic(Operation operation) {
+ if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
+ UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
+ return {};
+ }
return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
Visit(operation[1]).As(type)),
type};
@@ -2307,6 +2311,8 @@ private:
~Func() = delete;
static constexpr std::string_view Add = "Add";
+ static constexpr std::string_view Min = "Min";
+ static constexpr std::string_view Max = "Max";
static constexpr std::string_view And = "And";
static constexpr std::string_view Or = "Or";
static constexpr std::string_view Xor = "Xor";
@@ -2457,7 +2463,21 @@ private:
&GLSLDecompiler::AtomicImage<Func::Xor>,
&GLSLDecompiler::AtomicImage<Func::Exchange>,
+ &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>,
&GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
+ &GLSLDecompiler::Atomic<Func::Min, Type::Uint>,
+ &GLSLDecompiler::Atomic<Func::Max, Type::Uint>,
+ &GLSLDecompiler::Atomic<Func::And, Type::Uint>,
+ &GLSLDecompiler::Atomic<Func::Or, Type::Uint>,
+ &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>,
+
+ &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>,
+ &GLSLDecompiler::Atomic<Func::Add, Type::Int>,
+ &GLSLDecompiler::Atomic<Func::Min, Type::Int>,
+ &GLSLDecompiler::Atomic<Func::Max, Type::Int>,
+ &GLSLDecompiler::Atomic<Func::And, Type::Int>,
+ &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
+ &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
&GLSLDecompiler::Branch,
&GLSLDecompiler::BranchIndirect,
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index fca5e3ec0..f1a28cc21 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -30,8 +30,6 @@ namespace OpenGL {
namespace {
-// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
-// to wait on available presentation frames.
constexpr std::size_t SWAP_CHAIN_SIZE = 3;
struct Frame {
@@ -214,7 +212,7 @@ public:
std::deque<Frame*> present_queue;
Frame* previous_frame{};
- FrameMailbox() : has_debug_tool{HasDebugTool()} {
+ FrameMailbox() {
for (auto& frame : swap_chain) {
free_queue.push(&frame);
}
@@ -285,13 +283,9 @@ public:
std::unique_lock lock{swap_chain_lock};
present_queue.push_front(frame);
present_cv.notify_one();
-
- DebugNotifyNextFrame();
}
Frame* TryGetPresentFrame(int timeout_ms) {
- DebugWaitForNextFrame();
-
std::unique_lock lock{swap_chain_lock};
// wait for new entries in the present_queue
present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
@@ -317,38 +311,12 @@ public:
previous_frame = frame;
return frame;
}
-
-private:
- std::mutex debug_synch_mutex;
- std::condition_variable debug_synch_condition;
- std::atomic_int frame_for_debug{};
- const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step
-
- /// Signal that a new frame is available (called from GPU thread)
- void DebugNotifyNextFrame() {
- if (!has_debug_tool) {
- return;
- }
- frame_for_debug++;
- std::lock_guard lock{debug_synch_mutex};
- debug_synch_condition.notify_one();
- }
-
- /// Wait for a new frame to be available (called from presentation thread)
- void DebugWaitForNextFrame() {
- if (!has_debug_tool) {
- return;
- }
- const int last_frame = frame_for_debug;
- std::unique_lock lock{debug_synch_mutex};
- debug_synch_condition.wait(lock,
- [this, last_frame] { return frame_for_debug > last_frame; });
- }
};
-RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
+RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
+ Core::Frontend::GraphicsContext& context)
: VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
- frame_mailbox{std::make_unique<FrameMailbox>()} {}
+ frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {}
RendererOpenGL::~RendererOpenGL() = default;
@@ -356,8 +324,6 @@ MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 12
MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128));
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
- render_window.PollEvents();
-
if (!framebuffer) {
return;
}
@@ -413,6 +379,13 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
m_current_frame++;
rasterizer->TickFrame();
}
+
+ render_window.PollEvents();
+ if (has_debug_tool) {
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+ Present(0);
+ context.SwapBuffers();
+ }
}
void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
@@ -480,6 +453,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
}
void RendererOpenGL::InitOpenGLObjects() {
+ frame_mailbox = std::make_unique<FrameMailbox>();
+
glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
0.0f);
@@ -692,12 +667,21 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
-void RendererOpenGL::TryPresent(int timeout_ms) {
+bool RendererOpenGL::TryPresent(int timeout_ms) {
+ if (has_debug_tool) {
+ LOG_DEBUG(Render_OpenGL,
+ "Skipping presentation because we are presenting on the main context");
+ return false;
+ }
+ return Present(timeout_ms);
+}
+
+bool RendererOpenGL::Present(int timeout_ms) {
const auto& layout = render_window.GetFramebufferLayout();
auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms);
if (!frame) {
LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present");
- return;
+ return false;
}
// Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a
@@ -725,6 +709,7 @@ void RendererOpenGL::TryPresent(int timeout_ms) {
glFlush();
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
+ return true;
}
void RendererOpenGL::RenderScreenshot() {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 33073ce5b..50b647661 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -55,13 +55,14 @@ class FrameMailbox;
class RendererOpenGL final : public VideoCore::RendererBase {
public:
- explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
+ explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
+ Core::Frontend::GraphicsContext& context);
~RendererOpenGL() override;
bool Init() override;
void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
- void TryPresent(int timeout_ms) override;
+ bool TryPresent(int timeout_ms) override;
private:
/// Initializes the OpenGL state and creates persistent objects.
@@ -89,8 +90,11 @@ private:
void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
+ bool Present(int timeout_ms);
+
Core::Frontend::EmuWindow& emu_window;
Core::System& system;
+ Core::Frontend::GraphicsContext& context;
StateTracker state_tracker{system};
@@ -115,6 +119,8 @@ private:
/// Frame presentation mailbox
std::unique_ptr<FrameMailbox> frame_mailbox;
+
+ bool has_debug_tool = false;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 42bb01418..6953aaafe 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -141,8 +141,9 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
render_window.PollEvents();
}
-void RendererVulkan::TryPresent(int /*timeout_ms*/) {
+bool RendererVulkan::TryPresent(int /*timeout_ms*/) {
// TODO (bunnei): ImplementMe
+ return true;
}
bool RendererVulkan::Init() {
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 3da08d2e4..d14384e79 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -42,7 +42,7 @@ public:
bool Init() override;
void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
- void TryPresent(int timeout_ms) override;
+ bool TryPresent(int timeout_ms) override;
private:
std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback(
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 51ecb5567..d67f08cf9 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1941,7 +1941,11 @@ private:
return {};
}
- Expression AtomicAdd(Operation operation) {
+ template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type,
+ Type value_type = result_type>
+ Expression Atomic(Operation operation) {
+ const Id type_def = GetTypeDefinition(result_type);
+
Id pointer;
if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
pointer = GetSharedMemoryPointer(*smem);
@@ -1949,14 +1953,15 @@ private:
pointer = GetGlobalMemoryPointer(*gmem);
} else {
UNREACHABLE();
- return {Constant(t_uint, 0), Type::Uint};
+ return {Constant(type_def, 0), result_type};
}
+ const Id value = As(Visit(operation[1]), value_type);
+
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
- const Id semantics = Constant(t_uint, 0U);
+ const Id semantics = Constant(type_def, 0);
- const Id value = AsUint(Visit(operation[1]));
- return {OpAtomicIAdd(t_uint, pointer, scope, semantics, value), Type::Uint};
+ return {(this->*func)(type_def, pointer, scope, semantics, value), result_type};
}
Expression Branch(Operation operation) {
@@ -2545,7 +2550,21 @@ private:
&SPIRVDecompiler::AtomicImageXor,
&SPIRVDecompiler::AtomicImageExchange,
- &SPIRVDecompiler::AtomicAdd,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>,
+
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>,
+ &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>,
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::BranchIndirect,
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index c412b7f20..9b94dfff1 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -339,4 +339,412 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe
return VK_SUCCESS;
}
+Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions,
+ InstanceDispatch& dld) noexcept {
+ VkApplicationInfo application_info;
+ application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
+ application_info.pNext = nullptr;
+ application_info.pApplicationName = "yuzu Emulator";
+ application_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0);
+ application_info.pEngineName = "yuzu Emulator";
+ application_info.engineVersion = VK_MAKE_VERSION(0, 1, 0);
+ application_info.apiVersion = VK_API_VERSION_1_1;
+
+ VkInstanceCreateInfo ci;
+ ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+ ci.pNext = nullptr;
+ ci.flags = 0;
+ ci.pApplicationInfo = &application_info;
+ ci.enabledLayerCount = layers.size();
+ ci.ppEnabledLayerNames = layers.data();
+ ci.enabledExtensionCount = extensions.size();
+ ci.ppEnabledExtensionNames = extensions.data();
+
+ VkInstance instance;
+ if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) {
+ // Failed to create the instance.
+ return {};
+ }
+ if (!Proc(dld.vkDestroyInstance, dld, "vkDestroyInstance", instance)) {
+ // We successfully created an instance but the destroy function couldn't be loaded.
+ // This is a good moment to panic.
+ return {};
+ }
+
+ return Instance(instance, dld);
+}
+
+std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() {
+ u32 num;
+ if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) {
+ return std::nullopt;
+ }
+ std::vector<VkPhysicalDevice> physical_devices(num);
+ if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) {
+ return std::nullopt;
+ }
+ return physical_devices;
+}
+
+DebugCallback Instance::TryCreateDebugCallback(
+ PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept {
+ VkDebugUtilsMessengerCreateInfoEXT ci;
+ ci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
+ ci.pNext = nullptr;
+ ci.flags = 0;
+ ci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT;
+ ci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
+ ci.pfnUserCallback = callback;
+ ci.pUserData = nullptr;
+
+ VkDebugUtilsMessengerEXT messenger;
+ if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) {
+ return {};
+ }
+ return DebugCallback(messenger, handle, *dld);
+}
+
+std::vector<VkCheckpointDataNV> Queue::GetCheckpointDataNV(const DeviceDispatch& dld) const {
+ if (!dld.vkGetQueueCheckpointDataNV) {
+ return {};
+ }
+ u32 num;
+ dld.vkGetQueueCheckpointDataNV(queue, &num, nullptr);
+ std::vector<VkCheckpointDataNV> checkpoints(num);
+ dld.vkGetQueueCheckpointDataNV(queue, &num, checkpoints.data());
+ return checkpoints;
+}
+
+void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
+ Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
+}
+
+void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
+ Check(dld->vkBindImageMemory(owner, handle, memory, offset));
+}
+
+DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const {
+ const std::size_t num = ai.descriptorSetCount;
+ std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num);
+ switch (const VkResult result = dld->vkAllocateDescriptorSets(owner, &ai, sets.get())) {
+ case VK_SUCCESS:
+ return DescriptorSets(std::move(sets), num, owner, handle, *dld);
+ case VK_ERROR_OUT_OF_POOL_MEMORY:
+ return {};
+ default:
+ throw Exception(result);
+ }
+}
+
+CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const {
+ VkCommandBufferAllocateInfo ai;
+ ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+ ai.pNext = nullptr;
+ ai.commandPool = handle;
+ ai.level = level;
+ ai.commandBufferCount = static_cast<u32>(num_buffers);
+
+ std::unique_ptr buffers = std::make_unique<VkCommandBuffer[]>(num_buffers);
+ switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) {
+ case VK_SUCCESS:
+ return CommandBuffers(std::move(buffers), num_buffers, owner, handle, *dld);
+ case VK_ERROR_OUT_OF_POOL_MEMORY:
+ return {};
+ default:
+ throw Exception(result);
+ }
+}
+
+std::vector<VkImage> SwapchainKHR::GetImages() const {
+ u32 num;
+ Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr));
+ std::vector<VkImage> images(num);
+ Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, images.data()));
+ return images;
+}
+
+Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
+ Span<const char*> enabled_extensions,
+ const VkPhysicalDeviceFeatures2& enabled_features,
+ DeviceDispatch& dld) noexcept {
+ VkDeviceCreateInfo ci;
+ ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+ ci.pNext = &enabled_features;
+ ci.flags = 0;
+ ci.queueCreateInfoCount = queues_ci.size();
+ ci.pQueueCreateInfos = queues_ci.data();
+ ci.enabledLayerCount = 0;
+ ci.ppEnabledLayerNames = nullptr;
+ ci.enabledExtensionCount = enabled_extensions.size();
+ ci.ppEnabledExtensionNames = enabled_extensions.data();
+ ci.pEnabledFeatures = nullptr;
+
+ VkDevice device;
+ if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) {
+ return {};
+ }
+ Load(device, dld);
+ return Device(device, dld);
+}
+
+Queue Device::GetQueue(u32 family_index) const noexcept {
+ VkQueue queue;
+ dld->vkGetDeviceQueue(handle, family_index, 0, &queue);
+ return Queue(queue, *dld);
+}
+
+Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const {
+ VkBuffer object;
+ Check(dld->vkCreateBuffer(handle, &ci, nullptr, &object));
+ return Buffer(object, handle, *dld);
+}
+
+BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const {
+ VkBufferView object;
+ Check(dld->vkCreateBufferView(handle, &ci, nullptr, &object));
+ return BufferView(object, handle, *dld);
+}
+
+Image Device::CreateImage(const VkImageCreateInfo& ci) const {
+ VkImage object;
+ Check(dld->vkCreateImage(handle, &ci, nullptr, &object));
+ return Image(object, handle, *dld);
+}
+
+ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const {
+ VkImageView object;
+ Check(dld->vkCreateImageView(handle, &ci, nullptr, &object));
+ return ImageView(object, handle, *dld);
+}
+
+Semaphore Device::CreateSemaphore() const {
+ VkSemaphoreCreateInfo ci;
+ ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+ ci.pNext = nullptr;
+ ci.flags = 0;
+
+ VkSemaphore object;
+ Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object));
+ return Semaphore(object, handle, *dld);
+}
+
+Fence Device::CreateFence(const VkFenceCreateInfo& ci) const {
+ VkFence object;
+ Check(dld->vkCreateFence(handle, &ci, nullptr, &object));
+ return Fence(object, handle, *dld);
+}
+
+DescriptorPool Device::CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const {
+ VkDescriptorPool object;
+ Check(dld->vkCreateDescriptorPool(handle, &ci, nullptr, &object));
+ return DescriptorPool(object, handle, *dld);
+}
+
+RenderPass Device::CreateRenderPass(const VkRenderPassCreateInfo& ci) const {
+ VkRenderPass object;
+ Check(dld->vkCreateRenderPass(handle, &ci, nullptr, &object));
+ return RenderPass(object, handle, *dld);
+}
+
+DescriptorSetLayout Device::CreateDescriptorSetLayout(
+ const VkDescriptorSetLayoutCreateInfo& ci) const {
+ VkDescriptorSetLayout object;
+ Check(dld->vkCreateDescriptorSetLayout(handle, &ci, nullptr, &object));
+ return DescriptorSetLayout(object, handle, *dld);
+}
+
+PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const {
+ VkPipelineLayout object;
+ Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object));
+ return PipelineLayout(object, handle, *dld);
+}
+
+Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const {
+ VkPipeline object;
+ Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object));
+ return Pipeline(object, handle, *dld);
+}
+
+Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const {
+ VkPipeline object;
+ Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object));
+ return Pipeline(object, handle, *dld);
+}
+
+Sampler Device::CreateSampler(const VkSamplerCreateInfo& ci) const {
+ VkSampler object;
+ Check(dld->vkCreateSampler(handle, &ci, nullptr, &object));
+ return Sampler(object, handle, *dld);
+}
+
+Framebuffer Device::CreateFramebuffer(const VkFramebufferCreateInfo& ci) const {
+ VkFramebuffer object;
+ Check(dld->vkCreateFramebuffer(handle, &ci, nullptr, &object));
+ return Framebuffer(object, handle, *dld);
+}
+
+CommandPool Device::CreateCommandPool(const VkCommandPoolCreateInfo& ci) const {
+ VkCommandPool object;
+ Check(dld->vkCreateCommandPool(handle, &ci, nullptr, &object));
+ return CommandPool(object, handle, *dld);
+}
+
+DescriptorUpdateTemplateKHR Device::CreateDescriptorUpdateTemplateKHR(
+ const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const {
+ VkDescriptorUpdateTemplateKHR object;
+ Check(dld->vkCreateDescriptorUpdateTemplateKHR(handle, &ci, nullptr, &object));
+ return DescriptorUpdateTemplateKHR(object, handle, *dld);
+}
+
+QueryPool Device::CreateQueryPool(const VkQueryPoolCreateInfo& ci) const {
+ VkQueryPool object;
+ Check(dld->vkCreateQueryPool(handle, &ci, nullptr, &object));
+ return QueryPool(object, handle, *dld);
+}
+
+ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) const {
+ VkShaderModule object;
+ Check(dld->vkCreateShaderModule(handle, &ci, nullptr, &object));
+ return ShaderModule(object, handle, *dld);
+}
+
+SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
+ VkSwapchainKHR object;
+ Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));
+ return SwapchainKHR(object, handle, *dld);
+}
+
+DeviceMemory Device::TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept {
+ VkDeviceMemory memory;
+ if (dld->vkAllocateMemory(handle, &ai, nullptr, &memory) != VK_SUCCESS) {
+ return {};
+ }
+ return DeviceMemory(memory, handle, *dld);
+}
+
+DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const {
+ VkDeviceMemory memory;
+ Check(dld->vkAllocateMemory(handle, &ai, nullptr, &memory));
+ return DeviceMemory(memory, handle, *dld);
+}
+
+VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept {
+ VkMemoryRequirements requirements;
+ dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements);
+ return requirements;
+}
+
+VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept {
+ VkMemoryRequirements requirements;
+ dld->vkGetImageMemoryRequirements(handle, image, &requirements);
+ return requirements;
+}
+
+void Device::UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes,
+ Span<VkCopyDescriptorSet> copies) const noexcept {
+ dld->vkUpdateDescriptorSets(handle, writes.size(), writes.data(), copies.size(), copies.data());
+}
+
+VkPhysicalDeviceProperties PhysicalDevice::GetProperties() const noexcept {
+ VkPhysicalDeviceProperties properties;
+ dld->vkGetPhysicalDeviceProperties(physical_device, &properties);
+ return properties;
+}
+
+void PhysicalDevice::GetProperties2KHR(VkPhysicalDeviceProperties2KHR& properties) const noexcept {
+ dld->vkGetPhysicalDeviceProperties2KHR(physical_device, &properties);
+}
+
+VkPhysicalDeviceFeatures PhysicalDevice::GetFeatures() const noexcept {
+ VkPhysicalDeviceFeatures2KHR features2;
+ features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
+ features2.pNext = nullptr;
+ dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features2);
+ return features2.features;
+}
+
+void PhysicalDevice::GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR& features) const noexcept {
+ dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features);
+}
+
+VkFormatProperties PhysicalDevice::GetFormatProperties(VkFormat format) const noexcept {
+ VkFormatProperties properties;
+ dld->vkGetPhysicalDeviceFormatProperties(physical_device, format, &properties);
+ return properties;
+}
+
+std::vector<VkExtensionProperties> PhysicalDevice::EnumerateDeviceExtensionProperties() const {
+ u32 num;
+ dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, nullptr);
+ std::vector<VkExtensionProperties> properties(num);
+ dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, properties.data());
+ return properties;
+}
+
+std::vector<VkQueueFamilyProperties> PhysicalDevice::GetQueueFamilyProperties() const {
+ u32 num;
+ dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, nullptr);
+ std::vector<VkQueueFamilyProperties> properties(num);
+ dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, properties.data());
+ return properties;
+}
+
+bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR surface) const {
+ VkBool32 supported;
+ Check(dld->vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, queue_family_index, surface,
+ &supported));
+ return supported == VK_TRUE;
+}
+
+VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const
+ noexcept {
+ VkSurfaceCapabilitiesKHR capabilities;
+ Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));
+ return capabilities;
+}
+
+std::vector<VkSurfaceFormatKHR> PhysicalDevice::GetSurfaceFormatsKHR(VkSurfaceKHR surface) const {
+ u32 num;
+ Check(dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &num, nullptr));
+ std::vector<VkSurfaceFormatKHR> formats(num);
+ Check(
+ dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &num, formats.data()));
+ return formats;
+}
+
+std::vector<VkPresentModeKHR> PhysicalDevice::GetSurfacePresentModesKHR(
+ VkSurfaceKHR surface) const {
+ u32 num;
+ Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &num, nullptr));
+ std::vector<VkPresentModeKHR> modes(num);
+ Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &num,
+ modes.data()));
+ return modes;
+}
+
+VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noexcept {
+ VkPhysicalDeviceMemoryProperties properties;
+ dld->vkGetPhysicalDeviceMemoryProperties(physical_device, &properties);
+ return properties;
+}
+
+std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
+ const InstanceDispatch& dld) {
+ u32 num;
+ if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, nullptr) != VK_SUCCESS) {
+ return std::nullopt;
+ }
+ std::vector<VkExtensionProperties> properties(num);
+ if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, properties.data()) !=
+ VK_SUCCESS) {
+ return std::nullopt;
+ }
+ return properties;
+}
+
} // namespace Vulkan::vk
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 686c2b9a1..fb3657819 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -542,4 +542,446 @@ using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>;
using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>;
using CommandBuffers = PoolAllocations<VkCommandBuffer, VkCommandPool>;
+/// Vulkan instance owning handle.
+class Instance : public Handle<VkInstance, NoOwner, InstanceDispatch> {
+ using Handle<VkInstance, NoOwner, InstanceDispatch>::Handle;
+
+public:
+ /// Creates a Vulkan instance. Use "operator bool" for error handling.
+ static Instance Create(Span<const char*> layers, Span<const char*> extensions,
+ InstanceDispatch& dld) noexcept;
+
+ /// Enumerates physical devices.
+ /// @return Physical devices and an empty handle on failure.
+ std::optional<std::vector<VkPhysicalDevice>> EnumeratePhysicalDevices();
+
+ /// Tries to create a debug callback messenger. Returns an empty handle on failure.
+ DebugCallback TryCreateDebugCallback(PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept;
+};
+
+class Queue {
+public:
+ /// Construct an empty queue handle.
+ constexpr Queue() noexcept = default;
+
+ /// Construct a queue handle.
+ constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {}
+
+ /// Returns the checkpoint data.
+ /// @note Returns an empty vector when the function pointer is not present.
+ std::vector<VkCheckpointDataNV> GetCheckpointDataNV(const DeviceDispatch& dld) const;
+
+ void Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const {
+ Check(dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence));
+ }
+
+ VkResult Present(const VkPresentInfoKHR& present_info) const noexcept {
+ return dld->vkQueuePresentKHR(queue, &present_info);
+ }
+
+private:
+ VkQueue queue = nullptr;
+ const DeviceDispatch* dld = nullptr;
+};
+
+class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> {
+ using Handle<VkBuffer, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ /// Attaches a memory allocation.
+ void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
+};
+
+class Image : public Handle<VkImage, VkDevice, DeviceDispatch> {
+ using Handle<VkImage, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ /// Attaches a memory allocation.
+ void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
+};
+
+class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> {
+ using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ u8* Map(VkDeviceSize offset, VkDeviceSize size) const {
+ void* data;
+ Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data));
+ return static_cast<u8*>(data);
+ }
+
+ void Unmap() const noexcept {
+ dld->vkUnmapMemory(owner, handle);
+ }
+};
+
+class Fence : public Handle<VkFence, VkDevice, DeviceDispatch> {
+ using Handle<VkFence, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept {
+ return dld->vkWaitForFences(owner, 1, &handle, true, timeout);
+ }
+
+ VkResult GetStatus() const noexcept {
+ return dld->vkGetFenceStatus(owner, handle);
+ }
+
+ void Reset() const {
+ Check(dld->vkResetFences(owner, 1, &handle));
+ }
+};
+
+class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> {
+ using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const;
+};
+
+class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> {
+ using Handle<VkCommandPool, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ CommandBuffers Allocate(std::size_t num_buffers,
+ VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const;
+};
+
+class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> {
+ using Handle<VkSwapchainKHR, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ std::vector<VkImage> GetImages() const;
+};
+
+class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
+ using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
+
+public:
+ static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
+ Span<const char*> enabled_extensions,
+ const VkPhysicalDeviceFeatures2& enabled_features,
+ DeviceDispatch& dld) noexcept;
+
+ Queue GetQueue(u32 family_index) const noexcept;
+
+ Buffer CreateBuffer(const VkBufferCreateInfo& ci) const;
+
+ BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const;
+
+ Image CreateImage(const VkImageCreateInfo& ci) const;
+
+ ImageView CreateImageView(const VkImageViewCreateInfo& ci) const;
+
+ Semaphore CreateSemaphore() const;
+
+ Fence CreateFence(const VkFenceCreateInfo& ci) const;
+
+ DescriptorPool CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const;
+
+ RenderPass CreateRenderPass(const VkRenderPassCreateInfo& ci) const;
+
+ DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const;
+
+ PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const;
+
+ Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const;
+
+ Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const;
+
+ Sampler CreateSampler(const VkSamplerCreateInfo& ci) const;
+
+ Framebuffer CreateFramebuffer(const VkFramebufferCreateInfo& ci) const;
+
+ CommandPool CreateCommandPool(const VkCommandPoolCreateInfo& ci) const;
+
+ DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplateKHR(
+ const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const;
+
+ QueryPool CreateQueryPool(const VkQueryPoolCreateInfo& ci) const;
+
+ ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
+
+ SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
+
+ DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept;
+
+ DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const;
+
+ VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept;
+
+ VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept;
+
+ void UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes,
+ Span<VkCopyDescriptorSet> copies) const noexcept;
+
+ void UpdateDescriptorSet(VkDescriptorSet set, VkDescriptorUpdateTemplateKHR update_template,
+ const void* data) const noexcept {
+ dld->vkUpdateDescriptorSetWithTemplateKHR(handle, set, update_template, data);
+ }
+
+ VkResult AcquireNextImageKHR(VkSwapchainKHR swapchain, u64 timeout, VkSemaphore semaphore,
+ VkFence fence, u32* image_index) const noexcept {
+ return dld->vkAcquireNextImageKHR(handle, swapchain, timeout, semaphore, fence,
+ image_index);
+ }
+
+ VkResult WaitIdle() const noexcept {
+ return dld->vkDeviceWaitIdle(handle);
+ }
+
+ void ResetQueryPoolEXT(VkQueryPool query_pool, u32 first, u32 count) const noexcept {
+ dld->vkResetQueryPoolEXT(handle, query_pool, first, count);
+ }
+
+ void GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size,
+ void* data, VkDeviceSize stride, VkQueryResultFlags flags) const {
+ Check(dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride,
+ flags));
+ }
+
+ template <typename T>
+ T GetQueryResult(VkQueryPool query_pool, u32 first, VkQueryResultFlags flags) const {
+ static_assert(std::is_trivially_copyable_v<T>);
+ T value;
+ GetQueryResults(query_pool, first, 1, sizeof(T), &value, sizeof(T), flags);
+ return value;
+ }
+};
+
+class PhysicalDevice {
+public:
+ constexpr PhysicalDevice() noexcept = default;
+
+ constexpr PhysicalDevice(VkPhysicalDevice physical_device, const InstanceDispatch& dld) noexcept
+ : physical_device{physical_device}, dld{&dld} {}
+
+ constexpr operator VkPhysicalDevice() const noexcept {
+ return physical_device;
+ }
+
+ VkPhysicalDeviceProperties GetProperties() const noexcept;
+
+ void GetProperties2KHR(VkPhysicalDeviceProperties2KHR&) const noexcept;
+
+ VkPhysicalDeviceFeatures GetFeatures() const noexcept;
+
+ void GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR&) const noexcept;
+
+ VkFormatProperties GetFormatProperties(VkFormat) const noexcept;
+
+ std::vector<VkExtensionProperties> EnumerateDeviceExtensionProperties() const;
+
+ std::vector<VkQueueFamilyProperties> GetQueueFamilyProperties() const;
+
+ bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const;
+
+ VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept;
+
+ std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const;
+
+ std::vector<VkPresentModeKHR> GetSurfacePresentModesKHR(VkSurfaceKHR) const;
+
+ VkPhysicalDeviceMemoryProperties GetMemoryProperties() const noexcept;
+
+private:
+ VkPhysicalDevice physical_device = nullptr;
+ const InstanceDispatch* dld = nullptr;
+};
+
+class CommandBuffer {
+public:
+ CommandBuffer() noexcept = default;
+
+ explicit CommandBuffer(VkCommandBuffer handle, const DeviceDispatch& dld) noexcept
+ : handle{handle}, dld{&dld} {}
+
+ const VkCommandBuffer* address() const noexcept {
+ return &handle;
+ }
+
+ void Begin(const VkCommandBufferBeginInfo& begin_info) const {
+ Check(dld->vkBeginCommandBuffer(handle, &begin_info));
+ }
+
+ void End() const {
+ Check(dld->vkEndCommandBuffer(handle));
+ }
+
+ void BeginRenderPass(const VkRenderPassBeginInfo& renderpass_bi,
+ VkSubpassContents contents) const noexcept {
+ dld->vkCmdBeginRenderPass(handle, &renderpass_bi, contents);
+ }
+
+ void EndRenderPass() const noexcept {
+ dld->vkCmdEndRenderPass(handle);
+ }
+
+ void BeginQuery(VkQueryPool query_pool, u32 query, VkQueryControlFlags flags) const noexcept {
+ dld->vkCmdBeginQuery(handle, query_pool, query, flags);
+ }
+
+ void EndQuery(VkQueryPool query_pool, u32 query) const noexcept {
+ dld->vkCmdEndQuery(handle, query_pool, query);
+ }
+
+ void BindDescriptorSets(VkPipelineBindPoint bind_point, VkPipelineLayout layout, u32 first,
+ Span<VkDescriptorSet> sets, Span<u32> dynamic_offsets) const noexcept {
+ dld->vkCmdBindDescriptorSets(handle, bind_point, layout, first, sets.size(), sets.data(),
+ dynamic_offsets.size(), dynamic_offsets.data());
+ }
+
+ void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept {
+ dld->vkCmdBindPipeline(handle, bind_point, pipeline);
+ }
+
+ void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType index_type) const
+ noexcept {
+ dld->vkCmdBindIndexBuffer(handle, buffer, offset, index_type);
+ }
+
+ void BindVertexBuffers(u32 first, u32 count, const VkBuffer* buffers,
+ const VkDeviceSize* offsets) const noexcept {
+ dld->vkCmdBindVertexBuffers(handle, first, count, buffers, offsets);
+ }
+
+ void BindVertexBuffer(u32 binding, VkBuffer buffer, VkDeviceSize offset) const noexcept {
+ BindVertexBuffers(binding, 1, &buffer, &offset);
+ }
+
+ void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, u32 first_instance) const
+ noexcept {
+ dld->vkCmdDraw(handle, vertex_count, instance_count, first_vertex, first_instance);
+ }
+
+ void DrawIndexed(u32 index_count, u32 instance_count, u32 first_index, u32 vertex_offset,
+ u32 first_instance) const noexcept {
+ dld->vkCmdDrawIndexed(handle, index_count, instance_count, first_index, vertex_offset,
+ first_instance);
+ }
+
+ void ClearAttachments(Span<VkClearAttachment> attachments, Span<VkClearRect> rects) const
+ noexcept {
+ dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(),
+ rects.data());
+ }
+
+ void BlitImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image,
+ VkImageLayout dst_layout, Span<VkImageBlit> regions, VkFilter filter) const
+ noexcept {
+ dld->vkCmdBlitImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(),
+ regions.data(), filter);
+ }
+
+ void Dispatch(u32 x, u32 y, u32 z) const noexcept {
+ dld->vkCmdDispatch(handle, x, y, z);
+ }
+
+ void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
+ VkDependencyFlags dependency_flags, Span<VkMemoryBarrier> memory_barriers,
+ Span<VkBufferMemoryBarrier> buffer_barriers,
+ Span<VkImageMemoryBarrier> image_barriers) const noexcept {
+ dld->vkCmdPipelineBarrier(handle, src_stage_mask, dst_stage_mask, dependency_flags,
+ memory_barriers.size(), memory_barriers.data(),
+ buffer_barriers.size(), buffer_barriers.data(),
+ image_barriers.size(), image_barriers.data());
+ }
+
+ void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout,
+ Span<VkBufferImageCopy> regions) const noexcept {
+ dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(),
+ regions.data());
+ }
+
+ void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, Span<VkBufferCopy> regions) const
+ noexcept {
+ dld->vkCmdCopyBuffer(handle, src_buffer, dst_buffer, regions.size(), regions.data());
+ }
+
+ void CopyImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image,
+ VkImageLayout dst_layout, Span<VkImageCopy> regions) const noexcept {
+ dld->vkCmdCopyImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(),
+ regions.data());
+ }
+
+ void CopyImageToBuffer(VkImage src_image, VkImageLayout src_layout, VkBuffer dst_buffer,
+ Span<VkBufferImageCopy> regions) const noexcept {
+ dld->vkCmdCopyImageToBuffer(handle, src_image, src_layout, dst_buffer, regions.size(),
+ regions.data());
+ }
+
+ void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, u32 data) const
+ noexcept {
+ dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data);
+ }
+
+ void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, u32 offset, u32 size,
+ const void* values) const noexcept {
+ dld->vkCmdPushConstants(handle, layout, flags, offset, size, values);
+ }
+
+ void SetCheckpointNV(const void* checkpoint_marker) const noexcept {
+ dld->vkCmdSetCheckpointNV(handle, checkpoint_marker);
+ }
+
+ void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept {
+ dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data());
+ }
+
+ void SetScissor(u32 first, Span<VkRect2D> scissors) const noexcept {
+ dld->vkCmdSetScissor(handle, first, scissors.size(), scissors.data());
+ }
+
+ void SetBlendConstants(const float blend_constants[4]) const noexcept {
+ dld->vkCmdSetBlendConstants(handle, blend_constants);
+ }
+
+ void SetStencilCompareMask(VkStencilFaceFlags face_mask, u32 compare_mask) const noexcept {
+ dld->vkCmdSetStencilCompareMask(handle, face_mask, compare_mask);
+ }
+
+ void SetStencilReference(VkStencilFaceFlags face_mask, u32 reference) const noexcept {
+ dld->vkCmdSetStencilReference(handle, face_mask, reference);
+ }
+
+ void SetStencilWriteMask(VkStencilFaceFlags face_mask, u32 write_mask) const noexcept {
+ dld->vkCmdSetStencilWriteMask(handle, face_mask, write_mask);
+ }
+
+ void SetDepthBias(float constant_factor, float clamp, float slope_factor) const noexcept {
+ dld->vkCmdSetDepthBias(handle, constant_factor, clamp, slope_factor);
+ }
+
+ void SetDepthBounds(float min_depth_bounds, float max_depth_bounds) const noexcept {
+ dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds);
+ }
+
+ void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
+ const VkDeviceSize* offsets,
+ const VkDeviceSize* sizes) const noexcept {
+ dld->vkCmdBindTransformFeedbackBuffersEXT(handle, first, count, buffers, offsets, sizes);
+ }
+
+ void BeginTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count,
+ const VkBuffer* counter_buffers,
+ const VkDeviceSize* counter_buffer_offsets) const noexcept {
+ dld->vkCmdBeginTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count,
+ counter_buffers, counter_buffer_offsets);
+ }
+
+ void EndTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count,
+ const VkBuffer* counter_buffers,
+ const VkDeviceSize* counter_buffer_offsets) const noexcept {
+ dld->vkCmdEndTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count,
+ counter_buffers, counter_buffer_offsets);
+ }
+
+private:
+ VkCommandBuffer handle;
+ const DeviceDispatch* dld;
+};
+
+std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
+ const InstanceDispatch& dld);
+
} // namespace Vulkan::vk
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 2fe787d6f..0f4c3103a 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -235,34 +235,30 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
case OpCode::Id::LEA_IMM:
case OpCode::Id::LEA_RZ:
case OpCode::Id::LEA_HI: {
- const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
+ auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::LEA_R2: {
return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
}
-
case OpCode::Id::LEA_R1: {
const bool neg = instr.lea.r1.neg != 0;
return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
GetRegister(instr.gpr20),
Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
}
-
case OpCode::Id::LEA_IMM: {
const bool neg = instr.lea.imm.neg != 0;
return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
}
-
case OpCode::Id::LEA_RZ: {
const bool neg = instr.lea.rz.neg != 0;
return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
}
-
case OpCode::Id::LEA_HI:
default:
UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
@@ -275,12 +271,9 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
"Unhandled LEA Predicate");
- const Node shifted_c =
- Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c);
- const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c);
- const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc);
-
- SetRegister(bb, instr.gpr0, value);
+ Node value = Operation(OperationCode::ILogicalShiftLeft, std::move(op_a), std::move(op_c));
+ value = Operation(OperationCode::IAdd, std::move(op_b), std::move(value));
+ SetRegister(bb, instr.gpr0, std::move(value));
break;
}
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 6ead42070..c72690b2b 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -138,18 +138,23 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
- value = [&]() {
+ value = [&] {
+ if (instr.conversion.src_size != instr.conversion.dst_size) {
+ // Rounding operations only matter when the source and destination conversion size
+ // is the same.
+ return value;
+ }
switch (instr.conversion.f2f.GetRoundingMode()) {
case Tegra::Shader::F2fRoundingOp::None:
return value;
case Tegra::Shader::F2fRoundingOp::Round:
- return Operation(OperationCode::FRoundEven, PRECISE, value);
+ return Operation(OperationCode::FRoundEven, value);
case Tegra::Shader::F2fRoundingOp::Floor:
- return Operation(OperationCode::FFloor, PRECISE, value);
+ return Operation(OperationCode::FFloor, value);
case Tegra::Shader::F2fRoundingOp::Ceil:
- return Operation(OperationCode::FCeil, PRECISE, value);
+ return Operation(OperationCode::FCeil, value);
case Tegra::Shader::F2fRoundingOp::Trunc:
- return Operation(OperationCode::FTrunc, PRECISE, value);
+ return Operation(OperationCode::FTrunc, value);
default:
UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
static_cast<u32>(instr.conversion.f2f.rounding.Value()));
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index b5fbc4d58..b8f63922f 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -19,7 +19,6 @@ namespace VideoCommon::Shader {
using Tegra::Shader::AtomicOp;
using Tegra::Shader::AtomicType;
using Tegra::Shader::Attribute;
-using Tegra::Shader::GlobalAtomicOp;
using Tegra::Shader::GlobalAtomicType;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
@@ -28,6 +27,31 @@ using Tegra::Shader::StoreType;
namespace {
+Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) {
+ const OperationCode operation_code = [op] {
+ switch (op) {
+ case AtomicOp::Add:
+ return OperationCode::AtomicIAdd;
+ case AtomicOp::Min:
+ return OperationCode::AtomicIMin;
+ case AtomicOp::Max:
+ return OperationCode::AtomicIMax;
+ case AtomicOp::And:
+ return OperationCode::AtomicIAnd;
+ case AtomicOp::Or:
+ return OperationCode::AtomicIOr;
+ case AtomicOp::Xor:
+ return OperationCode::AtomicIXor;
+ case AtomicOp::Exch:
+ return OperationCode::AtomicIExchange;
+ default:
+ UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
+ return OperationCode::AtomicIAdd;
+ }
+ }();
+ return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data));
+}
+
bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
uniform_type == Tegra::Shader::UniformType::UnsignedShort;
@@ -363,10 +387,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::ATOM: {
- UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
- static_cast<int>(instr.atom.operation.Value()));
- UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
- static_cast<int>(instr.atom.type.Value()));
+ UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
+ instr.atom.operation == AtomicOp::Dec ||
+ instr.atom.operation == AtomicOp::SafeAdd,
+ "operation={}", static_cast<int>(instr.atom.operation.Value()));
+ UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
+ instr.atom.type == GlobalAtomicType::U64,
+ "type={}", static_cast<int>(instr.atom.type.Value()));
const auto [real_address, base_address, descriptor] =
TrackGlobalMemory(bb, instr, true, true);
@@ -375,25 +402,29 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
+ const bool is_signed =
+ instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
+ Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem,
+ GetRegister(instr.gpr20));
SetRegister(bb, instr.gpr0, std::move(value));
break;
}
case OpCode::Id::ATOMS: {
- UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
- static_cast<int>(instr.atoms.operation.Value()));
- UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
- static_cast<int>(instr.atoms.type.Value()));
-
+ UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
+ instr.atoms.operation == AtomicOp::Dec,
+ "operation={}", static_cast<int>(instr.atoms.operation.Value()));
+ UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
+ instr.atoms.type == AtomicType::U64,
+ "type={}", static_cast<int>(instr.atoms.type.Value()));
+ const bool is_signed =
+ instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
const s32 offset = instr.atoms.GetImmediateOffset();
Node address = GetRegister(instr.gpr8);
address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
-
- Node memory = GetSharedMemory(std::move(address));
- Node data = GetRegister(instr.gpr20);
-
- Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
+ Node value =
+ GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed,
+ GetSharedMemory(std::move(address)), GetRegister(instr.gpr20));
SetRegister(bb, instr.gpr0, std::move(value));
break;
}
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index a1828546e..5fcc9da60 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,7 +162,21 @@ enum class OperationCode {
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
- AtomicAdd, /// (memory, {u}int) -> {u}int
+ AtomicUExchange, /// (memory, uint) -> uint
+ AtomicUAdd, /// (memory, uint) -> uint
+ AtomicUMin, /// (memory, uint) -> uint
+ AtomicUMax, /// (memory, uint) -> uint
+ AtomicUAnd, /// (memory, uint) -> uint
+ AtomicUOr, /// (memory, uint) -> uint
+ AtomicUXor, /// (memory, uint) -> uint
+
+ AtomicIExchange, /// (memory, int) -> int
+ AtomicIAdd, /// (memory, int) -> int
+ AtomicIMin, /// (memory, int) -> int
+ AtomicIMax, /// (memory, int) -> int
+ AtomicIAnd, /// (memory, int) -> int
+ AtomicIOr, /// (memory, int) -> int
+ AtomicIXor, /// (memory, int) -> int
Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 76c56abb5..7bf4ff387 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -86,6 +86,20 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
return OperationCode::LogicalUNotEqual;
case OperationCode::LogicalIGreaterEqual:
return OperationCode::LogicalUGreaterEqual;
+ case OperationCode::AtomicIExchange:
+ return OperationCode::AtomicUExchange;
+ case OperationCode::AtomicIAdd:
+ return OperationCode::AtomicUAdd;
+ case OperationCode::AtomicIMin:
+ return OperationCode::AtomicUMin;
+ case OperationCode::AtomicIMax:
+ return OperationCode::AtomicUMax;
+ case OperationCode::AtomicIAnd:
+ return OperationCode::AtomicUAnd;
+ case OperationCode::AtomicIOr:
+ return OperationCode::AtomicUOr;
+ case OperationCode::AtomicIXor:
+ return OperationCode::AtomicUXor;
case OperationCode::INegate:
UNREACHABLE_MSG("Can't negate an unsigned integer");
return {};
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index a5f81a8a0..f60bdc60a 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -15,13 +15,13 @@
#endif
#include "video_core/video_core.h"
-namespace VideoCore {
-
-std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
- Core::System& system) {
+namespace {
+std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
+ Core::System& system,
+ Core::Frontend::GraphicsContext& context) {
switch (Settings::values.renderer_backend) {
case Settings::RendererBackend::OpenGL:
- return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
+ return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context);
#ifdef HAS_VULKAN
case Settings::RendererBackend::Vulkan:
return std::make_unique<Vulkan::RendererVulkan>(emu_window, system);
@@ -30,13 +30,23 @@ std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_wind
return nullptr;
}
}
+} // Anonymous namespace
-std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) {
- if (Settings::values.use_asynchronous_gpu_emulation) {
- return std::make_unique<VideoCommon::GPUAsynch>(system, system.Renderer());
+namespace VideoCore {
+
+std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
+ auto context = emu_window.CreateSharedContext();
+ const auto scope = context->Acquire();
+ auto renderer = CreateRenderer(emu_window, system, *context);
+ if (!renderer->Init()) {
+ return nullptr;
}
- return std::make_unique<VideoCommon::GPUSynch>(system, system.Renderer());
+ if (Settings::values.use_asynchronous_gpu_emulation) {
+ return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer),
+ std::move(context));
+ }
+ return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context));
}
u16 GetResolutionScaleFactor(const RendererBase& renderer) {
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index b8e0ac372..f5c27125d 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -22,17 +22,8 @@ namespace VideoCore {
class RendererBase;
-/**
- * Creates a renderer instance.
- *
- * @note The returned renderer instance is simply allocated. Its Init()
- * function still needs to be called to fully complete its setup.
- */
-std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
- Core::System& system);
-
/// Creates an emulated GPU instance using the given system context.
-std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system);
+std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system);
u16 GetResolutionScaleFactor(const RendererBase& renderer);