summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/maxwell_3d.cpp12
-rw-r--r--src/video_core/engines/maxwell_3d.h2
-rw-r--r--src/video_core/engines/shader_bytecode.h9
-rw-r--r--src/video_core/gpu.cpp12
-rw-r--r--src/video_core/gpu.h9
-rw-r--r--src/video_core/gpu_asynch.cpp5
-rw-r--r--src/video_core/gpu_asynch.h5
-rw-r--r--src/video_core/gpu_synch.cpp5
-rw-r--r--src/video_core/gpu_synch.h5
-rw-r--r--src/video_core/gpu_thread.cpp8
-rw-r--r--src/video_core/gpu_thread.h3
-rw-r--r--src/video_core/morton.cpp116
-rw-r--r--src/video_core/morton.h3
-rw-r--r--src/video_core/renderer_base.h3
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp43
-rw-r--r--src/video_core/renderer_opengl/gl_device.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp930
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp89
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h5
-rw-r--r--src/video_core/shader/decode/conversion.cpp22
-rw-r--r--src/video_core/shader/decode/float_set_predicate.cpp9
-rw-r--r--src/video_core/surface.cpp5
22 files changed, 651 insertions, 655 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 6a5a4f5c4..f5158d219 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -249,16 +249,10 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
executing_macro = 0;
// Lookup the macro offset
- const u32 entry{(method - MacroRegistersStart) >> 1};
- const auto& search{macro_offsets.find(entry)};
- if (search == macro_offsets.end()) {
- LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
- UNREACHABLE();
- return;
- }
+ const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size();
// Execute the current macro.
- macro_interpreter.Execute(search->second, std::move(parameters));
+ macro_interpreter.Execute(macro_positions[entry], std::move(parameters));
}
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
@@ -421,7 +415,7 @@ void Maxwell3D::ProcessMacroUpload(u32 data) {
}
void Maxwell3D::ProcessMacroBind(u32 data) {
- macro_offsets[regs.macros.entry] = data;
+ macro_positions[regs.macros.entry++] = data;
}
void Maxwell3D::ProcessQueryGet() {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1ee982b76..0184342a0 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1270,7 +1270,7 @@ private:
MemoryManager& memory_manager;
/// Start offsets of each macro in macro_memory
- std::unordered_map<u32, u32> macro_offsets;
+ std::array<u32, 0x80> macro_positions = {};
/// Memory for macro code
MacroMemory macro_memory;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index bc8c2a1c5..c3678b9ea 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -886,6 +886,7 @@ union Instruction {
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
+ BitField<6, 1, u64> neg_b;
BitField<7, 1, u64> abs_a;
BitField<39, 3, u64> pred39;
BitField<42, 1, u64> neg_pred;
@@ -1019,7 +1020,6 @@ union Instruction {
} iset;
union {
- BitField<41, 2, u64> selector; // i2i and i2f only
BitField<45, 1, u64> negate_a;
BitField<49, 1, u64> abs_a;
BitField<10, 2, Register::Size> src_size;
@@ -1045,6 +1045,13 @@ union Instruction {
}
} f2f;
+ union {
+ BitField<41, 2, u64> selector;
+ } int_src;
+
+ union {
+ BitField<41, 1, u64> selector;
+ } float_src;
} conversion;
union {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8d9db45f5..2c47541cb 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -17,18 +17,6 @@
namespace Tegra {
-u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
- switch (format) {
- case PixelFormat::ABGR8:
- case PixelFormat::BGRA8:
- return 4;
- default:
- return 4;
- }
-
- UNREACHABLE();
-}
-
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
: system{system}, renderer{renderer}, is_async{is_async} {
auto& rasterizer{renderer.Rasterizer()};
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 544340ecd..78bc0601a 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -95,14 +95,10 @@ class DebugContext;
struct FramebufferConfig {
enum class PixelFormat : u32 {
ABGR8 = 1,
+ RGB565 = 4,
BGRA8 = 5,
};
- /**
- * Returns the number of bytes per pixel.
- */
- static u32 BytesPerPixel(PixelFormat format);
-
VAddr address;
u32 offset;
u32 width;
@@ -253,8 +249,7 @@ public:
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
/// Swap buffers (render frame)
- virtual void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
+ virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index ea67be831..f2a3a390e 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -23,9 +23,8 @@ void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}
-void GPUAsynch::SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- gpu_thread.SwapBuffers(std::move(framebuffer));
+void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ gpu_thread.SwapBuffers(framebuffer);
}
void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 36377d677..a12f9bac4 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -14,15 +14,14 @@ class RendererBase;
namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU asynchronously
-class GPUAsynch : public Tegra::GPU {
+class GPUAsynch final : public Tegra::GPU {
public:
explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
~GPUAsynch() override;
void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
- void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index d4ead9c47..d48221077 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -19,9 +19,8 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
dma_pusher->DispatchCalls();
}
-void GPUSynch::SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- renderer.SwapBuffers(std::move(framebuffer));
+void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ renderer.SwapBuffers(framebuffer);
}
void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 07bcc47f1..5eb1c461c 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -13,15 +13,14 @@ class RendererBase;
namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU synchronously
-class GPUSynch : public Tegra::GPU {
+class GPUSynch final : public Tegra::GPU {
public:
explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
~GPUSynch() override;
void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
- void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index b441e92b0..5f039e4fd 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -39,7 +39,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
- renderer.SwapBuffers(std::move(data->framebuffer));
+ renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@@ -78,9 +78,9 @@ void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
}
-void ThreadManager::SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- PushCommand(SwapBuffersCommand(std::move(framebuffer)));
+void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ PushCommand(SwapBuffersCommand(framebuffer ? *framebuffer
+ : std::optional<const Tegra::FramebufferConfig>{}));
}
void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 1d9d0c39e..3ae0ec9f3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -110,8 +110,7 @@ public:
void SubmitList(Tegra::CommandList&& entries);
/// Swap buffers (render frame)
- void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(CacheAddr addr, u64 size);
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 3e91cbc83..084f85e67 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -25,8 +25,8 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
// pixel values.
- const u32 tile_size_x{GetDefaultBlockWidth(format)};
- const u32 tile_size_y{GetDefaultBlockHeight(format)};
+ constexpr u32 tile_size_x{GetDefaultBlockWidth(format)};
+ constexpr u32 tile_size_y{GetDefaultBlockHeight(format)};
if constexpr (morton_to_linear) {
Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
@@ -186,99 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
return morton_to_linear_fns[static_cast<std::size_t>(format)];
}
-static u32 MortonInterleave128(u32 x, u32 y) {
- // 128x128 Z-Order coordinate from 2D coordinates
- static constexpr u32 xlut[] = {
- 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
- 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
- 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
- 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
- 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
- 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
- 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
- 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
- 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
- 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
- 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
- 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
- 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
- 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
- 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
- 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
- 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
- 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
- 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
- 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
- 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
- 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
- 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
- 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
- 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
- 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
- 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
- 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
- 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
- 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
- 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
- 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
- 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
- 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
- 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
- };
- static constexpr u32 ylut[] = {
- 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
- 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
- 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
- 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
- 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
- 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
- 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
- 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
- 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
- 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
- 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
- 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
- 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
- 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
- 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
- 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
- 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
- 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
- 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
- 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
- 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
- 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
- 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
- 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
- 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
- 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
- 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
- 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
- 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
- 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
- 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
- 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
- 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
- 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
- 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
- };
- return xlut[x % 128] + ylut[y % 128];
-}
-
-static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
- // Calculates the offset of the position of the pixel in Morton order
- // Framebuffer images are split into 128x128 tiles.
-
- constexpr u32 block_height = 128;
- const u32 coarse_x = x & ~127;
-
- const u32 i = MortonInterleave128(x, y);
-
- const u32 offset = coarse_x * block_height;
-
- return (i + offset) * bytes_per_pixel;
-}
-
void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
u8* buffer, u8* addr) {
@@ -286,23 +193,4 @@ void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stri
tile_width_spacing, buffer, addr);
}
-void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
- u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
- const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
- u8* data_ptrs[2];
- for (u32 y = 0; y < height; ++y) {
- for (u32 x = 0; x < width; ++x) {
- const u32 coarse_y = y & ~127;
- const u32 morton_offset =
- GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
- const u32 linear_pixel_index = (x + y * width) * linear_bytes_per_pixel;
-
- data_ptrs[morton_to_linear ? 1 : 0] = morton_data + morton_offset;
- data_ptrs[morton_to_linear ? 0 : 1] = &linear_data[linear_pixel_index];
-
- std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
- }
- }
-}
-
} // namespace VideoCore
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index ee5b45555..b714a7e3f 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -15,7 +15,4 @@ void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat forma
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
u8* buffer, u8* addr);
-void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
- u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
-
} // namespace VideoCore
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 1d54c3723..af1bebc4f 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -36,8 +36,7 @@ public:
virtual ~RendererBase();
/// Swap buffers (render frame)
- virtual void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
+ virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
/// Initialize the renderer
virtual bool Init() = 0;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 03d434b28..4f59a87b4 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -14,12 +14,22 @@
namespace OpenGL {
namespace {
+
template <typename T>
T GetInteger(GLenum pname) {
GLint temporary;
glGetIntegerv(pname, &temporary);
return static_cast<T>(temporary);
}
+
+bool TestProgram(const GLchar* glsl) {
+ const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &glsl)};
+ GLint link_status;
+ glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
+ glDeleteProgram(shader);
+ return link_status == GL_TRUE;
+}
+
} // Anonymous namespace
Device::Device() {
@@ -32,6 +42,11 @@ Device::Device() {
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = TestComponentIndexingBug();
+ has_precise_bug = TestPreciseBug();
+
+ LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
+ LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
+ LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
}
Device::Device(std::nullptr_t) {
@@ -42,30 +57,21 @@ Device::Device(std::nullptr_t) {
has_vertex_viewport_layer = true;
has_variable_aoffi = true;
has_component_indexing_bug = false;
+ has_precise_bug = false;
}
bool Device::TestVariableAoffi() {
- const GLchar* AOFFI_TEST = R"(#version 430 core
+ return TestProgram(R"(#version 430 core
// This is a unit test, please ignore me on apitrace bug reports.
uniform sampler2D tex;
uniform ivec2 variable_offset;
out vec4 output_attribute;
void main() {
output_attribute = textureOffset(tex, vec2(0), variable_offset);
-}
-)";
- const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &AOFFI_TEST)};
- GLint link_status{};
- glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
- glDeleteProgram(shader);
-
- const bool supported{link_status == GL_TRUE};
- LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", supported);
- return supported;
+})");
}
bool Device::TestComponentIndexingBug() {
- constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}";
const GLchar* COMPONENT_TEST = R"(#version 430 core
layout (std430, binding = 0) buffer OutputBuffer {
uint output_value;
@@ -105,12 +111,21 @@ void main() {
GLuint result;
glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result);
if (result != values.at(index)) {
- LOG_INFO(Render_OpenGL, log_message, true);
return true;
}
}
- LOG_INFO(Render_OpenGL, log_message, false);
return false;
}
+bool Device::TestPreciseBug() {
+ return !TestProgram(R"(#version 430 core
+in vec3 coords;
+out float out_value;
+uniform sampler2DShadow tex;
+void main() {
+ precise float tmp_value = vec4(texture(tex, coords)).x;
+ out_value = tmp_value;
+})");
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 3ef7c6dd8..ba6dcd3be 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -46,9 +46,14 @@ public:
return has_component_indexing_bug;
}
+ bool HasPreciseBug() const {
+ return has_precise_bug;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestComponentIndexingBug();
+ static bool TestPreciseBug();
std::size_t uniform_buffer_alignment{};
std::size_t shader_storage_alignment{};
@@ -58,6 +63,7 @@ private:
bool has_vertex_viewport_layer{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
+ bool has_precise_bug{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 359d58cbe..a5cc1a86f 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -39,7 +39,7 @@ using namespace VideoCommon::Shader;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using Operation = const OperationNode&;
-enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
+enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
struct TextureAoffi {};
using TextureArgument = std::pair<Type, Node>;
@@ -48,7 +48,7 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>;
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
-class ShaderWriter {
+class ShaderWriter final {
public:
void AddExpression(std::string_view text) {
DEBUG_ASSERT(scope >= 0);
@@ -93,9 +93,157 @@ private:
u32 temporary_index = 1;
};
+class Expression final {
+public:
+ Expression(std::string code, Type type) : code{std::move(code)}, type{type} {
+ ASSERT(type != Type::Void);
+ }
+ Expression() : type{Type::Void} {}
+
+ Type GetType() const {
+ return type;
+ }
+
+ std::string GetCode() const {
+ return code;
+ }
+
+ void CheckVoid() const {
+ ASSERT(type == Type::Void);
+ }
+
+ std::string As(Type type) const {
+ switch (type) {
+ case Type::Bool:
+ return AsBool();
+ case Type::Bool2:
+ return AsBool2();
+ case Type::Float:
+ return AsFloat();
+ case Type::Int:
+ return AsInt();
+ case Type::Uint:
+ return AsUint();
+ case Type::HalfFloat:
+ return AsHalfFloat();
+ default:
+ UNREACHABLE_MSG("Invalid type");
+ return code;
+ }
+ }
+
+ std::string AsBool() const {
+ switch (type) {
+ case Type::Bool:
+ return code;
+ default:
+ UNREACHABLE_MSG("Incompatible types");
+ return code;
+ }
+ }
+
+ std::string AsBool2() const {
+ switch (type) {
+ case Type::Bool2:
+ return code;
+ default:
+ UNREACHABLE_MSG("Incompatible types");
+ return code;
+ }
+ }
+
+ std::string AsFloat() const {
+ switch (type) {
+ case Type::Float:
+ return code;
+ case Type::Uint:
+ return fmt::format("utof({})", code);
+ case Type::Int:
+ return fmt::format("itof({})", code);
+ case Type::HalfFloat:
+ return fmt::format("utof(packHalf2x16({}))", code);
+ default:
+ UNREACHABLE_MSG("Incompatible types");
+ return code;
+ }
+ }
+
+ std::string AsInt() const {
+ switch (type) {
+ case Type::Float:
+ return fmt::format("ftoi({})", code);
+ case Type::Uint:
+ return fmt::format("int({})", code);
+ case Type::Int:
+ return code;
+ case Type::HalfFloat:
+ return fmt::format("int(packHalf2x16({}))", code);
+ default:
+ UNREACHABLE_MSG("Incompatible types");
+ return code;
+ }
+ }
+
+ std::string AsUint() const {
+ switch (type) {
+ case Type::Float:
+ return fmt::format("ftou({})", code);
+ case Type::Uint:
+ return code;
+ case Type::Int:
+ return fmt::format("uint({})", code);
+ case Type::HalfFloat:
+ return fmt::format("packHalf2x16({})", code);
+ default:
+ UNREACHABLE_MSG("Incompatible types");
+ return code;
+ }
+ }
+
+ std::string AsHalfFloat() const {
+ switch (type) {
+ case Type::Float:
+ return fmt::format("unpackHalf2x16(ftou({}))", code);
+ case Type::Uint:
+ return fmt::format("unpackHalf2x16({})", code);
+ case Type::Int:
+ return fmt::format("unpackHalf2x16(int({}))", code);
+ case Type::HalfFloat:
+ return code;
+ default:
+ UNREACHABLE_MSG("Incompatible types");
+ return code;
+ }
+ }
+
+private:
+ std::string code;
+ Type type{};
+};
+
+constexpr const char* GetTypeString(Type type) {
+ switch (type) {
+ case Type::Bool:
+ return "bool";
+ case Type::Bool2:
+ return "bvec2";
+ case Type::Float:
+ return "float";
+ case Type::Int:
+ return "int";
+ case Type::Uint:
+ return "uint";
+ case Type::HalfFloat:
+ return "vec2";
+ default:
+ UNREACHABLE_MSG("Invalid type");
+ return "<invalid type>";
+ }
+}
+
/// Generates code to use for a swizzle operation.
constexpr const char* GetSwizzle(u32 element) {
- constexpr std::array<const char*, 4> swizzle = {".x", ".y", ".z", ".w"};
+ constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
return swizzle.at(element);
}
@@ -134,8 +282,8 @@ constexpr bool IsGenericAttribute(Attribute::Index index) {
return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
}
-constexpr Attribute::Index ToGenericAttribute(u32 value) {
- return static_cast<Attribute::Index>(value + static_cast<u32>(Attribute::Index::Attribute_0));
+constexpr Attribute::Index ToGenericAttribute(u64 value) {
+ return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
}
u32 GetGenericAttributeIndex(Attribute::Index index) {
@@ -191,7 +339,7 @@ public:
// VM's program counter
const auto first_address = ir.GetBasicBlocks().begin()->first;
- code.AddLine("uint jmp_to = {}u;", first_address);
+ code.AddLine("uint jmp_to = {}U;", first_address);
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
// unlikely that shaders will use 20 nested SSYs and PBKs.
@@ -199,7 +347,7 @@ public:
constexpr u32 FLOW_STACK_SIZE = 20;
for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
- code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
+ code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
}
}
@@ -210,7 +358,7 @@ public:
for (const auto& pair : ir.GetBasicBlocks()) {
const auto [address, bb] = pair;
- code.AddLine("case 0x{:x}u: {{", address);
+ code.AddLine("case 0x{:X}U: {{", address);
++code.scope;
VisitBlock(bb);
@@ -322,7 +470,7 @@ private:
void DeclareRegisters() {
const auto& registers = ir.GetRegisters();
for (const u32 gpr : registers) {
- code.AddLine("float {} = 0;", GetRegister(gpr));
+ code.AddLine("float {} = 0.0f;", GetRegister(gpr));
}
if (!registers.empty()) {
code.AddNewLine();
@@ -348,7 +496,7 @@ private:
return;
}
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
- code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
+ code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
}
@@ -371,8 +519,6 @@ private:
return "noperspective ";
default:
case AttributeUse::Unused:
- UNREACHABLE_MSG("Unused attribute being fetched");
- return {};
UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute));
return {};
}
@@ -449,7 +595,7 @@ private:
const auto [index, size] = entry;
code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index,
GetConstBufferBlock(index));
- code.AddLine(" vec4 {}[MAX_CONSTBUFFER_ELEMENTS];", GetConstBuffer(index));
+ code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
code.AddLine("}};");
code.AddNewLine();
}
@@ -470,7 +616,7 @@ private:
code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{",
base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base));
- code.AddLine(" float {}[];", GetGlobalMemory(base));
+ code.AddLine(" uint {}[];", GetGlobalMemory(base));
code.AddLine("}};");
code.AddNewLine();
}
@@ -528,7 +674,7 @@ private:
if (!ir.HasPhysicalAttributes()) {
return;
}
- code.AddLine("float readPhysicalAttribute(uint physical_address) {{");
+ code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
++code.scope;
code.AddLine("switch (physical_address) {{");
@@ -537,15 +683,16 @@ private:
for (u32 index = 0; index < num_attributes; ++index) {
const auto attribute{ToGenericAttribute(index)};
for (u32 element = 0; element < 4; ++element) {
- constexpr u32 generic_base{0x80};
- constexpr u32 generic_stride{16};
- constexpr u32 element_stride{4};
+ constexpr u32 generic_base = 0x80;
+ constexpr u32 generic_stride = 16;
+ constexpr u32 element_stride = 4;
const u32 address{generic_base + index * generic_stride + element * element_stride};
- const bool declared{stage != ProgramType::Fragment ||
- header.ps.GetAttributeUse(index) != AttributeUse::Unused};
- const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
- code.AddLine("case 0x{:x}: return {};", address, value);
+ const bool declared = stage != ProgramType::Fragment ||
+ header.ps.GetAttributeUse(index) != AttributeUse::Unused;
+ const std::string value =
+ declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
+ code.AddLine("case 0x{:X}U: return {};", address, value);
}
}
@@ -590,13 +737,11 @@ private:
void VisitBlock(const NodeBlock& bb) {
for (const auto& node : bb) {
- if (const std::string expr = Visit(node); !expr.empty()) {
- code.AddLine(expr);
- }
+ Visit(node).CheckVoid();
}
}
- std::string Visit(const Node& node) {
+ Expression Visit(const Node& node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
if (operation_index >= operation_decompilers.size()) {
@@ -614,18 +759,18 @@ private:
if (const auto gpr = std::get_if<GprNode>(&*node)) {
const u32 index = gpr->GetIndex();
if (index == Register::ZeroIndex) {
- return "0";
+ return {"0U", Type::Uint};
}
- return GetRegister(index);
+ return {GetRegister(index), Type::Float};
}
if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
const u32 value = immediate->GetValue();
if (value < 10) {
// For eyecandy avoid using hex numbers on single digits
- return fmt::format("utof({}u)", immediate->GetValue());
+ return {fmt::format("{}U", immediate->GetValue()), Type::Uint};
}
- return fmt::format("utof(0x{:x}u)", immediate->GetValue());
+ return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint};
}
if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
@@ -640,17 +785,18 @@ private:
}
}();
if (predicate->IsNegated()) {
- return fmt::format("!({})", value);
+ return {fmt::format("!({})", value), Type::Bool};
}
- return value;
+ return {value, Type::Bool};
}
if (const auto abuf = std::get_if<AbufNode>(&*node)) {
UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry,
"Physical attributes in geometry shaders are not implemented");
if (abuf->IsPhysicalBuffer()) {
- return fmt::format("readPhysicalAttribute(ftou({}))",
- Visit(abuf->GetPhysicalAddress()));
+ return {fmt::format("ReadPhysicalAttribute({})",
+ Visit(abuf->GetPhysicalAddress()).AsUint()),
+ Type::Float};
}
return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
}
@@ -661,59 +807,64 @@ private:
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
- return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
- offset_imm / (4 * 4), (offset_imm / 4) % 4);
+ return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
+ offset_imm / (4 * 4), (offset_imm / 4) % 4),
+ Type::Uint};
}
if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset));
+ code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
if (!device.HasComponentIndexingBug()) {
- return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
- final_offset, final_offset);
+ return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
+ final_offset, final_offset),
+ Type::Uint};
}
// AMD's proprietary GLSL compiler emits ill code for variable component access.
// To bypass this driver bug generate 4 ifs, one per each component.
const std::string pack = code.GenerateTemporary();
- code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
+ code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
final_offset);
const std::string result = code.GenerateTemporary();
- code.AddLine("float {};", result);
+ code.AddLine("uint {};", result);
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
pack, GetSwizzle(swizzle));
}
- return result;
+ return {result, Type::Uint};
}
UNREACHABLE_MSG("Unmanaged offset node type");
}
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- const std::string real = Visit(gmem->GetRealAddress());
- const std::string base = Visit(gmem->GetBaseAddress());
- const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
- return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+ const std::string real = Visit(gmem->GetRealAddress()).AsUint();
+ const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
+ const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
+ return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
+ Type::Uint};
}
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
}
- return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
+ return {
+ fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
+ Type::Uint};
}
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- return GetInternalFlag(internal_flag->GetFlag());
+ return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
}
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
// It's invalid to call conditional on nested nodes, use an operation instead
- code.AddLine("if ({}) {{", Visit(conditional->GetCondition()));
+ code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
++code.scope;
VisitBlock(conditional->GetCode());
@@ -724,20 +875,21 @@ private:
}
if (const auto comment = std::get_if<CommentNode>(&*node)) {
- return "// " + comment->GetText();
+ code.AddLine("// " + comment->GetText());
+ return {};
}
UNREACHABLE();
return {};
}
- std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
+ Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
const auto GeometryPass = [&](std::string_view name) {
if (stage == ProgramType::Geometry && buffer) {
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
- return fmt::format("gs_{}[ftou({}) % MAX_VERTEX_INPUT]", name, Visit(buffer));
+ return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint());
}
return std::string(name);
};
@@ -746,25 +898,27 @@ private:
case Attribute::Index::Position:
switch (stage) {
case ProgramType::Geometry:
- return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
- GetSwizzle(element));
+ return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
+ GetSwizzle(element)),
+ Type::Float};
case ProgramType::Fragment:
- return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
+ return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)),
+ Type::Float};
default:
UNREACHABLE();
}
case Attribute::Index::PointCoord:
switch (element) {
case 0:
- return "gl_PointCoord.x";
+ return {"gl_PointCoord.x", Type::Float};
case 1:
- return "gl_PointCoord.y";
+ return {"gl_PointCoord.y", Type::Float};
case 2:
case 3:
- return "0";
+ return {"0.0f", Type::Float};
}
UNREACHABLE();
- return "0";
+ return {"0", Type::Int};
case Attribute::Index::TessCoordInstanceIDVertexID:
// TODO(Subv): Find out what the values are for the first two elements when inside a
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
@@ -773,44 +927,49 @@ private:
switch (element) {
case 2:
// Config pack's first value is instance_id.
- return "uintBitsToFloat(config_pack[0])";
+ return {"config_pack[0]", Type::Uint};
case 3:
- return "uintBitsToFloat(gl_VertexID)";
+ return {"gl_VertexID", Type::Int};
}
UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- return "0";
+ return {"0", Type::Int};
case Attribute::Index::FrontFacing:
// TODO(Subv): Find out what the values are for the other elements.
ASSERT(stage == ProgramType::Fragment);
switch (element) {
case 3:
- return "itof(gl_FrontFacing ? -1 : 0)";
+ return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
}
UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
- return "0";
+ return {"0", Type::Int};
default:
if (IsGenericAttribute(attribute)) {
- return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element);
+ return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element),
+ Type::Float};
}
break;
}
UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
- return "0";
+ return {"0", Type::Int};
}
- std::string ApplyPrecise(Operation operation, const std::string& value) {
+ Expression ApplyPrecise(Operation operation, std::string value, Type type) {
if (!IsPrecise(operation)) {
- return value;
+ return {std::move(value), type};
}
- // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
- const std::string precise = stage != ProgramType::Fragment ? "precise " : "";
+ // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to
+ // be found in fragment shaders, so we disable precise there. There are vertex shaders that
+ // also fail to build but nobody seems to care about those.
+ // Note: Only bugged drivers will skip precise.
+ const bool disable_precise = device.HasPreciseBug() && stage == ProgramType::Fragment;
- const std::string temporary = code.GenerateTemporary();
- code.AddLine("{}float {} = {};", precise, temporary, value);
- return temporary;
+ std::string temporary = code.GenerateTemporary();
+ code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
+ temporary, value);
+ return {std::move(temporary), type};
}
- std::string VisitOperand(Operation operation, std::size_t operand_index) {
+ Expression VisitOperand(Operation operation, std::size_t operand_index) {
const auto& operand = operation[operand_index];
const bool parent_precise = IsPrecise(operation);
const bool child_precise = IsPrecise(operand);
@@ -819,19 +978,16 @@ private:
return Visit(operand);
}
- const std::string temporary = code.GenerateTemporary();
- code.AddLine("float {} = {};", temporary, Visit(operand));
- return temporary;
- }
-
- std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
- return CastOperand(VisitOperand(operation, operand_index), type);
+ Expression value = Visit(operand);
+ std::string temporary = code.GenerateTemporary();
+ code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode());
+ return {std::move(temporary), value.GetType()};
}
- std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) {
+ Expression GetOutputAttribute(const AbufNode* abuf) {
switch (const auto attribute = abuf->GetIndex()) {
case Attribute::Index::Position:
- return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false);
+ return {"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float};
case Attribute::Index::LayerViewportPointSize:
switch (abuf->GetElement()) {
case 0:
@@ -841,119 +997,79 @@ private:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
- return std::make_pair("gl_Layer", true);
+ return {"gl_Layer", Type::Int};
case 2:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
- return std::make_pair("gl_ViewportIndex", true);
+ return {"gl_ViewportIndex", Type::Int};
case 3:
UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
- return std::make_pair("gl_PointSize", false);
+ return {"gl_PointSize", Type::Float};
}
return {};
case Attribute::Index::ClipDistances0123:
- return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false);
+ return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float};
case Attribute::Index::ClipDistances4567:
- return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4),
- false);
+ return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float};
default:
if (IsGenericAttribute(attribute)) {
- return std::make_pair(
- GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false);
+ return {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()),
+ Type::Float};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
return {};
}
}
- std::string CastOperand(const std::string& value, Type type) const {
- switch (type) {
- case Type::Bool:
- case Type::Bool2:
- case Type::Float:
- return value;
- case Type::Int:
- return fmt::format("ftoi({})", value);
- case Type::Uint:
- return fmt::format("ftou({})", value);
- case Type::HalfFloat:
- return fmt::format("toHalf2({})", value);
- }
- UNREACHABLE();
- return value;
+ Expression GenerateUnary(Operation operation, std::string_view func, Type result_type,
+ Type type_a) {
+ std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
- std::string BitwiseCastResult(const std::string& value, Type type,
- bool needs_parenthesis = false) {
- switch (type) {
- case Type::Bool:
- case Type::Bool2:
- case Type::Float:
- if (needs_parenthesis) {
- return fmt::format("({})", value);
- }
- return value;
- case Type::Int:
- return fmt::format("itof({})", value);
- case Type::Uint:
- return fmt::format("utof({})", value);
- case Type::HalfFloat:
- return fmt::format("fromHalf2({})", value);
- }
- UNREACHABLE();
- return value;
- }
-
- std::string GenerateUnary(Operation operation, const std::string& func, Type result_type,
- Type type_a, bool needs_parenthesis = true) {
- const std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0, type_a));
-
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type, needs_parenthesis));
- }
-
- std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0, type_a);
- const std::string op_b = VisitOperand(operation, 1, type_b);
- const std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
+ Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type,
+ Type type_a, Type type_b) {
+ const std::string op_a = VisitOperand(operation, 0).As(type_a);
+ const std::string op_b = VisitOperand(operation, 1).As(type_b);
+ std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
- std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0, type_a);
- const std::string op_b = VisitOperand(operation, 1, type_b);
- const std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
+ Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type,
+ Type type_a, Type type_b) {
+ const std::string op_a = VisitOperand(operation, 0).As(type_a);
+ const std::string op_b = VisitOperand(operation, 1).As(type_b);
+ std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
- std::string GenerateTernary(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b, Type type_c) {
- const std::string op_a = VisitOperand(operation, 0, type_a);
- const std::string op_b = VisitOperand(operation, 1, type_b);
- const std::string op_c = VisitOperand(operation, 2, type_c);
- const std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
+ Expression GenerateTernary(Operation operation, std::string_view func, Type result_type,
+ Type type_a, Type type_b, Type type_c) {
+ const std::string op_a = VisitOperand(operation, 0).As(type_a);
+ const std::string op_b = VisitOperand(operation, 1).As(type_b);
+ const std::string op_c = VisitOperand(operation, 2).As(type_c);
+ std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
- std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b, Type type_c, Type type_d) {
- const std::string op_a = VisitOperand(operation, 0, type_a);
- const std::string op_b = VisitOperand(operation, 1, type_b);
- const std::string op_c = VisitOperand(operation, 2, type_c);
- const std::string op_d = VisitOperand(operation, 3, type_d);
- const std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
+ Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
+ Type type_a, Type type_b, Type type_c, Type type_d) {
+ const std::string op_a = VisitOperand(operation, 0).As(type_a);
+ const std::string op_b = VisitOperand(operation, 1).As(type_b);
+ const std::string op_c = VisitOperand(operation, 2).As(type_c);
+ const std::string op_d = VisitOperand(operation, 3).As(type_d);
+ std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
const std::vector<TextureIR>& extras) {
- constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
+ constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -970,17 +1086,17 @@ private:
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
- expr += Visit(operation[i]);
+ expr += Visit(operation[i]).AsFloat();
const std::size_t next = i + 1;
if (next < count)
expr += ", ";
}
if (has_array) {
- expr += ", float(ftoi(" + Visit(meta->array) + "))";
+ expr += ", float(" + Visit(meta->array).AsInt() + ')';
}
if (has_shadow) {
- expr += ", " + Visit(meta->depth_compare);
+ expr += ", " + Visit(meta->depth_compare).AsFloat();
}
expr += ')';
@@ -1011,11 +1127,11 @@ private:
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
- expr += fmt::format("ftoi({})", Visit(operand));
+ expr += Visit(operand).AsInt();
}
break;
case Type::Float:
- expr += Visit(operand);
+ expr += Visit(operand).AsFloat();
break;
default: {
const auto type_int = static_cast<u32>(type);
@@ -1031,7 +1147,7 @@ private:
if (aoffi.empty()) {
return {};
}
- constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
+ constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"};
std::string expr = ", ";
expr += coord_constructors.at(aoffi.size() - 1);
expr += '(';
@@ -1044,7 +1160,7 @@ private:
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else if (device.HasVariableAoffi()) {
// Avoid using variable AOFFI on unsupported devices.
- expr += fmt::format("ftoi({})", Visit(operand));
+ expr += Visit(operand).AsInt();
} else {
// Insert 0 on devices not supporting variable AOFFI.
expr += '0';
@@ -1058,328 +1174,314 @@ private:
return expr;
}
- std::string Assign(Operation operation) {
+ Expression Assign(Operation operation) {
const Node& dest = operation[0];
const Node& src = operation[1];
- std::string target;
- bool is_integer = false;
-
+ Expression target;
if (const auto gpr = std::get_if<GprNode>(&*dest)) {
if (gpr->GetIndex() == Register::ZeroIndex) {
// Writing to Register::ZeroIndex is a no op
return {};
}
- target = GetRegister(gpr->GetIndex());
+ target = {GetRegister(gpr->GetIndex()), Type::Float};
} else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
- const auto result = GetOutputAttribute(abuf);
- if (!result) {
- return {};
- }
- target = result->first;
- is_integer = result->second;
+ target = GetOutputAttribute(abuf);
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
}
- target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
+ target = {
+ fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
+ Type::Uint};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- const std::string real = Visit(gmem->GetRealAddress());
- const std::string base = Visit(gmem->GetBaseAddress());
- const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
- target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+ const std::string real = Visit(gmem->GetRealAddress()).AsUint();
+ const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
+ const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
+ target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
+ Type::Uint};
} else {
UNREACHABLE_MSG("Assign called without a proper target");
}
- if (is_integer) {
- code.AddLine("{} = ftoi({});", target, Visit(src));
- } else {
- code.AddLine("{} = {};", target, Visit(src));
- }
+ code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
return {};
}
template <Type type>
- std::string Add(Operation operation) {
+ Expression Add(Operation operation) {
return GenerateBinaryInfix(operation, "+", type, type, type);
}
template <Type type>
- std::string Mul(Operation operation) {
+ Expression Mul(Operation operation) {
return GenerateBinaryInfix(operation, "*", type, type, type);
}
template <Type type>
- std::string Div(Operation operation) {
+ Expression Div(Operation operation) {
return GenerateBinaryInfix(operation, "/", type, type, type);
}
template <Type type>
- std::string Fma(Operation operation) {
+ Expression Fma(Operation operation) {
return GenerateTernary(operation, "fma", type, type, type, type);
}
template <Type type>
- std::string Negate(Operation operation) {
- return GenerateUnary(operation, "-", type, type, true);
+ Expression Negate(Operation operation) {
+ return GenerateUnary(operation, "-", type, type);
}
template <Type type>
- std::string Absolute(Operation operation) {
- return GenerateUnary(operation, "abs", type, type, false);
+ Expression Absolute(Operation operation) {
+ return GenerateUnary(operation, "abs", type, type);
}
- std::string FClamp(Operation operation) {
+ Expression FClamp(Operation operation) {
return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
Type::Float);
}
- std::string FCastHalf0(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
- return fmt::format("({})[0]", op_a);
+ Expression FCastHalf0(Operation operation) {
+ return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
}
- std::string FCastHalf1(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
- return fmt::format("({})[1]", op_a);
+ Expression FCastHalf1(Operation operation) {
+ return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
}
template <Type type>
- std::string Min(Operation operation) {
+ Expression Min(Operation operation) {
return GenerateBinaryCall(operation, "min", type, type, type);
}
template <Type type>
- std::string Max(Operation operation) {
+ Expression Max(Operation operation) {
return GenerateBinaryCall(operation, "max", type, type, type);
}
- std::string Select(Operation operation) {
- const std::string condition = Visit(operation[0]);
- const std::string true_case = Visit(operation[1]);
- const std::string false_case = Visit(operation[2]);
- const std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
+ Expression Select(Operation operation) {
+ const std::string condition = Visit(operation[0]).AsBool();
+ const std::string true_case = Visit(operation[1]).AsUint();
+ const std::string false_case = Visit(operation[2]).AsUint();
+ std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
- return ApplyPrecise(operation, op_str);
+ return ApplyPrecise(operation, std::move(op_str), Type::Uint);
}
- std::string FCos(Operation operation) {
- return GenerateUnary(operation, "cos", Type::Float, Type::Float, false);
+ Expression FCos(Operation operation) {
+ return GenerateUnary(operation, "cos", Type::Float, Type::Float);
}
- std::string FSin(Operation operation) {
- return GenerateUnary(operation, "sin", Type::Float, Type::Float, false);
+ Expression FSin(Operation operation) {
+ return GenerateUnary(operation, "sin", Type::Float, Type::Float);
}
- std::string FExp2(Operation operation) {
- return GenerateUnary(operation, "exp2", Type::Float, Type::Float, false);
+ Expression FExp2(Operation operation) {
+ return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
}
- std::string FLog2(Operation operation) {
- return GenerateUnary(operation, "log2", Type::Float, Type::Float, false);
+ Expression FLog2(Operation operation) {
+ return GenerateUnary(operation, "log2", Type::Float, Type::Float);
}
- std::string FInverseSqrt(Operation operation) {
- return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float, false);
+ Expression FInverseSqrt(Operation operation) {
+ return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
}
- std::string FSqrt(Operation operation) {
- return GenerateUnary(operation, "sqrt", Type::Float, Type::Float, false);
+ Expression FSqrt(Operation operation) {
+ return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
}
- std::string FRoundEven(Operation operation) {
- return GenerateUnary(operation, "roundEven", Type::Float, Type::Float, false);
+ Expression FRoundEven(Operation operation) {
+ return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
}
- std::string FFloor(Operation operation) {
- return GenerateUnary(operation, "floor", Type::Float, Type::Float, false);
+ Expression FFloor(Operation operation) {
+ return GenerateUnary(operation, "floor", Type::Float, Type::Float);
}
- std::string FCeil(Operation operation) {
- return GenerateUnary(operation, "ceil", Type::Float, Type::Float, false);
+ Expression FCeil(Operation operation) {
+ return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
}
- std::string FTrunc(Operation operation) {
- return GenerateUnary(operation, "trunc", Type::Float, Type::Float, false);
+ Expression FTrunc(Operation operation) {
+ return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
}
template <Type type>
- std::string FCastInteger(Operation operation) {
- return GenerateUnary(operation, "float", Type::Float, type, false);
+ Expression FCastInteger(Operation operation) {
+ return GenerateUnary(operation, "float", Type::Float, type);
}
- std::string ICastFloat(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Float, false);
+ Expression ICastFloat(Operation operation) {
+ return GenerateUnary(operation, "int", Type::Int, Type::Float);
}
- std::string ICastUnsigned(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Uint, false);
+ Expression ICastUnsigned(Operation operation) {
+ return GenerateUnary(operation, "int", Type::Int, Type::Uint);
}
template <Type type>
- std::string LogicalShiftLeft(Operation operation) {
+ Expression LogicalShiftLeft(Operation operation) {
return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
}
- std::string ILogicalShiftRight(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::Uint);
- const std::string op_b = VisitOperand(operation, 1, Type::Uint);
- const std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
+ Expression ILogicalShiftRight(Operation operation) {
+ const std::string op_a = VisitOperand(operation, 0).AsUint();
+ const std::string op_b = VisitOperand(operation, 1).AsUint();
+ std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, Type::Int));
+ return ApplyPrecise(operation, std::move(op_str), Type::Int);
}
- std::string IArithmeticShiftRight(Operation operation) {
+ Expression IArithmeticShiftRight(Operation operation) {
return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
}
template <Type type>
- std::string BitwiseAnd(Operation operation) {
+ Expression BitwiseAnd(Operation operation) {
return GenerateBinaryInfix(operation, "&", type, type, type);
}
template <Type type>
- std::string BitwiseOr(Operation operation) {
+ Expression BitwiseOr(Operation operation) {
return GenerateBinaryInfix(operation, "|", type, type, type);
}
template <Type type>
- std::string BitwiseXor(Operation operation) {
+ Expression BitwiseXor(Operation operation) {
return GenerateBinaryInfix(operation, "^", type, type, type);
}
template <Type type>
- std::string BitwiseNot(Operation operation) {
- return GenerateUnary(operation, "~", type, type, false);
+ Expression BitwiseNot(Operation operation) {
+ return GenerateUnary(operation, "~", type, type);
}
- std::string UCastFloat(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Float, false);
+ Expression UCastFloat(Operation operation) {
+ return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
}
- std::string UCastSigned(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Int, false);
+ Expression UCastSigned(Operation operation) {
+ return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
}
- std::string UShiftRight(Operation operation) {
+ Expression UShiftRight(Operation operation) {
return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
}
template <Type type>
- std::string BitfieldInsert(Operation operation) {
+ Expression BitfieldInsert(Operation operation) {
return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
Type::Int);
}
template <Type type>
- std::string BitfieldExtract(Operation operation) {
+ Expression BitfieldExtract(Operation operation) {
return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
}
template <Type type>
- std::string BitCount(Operation operation) {
- return GenerateUnary(operation, "bitCount", type, type, false);
+ Expression BitCount(Operation operation) {
+ return GenerateUnary(operation, "bitCount", type, type);
}
- std::string HNegate(Operation operation) {
+ Expression HNegate(Operation operation) {
const auto GetNegate = [&](std::size_t index) {
- return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1";
+ return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
};
- const std::string value =
- fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0, Type::HalfFloat),
- GetNegate(1), GetNegate(2));
- return BitwiseCastResult(value, Type::HalfFloat);
- }
-
- std::string HClamp(Operation operation) {
- const std::string value = VisitOperand(operation, 0, Type::HalfFloat);
- const std::string min = VisitOperand(operation, 1, Type::Float);
- const std::string max = VisitOperand(operation, 2, Type::Float);
- const std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
-
- return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat));
- }
-
- std::string HCastFloat(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::Float);
- return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a);
- }
-
- std::string HUnpack(Operation operation) {
- const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)};
- const auto value = [&]() -> std::string {
- switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
- case Tegra::Shader::HalfType::H0_H1:
- return operand;
- case Tegra::Shader::HalfType::F32:
- return fmt::format("vec2(fromHalf2({}))", operand);
- case Tegra::Shader::HalfType::H0_H0:
- return fmt::format("vec2({}[0])", operand);
- case Tegra::Shader::HalfType::H1_H1:
- return fmt::format("vec2({}[1])", operand);
- }
- UNREACHABLE();
- return "0";
- }();
- return fmt::format("fromHalf2({})", value);
+ return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
+ GetNegate(1), GetNegate(2)),
+ Type::HalfFloat};
+ }
+
+ Expression HClamp(Operation operation) {
+ const std::string value = VisitOperand(operation, 0).AsHalfFloat();
+ const std::string min = VisitOperand(operation, 1).AsFloat();
+ const std::string max = VisitOperand(operation, 2).AsFloat();
+ std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
+
+ return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
+ }
+
+ Expression HCastFloat(Operation operation) {
+ return {fmt::format("vec2({})", VisitOperand(operation, 0).AsFloat()), Type::HalfFloat};
+ }
+
+ Expression HUnpack(Operation operation) {
+ Expression operand = VisitOperand(operation, 0);
+ switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
+ case Tegra::Shader::HalfType::H0_H1:
+ return operand;
+ case Tegra::Shader::HalfType::F32:
+ return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
+ case Tegra::Shader::HalfType::H0_H0:
+ return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
+ case Tegra::Shader::HalfType::H1_H1:
+ return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
+ }
}
- std::string HMergeF32(Operation operation) {
- return fmt::format("float(toHalf2({})[0])", Visit(operation[0]));
+ Expression HMergeF32(Operation operation) {
+ return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
}
- std::string HMergeH0(Operation operation) {
- return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[1]),
- Visit(operation[0]));
+ Expression HMergeH0(Operation operation) {
+ std::string dest = VisitOperand(operation, 0).AsUint();
+ std::string src = VisitOperand(operation, 1).AsUint();
+ return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint};
}
- std::string HMergeH1(Operation operation) {
- return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[0]),
- Visit(operation[1]));
+ Expression HMergeH1(Operation operation) {
+ std::string dest = VisitOperand(operation, 0).AsUint();
+ std::string src = VisitOperand(operation, 1).AsUint();
+ return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint};
}
- std::string HPack2(Operation operation) {
- return fmt::format("utof(packHalf2x16(vec2({}, {})))", Visit(operation[0]),
- Visit(operation[1]));
+ Expression HPack2(Operation operation) {
+ return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
+ VisitOperand(operation, 1).AsFloat()),
+ Type::HalfFloat};
}
template <Type type>
- std::string LogicalLessThan(Operation operation) {
+ Expression LogicalLessThan(Operation operation) {
return GenerateBinaryInfix(operation, "<", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalEqual(Operation operation) {
+ Expression LogicalEqual(Operation operation) {
return GenerateBinaryInfix(operation, "==", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalLessEqual(Operation operation) {
+ Expression LogicalLessEqual(Operation operation) {
return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalGreaterThan(Operation operation) {
+ Expression LogicalGreaterThan(Operation operation) {
return GenerateBinaryInfix(operation, ">", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalNotEqual(Operation operation) {
+ Expression LogicalNotEqual(Operation operation) {
return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalGreaterEqual(Operation operation) {
+ Expression LogicalGreaterEqual(Operation operation) {
return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type);
}
- std::string LogicalFIsNan(Operation operation) {
- return GenerateUnary(operation, "isnan", Type::Bool, Type::Float, false);
+ Expression LogicalFIsNan(Operation operation) {
+ return GenerateUnary(operation, "isnan", Type::Bool, Type::Float);
}
- std::string LogicalAssign(Operation operation) {
+ Expression LogicalAssign(Operation operation) {
const Node& dest = operation[0];
const Node& src = operation[1];
@@ -1400,78 +1502,80 @@ private:
target = GetInternalFlag(flag->GetFlag());
}
- code.AddLine("{} = {};", target, Visit(src));
+ code.AddLine("{} = {};", target, Visit(src).AsBool());
return {};
}
- std::string LogicalAnd(Operation operation) {
+ Expression LogicalAnd(Operation operation) {
return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
}
- std::string LogicalOr(Operation operation) {
+ Expression LogicalOr(Operation operation) {
return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
}
- std::string LogicalXor(Operation operation) {
+ Expression LogicalXor(Operation operation) {
return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
}
- std::string LogicalNegate(Operation operation) {
- return GenerateUnary(operation, "!", Type::Bool, Type::Bool, false);
+ Expression LogicalNegate(Operation operation) {
+ return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
}
- std::string LogicalPick2(Operation operation) {
- const std::string pair = VisitOperand(operation, 0, Type::Bool2);
- return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
+ Expression LogicalPick2(Operation operation) {
+ return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
+ VisitOperand(operation, 1).AsUint()),
+ Type::Bool};
}
- std::string LogicalAnd2(Operation operation) {
+ Expression LogicalAnd2(Operation operation) {
return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
}
template <bool with_nan>
- std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
- const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
- Type::HalfFloat, Type::HalfFloat)};
+ Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
+ Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
+ Type::HalfFloat, Type::HalfFloat);
if constexpr (!with_nan) {
return comparison;
}
- return fmt::format("halfFloatNanComparison({}, {}, {})", comparison,
- VisitOperand(operation, 0, Type::HalfFloat),
- VisitOperand(operation, 1, Type::HalfFloat));
+ return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
+ VisitOperand(operation, 0).AsHalfFloat(),
+ VisitOperand(operation, 1).AsHalfFloat()),
+ Type::Bool2};
}
template <bool with_nan>
- std::string Logical2HLessThan(Operation operation) {
+ Expression Logical2HLessThan(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "lessThan");
}
template <bool with_nan>
- std::string Logical2HEqual(Operation operation) {
+ Expression Logical2HEqual(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "equal");
}
template <bool with_nan>
- std::string Logical2HLessEqual(Operation operation) {
+ Expression Logical2HLessEqual(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
}
template <bool with_nan>
- std::string Logical2HGreaterThan(Operation operation) {
+ Expression Logical2HGreaterThan(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "greaterThan");
}
template <bool with_nan>
- std::string Logical2HNotEqual(Operation operation) {
+ Expression Logical2HNotEqual(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "notEqual");
}
template <bool with_nan>
- std::string Logical2HGreaterEqual(Operation operation) {
+ Expression Logical2HGreaterEqual(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
}
- std::string Texture(Operation operation) {
+ Expression Texture(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1480,10 +1584,10 @@ private:
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
- return expr + GetSwizzle(meta->element);
+ return {expr + GetSwizzle(meta->element), Type::Float};
}
- std::string TextureLod(Operation operation) {
+ Expression TextureLod(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1492,54 +1596,54 @@ private:
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
- return expr + GetSwizzle(meta->element);
+ return {expr + GetSwizzle(meta->element), Type::Float};
}
- std::string TextureGather(Operation operation) {
+ Expression TextureGather(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
- return GenerateTexture(operation, "Gather",
- {TextureArgument{type, meta->component}, TextureAoffi{}}) +
- GetSwizzle(meta->element);
+ return {GenerateTexture(operation, "Gather",
+ {TextureArgument{type, meta->component}, TextureAoffi{}}) +
+ GetSwizzle(meta->element),
+ Type::Float};
}
- std::string TextureQueryDimensions(Operation operation) {
+ Expression TextureQueryDimensions(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
const std::string sampler = GetSampler(meta->sampler);
- const std::string lod = VisitOperand(operation, 0, Type::Int);
+ const std::string lod = VisitOperand(operation, 0).AsInt();
switch (meta->element) {
case 0:
case 1:
- return fmt::format("itof(int(textureSize({}, {}){}))", sampler, lod,
- GetSwizzle(meta->element));
- case 2:
- return "0";
+ return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
+ Type::Int};
case 3:
- return fmt::format("itof(textureQueryLevels({}))", sampler);
+ return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
}
UNREACHABLE();
- return "0";
+ return {"0", Type::Int};
}
- std::string TextureQueryLod(Operation operation) {
+ Expression TextureQueryLod(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
if (meta->element < 2) {
- return fmt::format("itof(int(({} * vec2(256)){}))",
- GenerateTexture(operation, "QueryLod", {}),
- GetSwizzle(meta->element));
+ return {fmt::format("int(({} * vec2(256)){})",
+ GenerateTexture(operation, "QueryLod", {}),
+ GetSwizzle(meta->element)),
+ Type::Int};
}
- return "0";
+ return {"0", Type::Int};
}
- std::string TexelFetch(Operation operation) {
- constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"};
+ Expression TexelFetch(Operation operation) {
+ constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
UNIMPLEMENTED_IF(meta->sampler.IsArray());
@@ -1552,7 +1656,7 @@ private:
expr += constructors.at(operation.GetOperandsCount() - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
- expr += VisitOperand(operation, i, Type::Int);
+ expr += VisitOperand(operation, i).AsInt();
const std::size_t next = i + 1;
if (next == count)
expr += ')';
@@ -1565,7 +1669,7 @@ private:
if (meta->lod) {
expr += ", ";
- expr += CastOperand(Visit(meta->lod), Type::Int);
+ expr += Visit(meta->lod).AsInt();
}
expr += ')';
expr += GetSwizzle(meta->element);
@@ -1580,11 +1684,11 @@ private:
code.AddLine("float {} = {};", tmp, expr);
code.AddLine("#endif");
- return tmp;
+ return {tmp, Type::Float};
}
- std::string ImageStore(Operation operation) {
- constexpr std::array<const char*, 4> constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
+ Expression ImageStore(Operation operation) {
+ constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
const auto meta{std::get<MetaImage>(operation.GetMeta())};
std::string expr = "imageStore(";
@@ -1594,7 +1698,7 @@ private:
const std::size_t coords_count{operation.GetOperandsCount()};
expr += constructors.at(coords_count - 1);
for (std::size_t i = 0; i < coords_count; ++i) {
- expr += VisitOperand(operation, i, Type::Int);
+ expr += VisitOperand(operation, i).AsInt();
if (i + 1 < coords_count) {
expr += ", ";
}
@@ -1605,7 +1709,7 @@ private:
UNIMPLEMENTED_IF(values_count != 4);
expr += "vec4(";
for (std::size_t i = 0; i < values_count; ++i) {
- expr += Visit(meta.values.at(i));
+ expr += Visit(meta.values.at(i)).AsFloat();
if (i + 1 < values_count) {
expr += ", ";
}
@@ -1616,52 +1720,52 @@ private:
return {};
}
- std::string Branch(Operation operation) {
+ Expression Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
- code.AddLine("jmp_to = 0x{:x}u;", target->GetValue());
+ code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
code.AddLine("break;");
return {};
}
- std::string BranchIndirect(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::Uint);
+ Expression BranchIndirect(Operation operation) {
+ const std::string op_a = VisitOperand(operation, 0).AsUint();
code.AddLine("jmp_to = {};", op_a);
code.AddLine("break;");
return {};
}
- std::string PushFlowStack(Operation operation) {
+ Expression PushFlowStack(Operation operation) {
const auto stack = std::get<MetaStackClass>(operation.GetMeta());
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
- code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack),
+ code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
target->GetValue());
return {};
}
- std::string PopFlowStack(Operation operation) {
+ Expression PopFlowStack(Operation operation) {
const auto stack = std::get<MetaStackClass>(operation.GetMeta());
code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
code.AddLine("break;");
return {};
}
- std::string Exit(Operation operation) {
+ Expression Exit(Operation operation) {
if (stage != ProgramType::Fragment) {
code.AddLine("return;");
return {};
}
const auto& used_registers = ir.GetRegisters();
- const auto SafeGetRegister = [&](u32 reg) -> std::string {
+ const auto SafeGetRegister = [&](u32 reg) -> Expression {
// TODO(Rodrigo): Replace with contains once C++20 releases
if (used_registers.find(reg) != used_registers.end()) {
- return GetRegister(reg);
+ return {GetRegister(reg), Type::Float};
}
- return "0.0f";
+ return {"0.0f", Type::Float};
};
UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
@@ -1674,7 +1778,7 @@ private:
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
code.AddLine("FragColor{}[{}] = {};", render_target, component,
- SafeGetRegister(current_reg));
+ SafeGetRegister(current_reg).AsFloat());
++current_reg;
}
}
@@ -1683,14 +1787,14 @@ private:
if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
- code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1));
+ code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
}
code.AddLine("return;");
return {};
}
- std::string Discard(Operation operation) {
+ Expression Discard(Operation operation) {
// Enclose "discard" in a conditional, so that GLSL compilation does not complain
// about unexecuted instructions that may follow this.
code.AddLine("if (true) {{");
@@ -1701,7 +1805,7 @@ private:
return {};
}
- std::string EmitVertex(Operation operation) {
+ Expression EmitVertex(Operation operation) {
ASSERT_MSG(stage == ProgramType::Geometry,
"EmitVertex is expected to be used in a geometry shader.");
@@ -1712,7 +1816,7 @@ private:
return {};
}
- std::string EndPrimitive(Operation operation) {
+ Expression EndPrimitive(Operation operation) {
ASSERT_MSG(stage == ProgramType::Geometry,
"EndPrimitive is expected to be used in a geometry shader.");
@@ -1720,59 +1824,59 @@ private:
return {};
}
- std::string YNegate(Operation operation) {
+ Expression YNegate(Operation operation) {
// Config pack's third value is Y_NEGATE's state.
- return "uintBitsToFloat(config_pack[2])";
+ return {"config_pack[2]", Type::Uint};
}
template <u32 element>
- std::string LocalInvocationId(Operation) {
- return "utof(gl_LocalInvocationID"s + GetSwizzle(element) + ')';
+ Expression LocalInvocationId(Operation) {
+ return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
}
template <u32 element>
- std::string WorkGroupId(Operation) {
- return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
+ Expression WorkGroupId(Operation) {
+ return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
}
- std::string BallotThread(Operation operation) {
- const std::string value = VisitOperand(operation, 0, Type::Bool);
+ Expression BallotThread(Operation operation) {
+ const std::string value = VisitOperand(operation, 0).AsBool();
if (!device.HasWarpIntrinsics()) {
LOG_ERROR(Render_OpenGL,
"Nvidia warp intrinsics are not available and its required by a shader");
// Stub on non-Nvidia devices by simulating all threads voting the same as the active
// one.
- return fmt::format("utof({} ? 0xFFFFFFFFU : 0U)", value);
+ return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
}
- return fmt::format("utof(ballotThreadNV({}))", value);
+ return {fmt::format("ballotThreadNV({})", value), Type::Uint};
}
- std::string Vote(Operation operation, const char* func) {
- const std::string value = VisitOperand(operation, 0, Type::Bool);
+ Expression Vote(Operation operation, const char* func) {
+ const std::string value = VisitOperand(operation, 0).AsBool();
if (!device.HasWarpIntrinsics()) {
LOG_ERROR(Render_OpenGL,
"Nvidia vote intrinsics are not available and its required by a shader");
// Stub with a warp size of one.
- return value;
+ return {value, Type::Bool};
}
- return fmt::format("{}({})", func, value);
+ return {fmt::format("{}({})", func, value), Type::Bool};
}
- std::string VoteAll(Operation operation) {
+ Expression VoteAll(Operation operation) {
return Vote(operation, "allThreadsNV");
}
- std::string VoteAny(Operation operation) {
+ Expression VoteAny(Operation operation) {
return Vote(operation, "anyThreadNV");
}
- std::string VoteEqual(Operation operation) {
+ Expression VoteEqual(Operation operation) {
if (!device.HasWarpIntrinsics()) {
LOG_ERROR(Render_OpenGL,
"Nvidia vote intrinsics are not available and its required by a shader");
// We must return true here since a stub for a theoretical warp size of 1 will always
// return an equal result for all its votes.
- return "true";
+ return {"true", Type::Bool};
}
return Vote(operation, "allThreadsEqualNV");
}
@@ -1973,8 +2077,8 @@ private:
}
std::string GetInternalFlag(InternalFlag flag) const {
- constexpr std::array<const char*, 4> InternalFlagNames = {"zero_flag", "sign_flag",
- "carry_flag", "overflow_flag"};
+ constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
+ "overflow_flag"};
const auto index = static_cast<u32>(flag);
ASSERT(index < static_cast<u32>(InternalFlag::Amount));
@@ -2022,24 +2126,16 @@ private:
std::string GetCommonDeclarations() {
return fmt::format(
- "#define MAX_CONSTBUFFER_ELEMENTS {}\n"
"#define ftoi floatBitsToInt\n"
"#define ftou floatBitsToUint\n"
"#define itof intBitsToFloat\n"
"#define utof uintBitsToFloat\n\n"
- "float fromHalf2(vec2 pair) {{\n"
- " return utof(packHalf2x16(pair));\n"
- "}}\n\n"
- "vec2 toHalf2(float value) {{\n"
- " return unpackHalf2x16(ftou(value));\n"
- "}}\n\n"
- "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
+ "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
" bvec2 is_nan1 = isnan(pair1);\n"
" bvec2 is_nan2 = isnan(pair2);\n"
" return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
"is_nan2.y);\n"
- "}}\n",
- MAX_CONSTBUFFER_ELEMENTS);
+ "}}\n\n");
}
ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a05cef3b9..af9684839 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -101,9 +101,7 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
RendererOpenGL::~RendererOpenGL() = default;
-void RendererOpenGL::SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
-
+void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
system.GetPerfStats().EndSystemFrame();
// Maintain the rasterizer's state as a priority
@@ -113,9 +111,9 @@ void RendererOpenGL::SwapBuffers(
if (framebuffer) {
// If framebuffer is provided, reload it from memory to a texture
- if (screen_info.texture.width != (GLsizei)framebuffer->get().width ||
- screen_info.texture.height != (GLsizei)framebuffer->get().height ||
- screen_info.texture.pixel_format != framebuffer->get().pixel_format) {
+ if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
+ screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
+ screen_info.texture.pixel_format != framebuffer->pixel_format) {
// Reallocate texture if the framebuffer size has changed.
// This is expected to not happen very often and hence should not be a
// performance problem.
@@ -149,43 +147,43 @@ void RendererOpenGL::SwapBuffers(
* Loads framebuffer from emulated memory into the active OpenGL texture.
*/
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
- const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)};
- const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
- const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
-
// Framebuffer orientation handling
framebuffer_transform_flags = framebuffer.transform_flags;
framebuffer_crop_rect = framebuffer.crop_rect;
- // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default
- // only allows rows to have a memory alignement of 4.
- ASSERT(framebuffer.stride % 4 == 0);
-
- if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
- // Reset the screen info's display texture to its own permanent texture
- screen_info.display_texture = screen_info.texture.resource.handle;
-
- rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
-
- constexpr u32 linear_bpp = 4;
- VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
- framebuffer.width, framebuffer.height, bytes_per_pixel,
- linear_bpp, Memory::GetPointer(framebuffer_addr),
- gl_framebuffer_data.data());
-
- glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
+ const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
+ if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
+ return;
+ }
- // Update existing texture
- // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
- // they differ from the LCD resolution.
- // TODO: Applications could theoretically crash yuzu here by specifying too large
- // framebuffer sizes. We should make sure that this cannot happen.
- glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
- framebuffer.height, screen_info.texture.gl_format,
- screen_info.texture.gl_type, gl_framebuffer_data.data());
+ // Reset the screen info's display texture to its own permanent texture
+ screen_info.display_texture = screen_info.texture.resource.handle;
- glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
- }
+ const auto pixel_format{
+ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
+ const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
+ const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
+ const auto host_ptr{Memory::GetPointer(framebuffer_addr)};
+ rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
+
+ // TODO(Rodrigo): Read this from HLE
+ constexpr u32 block_height_log2 = 4;
+ VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
+ framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
+ gl_framebuffer_data.data(), host_ptr);
+
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
+
+ // Update existing texture
+ // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
+ // they differ from the LCD resolution.
+ // TODO: Applications could theoretically crash yuzu here by specifying too large
+ // framebuffer sizes. We should make sure that this cannot happen.
+ glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
+ framebuffer.height, screen_info.texture.gl_format,
+ screen_info.texture.gl_type, gl_framebuffer_data.data());
+
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
/**
@@ -276,22 +274,29 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
texture.height = framebuffer.height;
texture.pixel_format = framebuffer.pixel_format;
+ const auto pixel_format{
+ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
+ const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
+ gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
+
GLint internal_format;
switch (framebuffer.pixel_format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
- gl_framebuffer_data.resize(texture.width * texture.height * 4);
+ break;
+ case Tegra::FramebufferConfig::PixelFormat::RGB565:
+ internal_format = GL_RGB565;
+ texture.gl_format = GL_RGB;
+ texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
break;
default:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
- gl_framebuffer_data.resize(texture.width * texture.height * 4);
- LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer pixel format: {}",
- static_cast<u32>(framebuffer.pixel_format));
- UNREACHABLE();
+ UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
+ static_cast<u32>(framebuffer.pixel_format));
}
texture.resource.Release();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 4aebf2321..9bd086368 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -43,14 +43,13 @@ struct ScreenInfo {
TextureInfo texture;
};
-class RendererOpenGL : public VideoCore::RendererBase {
+class RendererOpenGL final : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
~RendererOpenGL() override;
/// Swap buffers (render frame)
- void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
/// Initialize the renderer
bool Init() override;
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 8973fbefa..32facd6ba 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -14,6 +14,12 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
+namespace {
+constexpr OperationCode GetFloatSelector(u64 selector) {
+ return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
+}
+} // Anonymous namespace
+
u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
@@ -22,7 +28,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
case OpCode::Id::I2I_R:
case OpCode::Id::I2I_C:
case OpCode::Id::I2I_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.selector);
+ UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
UNIMPLEMENTED_IF(instr.alu.saturate_d);
@@ -57,8 +63,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
case OpCode::Id::I2F_R:
case OpCode::Id::I2F_C:
case OpCode::Id::I2F_IMM: {
+ UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
- UNIMPLEMENTED_IF(instr.conversion.selector);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
@@ -113,8 +119,10 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
}();
if (instr.conversion.src_size == Register::Size::Short) {
- // TODO: figure where extract is sey in the encoding
- value = Operation(OperationCode::FCastHalf0, PRECISE, value);
+ value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
+ std::move(value));
+ } else {
+ ASSERT(instr.conversion.float_src.selector == 0);
}
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
@@ -169,8 +177,10 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
}();
if (instr.conversion.src_size == Register::Size::Short) {
- // TODO: figure where extract is sey in the encoding
- value = Operation(OperationCode::FCastHalf0, PRECISE, value);
+ value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
+ std::move(value));
+ } else {
+ ASSERT(instr.conversion.float_src.selector == 0);
}
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index 34854fcca..200c2c983 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -17,8 +17,8 @@ using Tegra::Shader::Pred;
u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
- const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
- instr.fsetp.neg_a != 0);
+ Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
+ instr.fsetp.neg_a != 0);
Node op_b = [&]() {
if (instr.is_b_imm) {
return GetImmediate19(instr);
@@ -28,12 +28,13 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
- op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false);
+ op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b);
// We can't use the constant predicate as destination.
ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b);
+ const Node predicate =
+ GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b));
const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index c50f6354d..4ceb219be 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -445,11 +445,12 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
return PixelFormat::ABGR8U;
+ case Tegra::FramebufferConfig::PixelFormat::RGB565:
+ return PixelFormat::B5G6R5U;
case Tegra::FramebufferConfig::PixelFormat::BGRA8:
return PixelFormat::BGRA8;
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
+ UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
return PixelFormat::ABGR8U;
}
}