Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/CMakeLists.txt | 80
-rw-r--r--  src/video_core/buffer_cache/buffer_base.h | 19
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 619
-rw-r--r--  src/video_core/command_classes/vic.cpp | 21
-rw-r--r--  src/video_core/dirty_flags.cpp | 6
-rw-r--r--  src/video_core/dirty_flags.h | 2
-rw-r--r--  src/video_core/dma_pusher.cpp | 10
-rw-r--r--  src/video_core/engines/const_buffer_engine_interface.h | 103
-rw-r--r--  src/video_core/engines/kepler_compute.cpp | 45
-rw-r--r--  src/video_core/engines/kepler_compute.h | 21
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 39
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 46
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp | 37
-rw-r--r--  src/video_core/engines/maxwell_dma.h | 19
-rw-r--r--  src/video_core/engines/shader_bytecode.h | 2298
-rw-r--r--  src/video_core/engines/shader_header.h | 158
-rw-r--r--  src/video_core/engines/shader_type.h | 21
-rw-r--r--  src/video_core/fence_manager.h | 7
-rw-r--r--  src/video_core/gpu.cpp | 5
-rw-r--r--  src/video_core/guest_driver.cpp | 37
-rw-r--r--  src/video_core/guest_driver.h | 46
-rw-r--r--  src/video_core/memory_manager.cpp | 1
-rw-r--r--  src/video_core/rasterizer_interface.h | 24
-rw-r--r--  src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 2124
-rw-r--r--  src/video_core/renderer_opengl/gl_arb_decompiler.h | 29
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp | 90
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h | 59
-rw-r--r--  src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 209
-rw-r--r--  src/video_core/renderer_opengl/gl_compute_pipeline.h | 93
-rw-r--r--  src/video_core/renderer_opengl/gl_device.cpp | 146
-rw-r--r--  src/video_core/renderer_opengl/gl_device.h | 84
-rw-r--r--  src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 572
-rw-r--r--  src/video_core/renderer_opengl/gl_graphics_pipeline.h | 169
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 474
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 60
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.cpp | 27
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.h | 14
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 994
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h | 172
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_context.h | 33
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2986
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.h | 69
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 482
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.h | 176
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.cpp | 146
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.h | 185
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_util.cpp | 123
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_util.h | 89
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.cpp | 6
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.h | 1
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp | 361
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.h | 53
-rw-r--r--  src/video_core/renderer_opengl/maxwell_to_gl.h | 108
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp | 68
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.h | 6
-rw-r--r--  src/video_core/renderer_opengl/util_shaders.cpp | 23
-rw-r--r--  src/video_core/renderer_vulkan/blit_image.cpp | 40
-rw-r--r--  src/video_core/renderer_vulkan/blit_image.h | 2
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 92
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.h | 79
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 54
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.h | 7
-rw-r--r--  src/video_core/renderer_vulkan/pipeline_helper.h | 154
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp | 60
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp | 99
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 123
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h | 30
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp | 303
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h | 34
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 296
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.h | 72
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | 172
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.h | 70
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 839
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 145
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.h | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 867
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h | 176
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.cpp | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 499
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 75
-rw-r--r--  src/video_core/renderer_vulkan/vk_render_pass_cache.cpp | 96
-rw-r--r--  src/video_core/renderer_vulkan/vk_render_pass_cache.h | 55
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_pool.cpp | 12
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_pool.h | 12
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 172
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h | 38
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 3166
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.h | 99
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.cpp | 56
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.h | 15
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.cpp | 59
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.h | 31
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp | 305
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h | 80
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 13
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.h | 4
-rw-r--r--  src/video_core/shader/ast.cpp | 752
-rw-r--r--  src/video_core/shader/ast.h | 398
-rw-r--r--  src/video_core/shader/async_shaders.cpp | 234
-rw-r--r--  src/video_core/shader/async_shaders.h | 138
-rw-r--r--  src/video_core/shader/compiler_settings.cpp | 26
-rw-r--r--  src/video_core/shader/compiler_settings.h | 26
-rw-r--r--  src/video_core/shader/control_flow.cpp | 751
-rw-r--r--  src/video_core/shader/control_flow.h | 117
-rw-r--r--  src/video_core/shader/decode.cpp | 368
-rw-r--r--  src/video_core/shader/decode/arithmetic.cpp | 166
-rw-r--r--  src/video_core/shader/decode/arithmetic_half.cpp | 101
-rw-r--r--  src/video_core/shader/decode/arithmetic_half_immediate.cpp | 54
-rw-r--r--  src/video_core/shader/decode/arithmetic_immediate.cpp | 53
-rw-r--r--  src/video_core/shader/decode/arithmetic_integer.cpp | 375
-rw-r--r--  src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 99
-rw-r--r--  src/video_core/shader/decode/bfe.cpp | 77
-rw-r--r--  src/video_core/shader/decode/bfi.cpp | 45
-rw-r--r--  src/video_core/shader/decode/conversion.cpp | 321
-rw-r--r--  src/video_core/shader/decode/ffma.cpp | 62
-rw-r--r--  src/video_core/shader/decode/float_set.cpp | 58
-rw-r--r--  src/video_core/shader/decode/float_set_predicate.cpp | 57
-rw-r--r--  src/video_core/shader/decode/half_set.cpp | 115
-rw-r--r--  src/video_core/shader/decode/half_set_predicate.cpp | 80
-rw-r--r--  src/video_core/shader/decode/hfma2.cpp | 73
-rw-r--r--  src/video_core/shader/decode/image.cpp | 536
-rw-r--r--  src/video_core/shader/decode/integer_set.cpp | 49
-rw-r--r--  src/video_core/shader/decode/integer_set_predicate.cpp | 53
-rw-r--r--  src/video_core/shader/decode/memory.cpp | 493
-rw-r--r--  src/video_core/shader/decode/other.cpp | 322
-rw-r--r--  src/video_core/shader/decode/predicate_set_predicate.cpp | 68
-rw-r--r--  src/video_core/shader/decode/predicate_set_register.cpp | 46
-rw-r--r--  src/video_core/shader/decode/register_set_predicate.cpp | 86
-rw-r--r--  src/video_core/shader/decode/shift.cpp | 153
-rw-r--r--  src/video_core/shader/decode/texture.cpp | 935
-rw-r--r--  src/video_core/shader/decode/video.cpp | 169
-rw-r--r--  src/video_core/shader/decode/warp.cpp | 117
-rw-r--r--  src/video_core/shader/decode/xmad.cpp | 156
-rw-r--r--  src/video_core/shader/expr.cpp | 93
-rw-r--r--  src/video_core/shader/expr.h | 156
-rw-r--r--  src/video_core/shader/memory_util.cpp | 76
-rw-r--r--  src/video_core/shader/memory_util.h | 43
-rw-r--r--  src/video_core/shader/node.h | 701
-rw-r--r--  src/video_core/shader/node_helper.cpp | 115
-rw-r--r--  src/video_core/shader/node_helper.h | 71
-rw-r--r--  src/video_core/shader/registry.cpp | 181
-rw-r--r--  src/video_core/shader/registry.h | 172
-rw-r--r--  src/video_core/shader/shader_ir.cpp | 464
-rw-r--r--  src/video_core/shader/shader_ir.h | 479
-rw-r--r--  src/video_core/shader/track.cpp | 236
-rw-r--r--  src/video_core/shader/transform_feedback.cpp | 115
-rw-r--r--  src/video_core/shader/transform_feedback.h | 23
-rw-r--r--  src/video_core/shader_cache.cpp | 250
-rw-r--r--  src/video_core/shader_cache.h | 215
-rw-r--r--  src/video_core/shader_environment.cpp | 460
-rw-r--r--  src/video_core/shader_environment.h | 183
-rw-r--r--  src/video_core/shader_notify.cpp | 51
-rw-r--r--  src/video_core/shader_notify.h | 28
-rw-r--r--  src/video_core/texture_cache/formatter.cpp | 4
-rw-r--r--  src/video_core/texture_cache/formatter.h | 3
-rw-r--r--  src/video_core/texture_cache/image_view_base.cpp | 9
-rw-r--r--  src/video_core/texture_cache/image_view_base.h | 1
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 41
-rw-r--r--  src/video_core/texture_cache/types.h | 4
-rw-r--r--  src/video_core/texture_cache/util.cpp | 86
-rw-r--r--  src/video_core/textures/texture.h | 9
-rw-r--r--  src/video_core/transform_feedback.cpp | 99
-rw-r--r--  src/video_core/transform_feedback.h | 30
-rw-r--r--  src/video_core/vulkan_common/nsight_aftermath_tracker.cpp | 7
-rw-r--r--  src/video_core/vulkan_common/nsight_aftermath_tracker.h | 21
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.cpp | 362
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.h | 161
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.cpp | 5
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.h | 44
171 files changed, 7988 insertions(+), 27151 deletions(-)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e4de55f4d..007ecc13e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,7 +29,6 @@ add_library(video_core STATIC
dirty_flags.h
dma_pusher.cpp
dma_pusher.h
- engines/const_buffer_engine_interface.h
engines/const_buffer_info.h
engines/engine_interface.h
engines/engine_upload.cpp
@@ -44,9 +43,6 @@ add_library(video_core STATIC
engines/maxwell_3d.h
engines/maxwell_dma.cpp
engines/maxwell_dma.h
- engines/shader_bytecode.h
- engines/shader_header.h
- engines/shader_type.h
framebuffer_config.h
macro/macro.cpp
macro/macro.h
@@ -61,8 +57,6 @@ add_library(video_core STATIC
gpu.h
gpu_thread.cpp
gpu_thread.h
- guest_driver.cpp
- guest_driver.h
memory_manager.cpp
memory_manager.h
query_cache.h
@@ -71,26 +65,25 @@ add_library(video_core STATIC
rasterizer_interface.h
renderer_base.cpp
renderer_base.h
- renderer_opengl/gl_arb_decompiler.cpp
- renderer_opengl/gl_arb_decompiler.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
+ renderer_opengl/gl_compute_pipeline.cpp
+ renderer_opengl/gl_compute_pipeline.h
renderer_opengl/gl_device.cpp
renderer_opengl/gl_device.h
renderer_opengl/gl_fence_manager.cpp
renderer_opengl/gl_fence_manager.h
+ renderer_opengl/gl_graphics_pipeline.cpp
+ renderer_opengl/gl_graphics_pipeline.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_resource_manager.cpp
renderer_opengl/gl_resource_manager.h
renderer_opengl/gl_shader_cache.cpp
renderer_opengl/gl_shader_cache.h
- renderer_opengl/gl_shader_decompiler.cpp
- renderer_opengl/gl_shader_decompiler.h
- renderer_opengl/gl_shader_disk_cache.cpp
- renderer_opengl/gl_shader_disk_cache.h
renderer_opengl/gl_shader_manager.cpp
renderer_opengl/gl_shader_manager.h
+ renderer_opengl/gl_shader_context.h
renderer_opengl/gl_shader_util.cpp
renderer_opengl/gl_shader_util.h
renderer_opengl/gl_state_tracker.cpp
@@ -112,6 +105,7 @@ add_library(video_core STATIC
renderer_vulkan/fixed_pipeline_state.h
renderer_vulkan/maxwell_to_vk.cpp
renderer_vulkan/maxwell_to_vk.h
+ renderer_vulkan/pipeline_helper.h
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/vk_blit_screen.cpp
@@ -138,12 +132,12 @@ add_library(video_core STATIC
renderer_vulkan/vk_query_cache.h
renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h
+ renderer_vulkan/vk_render_pass_cache.cpp
+ renderer_vulkan/vk_render_pass_cache.h
renderer_vulkan/vk_resource_pool.cpp
renderer_vulkan/vk_resource_pool.h
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h
- renderer_vulkan/vk_shader_decompiler.cpp
- renderer_vulkan/vk_shader_decompiler.h
renderer_vulkan/vk_shader_util.cpp
renderer_vulkan/vk_shader_util.h
renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -156,60 +150,12 @@ add_library(video_core STATIC
renderer_vulkan/vk_texture_cache.h
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h
+ shader_cache.cpp
shader_cache.h
+ shader_environment.cpp
+ shader_environment.h
shader_notify.cpp
shader_notify.h
- shader/decode/arithmetic.cpp
- shader/decode/arithmetic_immediate.cpp
- shader/decode/bfe.cpp
- shader/decode/bfi.cpp
- shader/decode/shift.cpp
- shader/decode/arithmetic_integer.cpp
- shader/decode/arithmetic_integer_immediate.cpp
- shader/decode/arithmetic_half.cpp
- shader/decode/arithmetic_half_immediate.cpp
- shader/decode/ffma.cpp
- shader/decode/hfma2.cpp
- shader/decode/conversion.cpp
- shader/decode/memory.cpp
- shader/decode/texture.cpp
- shader/decode/image.cpp
- shader/decode/float_set_predicate.cpp
- shader/decode/integer_set_predicate.cpp
- shader/decode/half_set_predicate.cpp
- shader/decode/predicate_set_register.cpp
- shader/decode/predicate_set_predicate.cpp
- shader/decode/register_set_predicate.cpp
- shader/decode/float_set.cpp
- shader/decode/integer_set.cpp
- shader/decode/half_set.cpp
- shader/decode/video.cpp
- shader/decode/warp.cpp
- shader/decode/xmad.cpp
- shader/decode/other.cpp
- shader/ast.cpp
- shader/ast.h
- shader/async_shaders.cpp
- shader/async_shaders.h
- shader/compiler_settings.cpp
- shader/compiler_settings.h
- shader/control_flow.cpp
- shader/control_flow.h
- shader/decode.cpp
- shader/expr.cpp
- shader/expr.h
- shader/memory_util.cpp
- shader/memory_util.h
- shader/node_helper.cpp
- shader/node_helper.h
- shader/node.h
- shader/registry.cpp
- shader/registry.h
- shader/shader_ir.cpp
- shader/shader_ir.h
- shader/track.cpp
- shader/transform_feedback.cpp
- shader/transform_feedback.h
surface.cpp
surface.h
texture_cache/accelerated_swizzle.cpp
@@ -242,6 +188,8 @@ add_library(video_core STATIC
textures/decoders.h
textures/texture.cpp
textures/texture.h
+ transform_feedback.cpp
+ transform_feedback.h
video_core.cpp
video_core.h
vulkan_common/vulkan_debug_callback.cpp
@@ -265,7 +213,7 @@ add_library(video_core STATIC
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad xbyak)
+target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak)
if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
add_dependencies(video_core ffmpeg-build)
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index b121d36a3..c3318095c 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -226,19 +226,24 @@ public:
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
template <typename Func>
void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
- ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
+ ForEachModifiedRange<Type::CPU>(query_cpu_range, size, true, func);
}
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
template <typename Func>
- void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
- ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
+ void ForEachDownloadRange(VAddr query_cpu_range, u64 size, bool clear, Func&& func) {
+ ForEachModifiedRange<Type::GPU>(query_cpu_range, size, clear, func);
+ }
+
+ template <typename Func>
+ void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 size, Func&& func) {
+ ForEachModifiedRange<Type::GPU>(query_cpu_range, size, true, func);
}
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
template <typename Func>
void ForEachDownloadRange(Func&& func) {
- ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
+ ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), true, func);
}
/// Mark buffer as picked
@@ -415,7 +420,7 @@ private:
* @param func Function to call for each turned off region
*/
template <Type type, typename Func>
- void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
+ void ForEachModifiedRange(VAddr query_cpu_range, s64 size, bool clear, Func&& func) {
static_assert(type != Type::Untracked);
const s64 difference = query_cpu_range - cpu_addr;
@@ -467,7 +472,9 @@ private:
bits = (bits << left_offset) >> left_offset;
const u64 current_word = state_words[word_index] & bits;
- state_words[word_index] &= ~bits;
+ if (clear) {
+ state_words[word_index] &= ~bits;
+ }
if constexpr (type == Type::CPU) {
const u64 current_bits = untracked_words[word_index] & bits;
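Note on the buffer_base.h hunks above: threading the new `clear` flag through ForEachModifiedRange lets callers walk GPU-modified ranges without consuming them, while ForEachDownloadRangeAndClear preserves the old destructive behaviour. A minimal call-site sketch, assuming a Buffer from this cache; the address variables and lambda body are illustrative, not part of the commit:

    // Walk GPU-modified pages in [addr, addr + size) without clearing them,
    // so a later download pass still sees the same ranges.
    u64 pending_bytes = 0;
    buffer.ForEachDownloadRange(addr, size, /*clear=*/false, [&](u64 offset, u64 len) {
        // offset is relative to the buffer's base CPU address
        pending_bytes += len;
    });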
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index cad7f902d..24c858104 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -15,6 +15,7 @@
#include <vector>
#include <boost/container/small_vector.hpp>
+#include <boost/icl/interval_set.hpp>
#include "common/common_types.h"
#include "common/div_ceil.h"
@@ -30,6 +31,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/surface.h"
#include "video_core/texture_cache/slot_vector.h"
#include "video_core/texture_cache/types.h"
@@ -41,14 +43,19 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory);
using BufferId = SlotId;
+using VideoCore::Surface::PixelFormat;
+using namespace Common::Literals;
+
constexpr u32 NUM_VERTEX_BUFFERS = 32;
constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
constexpr u32 NUM_STORAGE_BUFFERS = 16;
+constexpr u32 NUM_TEXTURE_BUFFERS = 16;
constexpr u32 NUM_STAGES = 5;
-using namespace Common::Literals;
+using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
+using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
template <typename P>
class BufferCache {
@@ -66,6 +73,7 @@ class BufferCache {
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
+ static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
static constexpr BufferId NULL_BUFFER_ID{0};
@@ -77,6 +85,9 @@ class BufferCache {
using Runtime = typename P::Runtime;
using Buffer = typename P::Buffer;
+ using IntervalSet = boost::icl::interval_set<VAddr>;
+ using IntervalType = typename IntervalSet::interval_type;
+
struct Empty {};
struct OverlapResult {
@@ -92,6 +103,10 @@ class BufferCache {
BufferId buffer_id;
};
+ struct TextureBufferBinding : Binding {
+ PixelFormat format;
+ };
+
static constexpr Binding NULL_BINDING{
.cpu_addr = 0,
.size = 0,
@@ -129,38 +144,63 @@ public:
void BindHostComputeBuffers();
- void SetEnabledUniformBuffers(size_t stage, u32 enabled);
+ void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
+ const UniformBufferSizes* sizes);
- void SetEnabledComputeUniformBuffers(u32 enabled);
+ void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
void UnbindGraphicsStorageBuffers(size_t stage);
void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
bool is_written);
+ void UnbindGraphicsTextureBuffers(size_t stage);
+
+ void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
+ PixelFormat format, bool is_written, bool is_image);
+
void UnbindComputeStorageBuffers();
void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
bool is_written);
+ void UnbindComputeTextureBuffers();
+
+ void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
+ bool is_written, bool is_image);
+
void FlushCachedWrites();
/// Return true when there are uncommitted buffers to be downloaded
[[nodiscard]] bool HasUncommittedFlushes() const noexcept;
+ void AccumulateFlushes();
+
/// Return true when the caller should wait for async downloads
[[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
/// Commit asynchronous downloads
void CommitAsyncFlushes();
+ void CommitAsyncFlushesHigh();
/// Pop asynchronous downloads
void PopAsyncFlushes();
+ bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
+
+ bool DMAClear(GPUVAddr src_address, u64 amount, u32 value);
+
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+ /// Return true when a region is registered on the cache
+ [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
+
+ /// Return true when a CPU region is modified from the CPU
+ [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
+
std::mutex mutex;
+ Runtime& runtime;
private:
template <typename Func>
@@ -190,6 +230,36 @@ private:
}
}
+ template <typename Func>
+ void ForEachWrittenRange(VAddr cpu_addr, u64 size, Func&& func) {
+ const VAddr start_address = cpu_addr;
+ const VAddr end_address = start_address + size;
+ const VAddr search_base =
+ static_cast<VAddr>(std::min<s64>(0LL, static_cast<s64>(start_address - size)));
+ const IntervalType search_interval{search_base, search_base + 1};
+ auto it = common_ranges.lower_bound(search_interval);
+ if (it == common_ranges.end()) {
+ it = common_ranges.begin();
+ }
+ for (; it != common_ranges.end(); it++) {
+ VAddr inter_addr_end = it->upper();
+ VAddr inter_addr = it->lower();
+ if (inter_addr >= end_address) {
+ break;
+ }
+ if (inter_addr_end <= start_address) {
+ continue;
+ }
+ if (inter_addr_end > end_address) {
+ inter_addr_end = end_address;
+ }
+ if (inter_addr < start_address) {
+ inter_addr = start_address;
+ }
+ func(inter_addr, inter_addr_end);
+ }
+ }
+
static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
return (cpu_addr & ~Core::Memory::PAGE_MASK) ==
((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
@@ -207,12 +277,16 @@ private:
void BindHostGraphicsStorageBuffers(size_t stage);
+ void BindHostGraphicsTextureBuffers(size_t stage);
+
void BindHostTransformFeedbackBuffers();
void BindHostComputeUniformBuffers();
void BindHostComputeStorageBuffers();
+ void BindHostComputeTextureBuffers();
+
void DoUpdateGraphicsBuffers(bool is_indexed);
void DoUpdateComputeBuffers();
@@ -227,6 +301,8 @@ private:
void UpdateStorageBuffers(size_t stage);
+ void UpdateTextureBuffers(size_t stage);
+
void UpdateTransformFeedbackBuffers();
void UpdateTransformFeedbackBuffer(u32 index);
@@ -235,6 +311,8 @@ private:
void UpdateComputeStorageBuffers();
+ void UpdateComputeTextureBuffers();
+
void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
[[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
@@ -272,24 +350,26 @@ private:
void DeleteBuffer(BufferId buffer_id);
- void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
-
void NotifyBufferDeletion();
[[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
+ [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
+ PixelFormat format);
+
[[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
[[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
[[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
+ void ClearDownload(IntervalType subtract_interval);
+
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
Core::Memory::Memory& cpu_memory;
- Runtime& runtime;
SlotVector<Buffer> slot_buffers;
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
@@ -300,20 +380,30 @@ private:
std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
+ std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
+ std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
+
+ std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
+ u32 enabled_compute_uniform_buffer_mask = 0;
- std::array<u32, NUM_STAGES> enabled_uniform_buffers{};
- u32 enabled_compute_uniform_buffers = 0;
+ const UniformBufferSizes* uniform_buffer_sizes{};
+ const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
std::array<u32, NUM_STAGES> enabled_storage_buffers{};
std::array<u32, NUM_STAGES> written_storage_buffers{};
u32 enabled_compute_storage_buffers = 0;
u32 written_compute_storage_buffers = 0;
- std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
+ std::array<u32, NUM_STAGES> enabled_texture_buffers{};
+ std::array<u32, NUM_STAGES> written_texture_buffers{};
+ std::array<u32, NUM_STAGES> image_texture_buffers{};
+ u32 enabled_compute_texture_buffers = 0;
+ u32 written_compute_texture_buffers = 0;
+ u32 image_compute_texture_buffers = 0;
std::array<u32, 16> uniform_cache_hits{};
std::array<u32, 16> uniform_cache_shots{};
@@ -324,12 +414,16 @@ private:
std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
dirty_uniform_buffers{};
+ std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
+ std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
+ std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
+ uniform_buffer_binding_sizes{};
std::vector<BufferId> cached_write_buffer_ids;
- // TODO: This data structure is not optimal and it should be reworked
- std::vector<BufferId> uncommitted_downloads;
- std::deque<std::vector<BufferId>> committed_downloads;
+ IntervalSet uncommitted_ranges;
+ IntervalSet common_ranges;
+ std::deque<IntervalSet> committed_ranges;
size_t immediate_buffer_capacity = 0;
std::unique_ptr<u8[]> immediate_buffer_alloc;
@@ -347,11 +441,12 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
Runtime& runtime_)
- : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
- gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
+ : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+ kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
deletion_iterator = slot_buffers.end();
+ common_ranges.clear();
}
template <class P>
@@ -422,6 +517,97 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
}
template <class P>
+void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
+ uncommitted_ranges.subtract(subtract_interval);
+ for (auto& interval_set : committed_ranges) {
+ interval_set.subtract(subtract_interval);
+ }
+}
+
+template <class P>
+bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
+ const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
+ const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+ if (!cpu_src_address || !cpu_dest_address) {
+ return false;
+ }
+ const bool source_dirty = IsRegionRegistered(*cpu_src_address, amount);
+ const bool dest_dirty = IsRegionRegistered(*cpu_dest_address, amount);
+ if (!source_dirty && !dest_dirty) {
+ return false;
+ }
+
+ const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
+ ClearDownload(subtract_interval);
+
+ BufferId buffer_a;
+ BufferId buffer_b;
+ do {
+ has_deleted_buffers = false;
+ buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount));
+ buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount));
+ } while (has_deleted_buffers);
+ auto& src_buffer = slot_buffers[buffer_a];
+ auto& dest_buffer = slot_buffers[buffer_b];
+ SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount));
+ SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast<u32>(amount));
+ std::array copies{BufferCopy{
+ .src_offset = src_buffer.Offset(*cpu_src_address),
+ .dst_offset = dest_buffer.Offset(*cpu_dest_address),
+ .size = amount,
+ }};
+
+ boost::container::small_vector<IntervalType, 4> tmp_intervals;
+ auto mirror = [&](VAddr base_address, VAddr base_address_end) {
+ const u64 size = base_address_end - base_address;
+ const VAddr diff = base_address - *cpu_src_address;
+ const VAddr new_base_address = *cpu_dest_address + diff;
+ const IntervalType add_interval{new_base_address, new_base_address + size};
+ uncommitted_ranges.add(add_interval);
+ tmp_intervals.push_back(add_interval);
+ };
+ ForEachWrittenRange(*cpu_src_address, amount, mirror);
+ // This subtraction in this order is important for overlapping copies.
+ common_ranges.subtract(subtract_interval);
+ bool atleast_1_download = tmp_intervals.size() != 0;
+ for (const IntervalType add_interval : tmp_intervals) {
+ common_ranges.add(add_interval);
+ }
+
+ runtime.CopyBuffer(dest_buffer, src_buffer, copies);
+ if (atleast_1_download) {
+ dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount);
+ }
+ std::vector<u8> tmp_buffer(amount);
+ cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
+ cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
+ return true;
+}
+
+template <class P>
+bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
+ const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
+ if (!cpu_dst_address) {
+ return false;
+ }
+ const bool dest_dirty = IsRegionRegistered(*cpu_dst_address, amount);
+ if (!dest_dirty) {
+ return false;
+ }
+
+ const size_t size = amount * sizeof(u32);
+ const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + size};
+ ClearDownload(subtract_interval);
+ common_ranges.subtract(subtract_interval);
+
+ const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
+ auto& dest_buffer = slot_buffers[buffer];
+ const u32 offset = dest_buffer.Offset(*cpu_dst_address);
+ runtime.ClearBuffer(dest_buffer, offset, size, value);
+ return true;
+}
+
+template <class P>
void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) {
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
@@ -476,6 +662,7 @@ void BufferCache<P>::BindHostStageBuffers(size_t stage) {
MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
BindHostGraphicsUniformBuffers(stage);
BindHostGraphicsStorageBuffers(stage);
+ BindHostGraphicsTextureBuffers(stage);
}
template <class P>
@@ -483,21 +670,30 @@ void BufferCache<P>::BindHostComputeBuffers() {
MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
BindHostComputeUniformBuffers();
BindHostComputeStorageBuffers();
+ BindHostComputeTextureBuffers();
}
template <class P>
-void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) {
+void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
+ const UniformBufferSizes* sizes) {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
- if (enabled_uniform_buffers[stage] != enabled) {
- dirty_uniform_buffers[stage] = ~u32{0};
+ if (enabled_uniform_buffer_masks != mask) {
+ if constexpr (IS_OPENGL) {
+ fast_bound_uniform_buffers.fill(0);
+ }
+ dirty_uniform_buffers.fill(~u32{0});
+ uniform_buffer_binding_sizes.fill({});
}
}
- enabled_uniform_buffers[stage] = enabled;
+ enabled_uniform_buffer_masks = mask;
+ uniform_buffer_sizes = sizes;
}
template <class P>
-void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) {
- enabled_compute_uniform_buffers = enabled;
+void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
+ const ComputeUniformBufferSizes* sizes) {
+ enabled_compute_uniform_buffer_mask = mask;
+ compute_uniform_buffer_sizes = sizes;
}
template <class P>
@@ -518,9 +714,29 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
}
template <class P>
+void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
+ enabled_texture_buffers[stage] = 0;
+ written_texture_buffers[stage] = 0;
+ image_texture_buffers[stage] = 0;
+}
+
+template <class P>
+void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
+ u32 size, PixelFormat format, bool is_written,
+ bool is_image) {
+ enabled_texture_buffers[stage] |= 1U << tbo_index;
+ written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
+ if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+ image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
+ }
+ texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
+}
+
+template <class P>
void BufferCache<P>::UnbindComputeStorageBuffers() {
enabled_compute_storage_buffers = 0;
written_compute_storage_buffers = 0;
+ image_compute_texture_buffers = 0;
}
template <class P>
@@ -538,6 +754,24 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
}
template <class P>
+void BufferCache<P>::UnbindComputeTextureBuffers() {
+ enabled_compute_texture_buffers = 0;
+ written_compute_texture_buffers = 0;
+ image_compute_texture_buffers = 0;
+}
+
+template <class P>
+void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
+ PixelFormat format, bool is_written, bool is_image) {
+ enabled_compute_texture_buffers |= 1U << tbo_index;
+ written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
+ if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+ image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
+ }
+ compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
+}
+
+template <class P>
void BufferCache<P>::FlushCachedWrites() {
for (const BufferId buffer_id : cached_write_buffer_ids) {
slot_buffers[buffer_id].FlushCachedWrites();
@@ -547,29 +781,30 @@ void BufferCache<P>::FlushCachedWrites() {
template <class P>
bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
- return !uncommitted_downloads.empty();
+ return !uncommitted_ranges.empty() || !committed_ranges.empty();
}
template <class P>
-bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
- return !committed_downloads.empty() && !committed_downloads.front().empty();
+void BufferCache<P>::AccumulateFlushes() {
+ if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
+ uncommitted_ranges.clear();
+ return;
+ }
+ if (uncommitted_ranges.empty()) {
+ return;
+ }
+ committed_ranges.emplace_back(std::move(uncommitted_ranges));
}
template <class P>
-void BufferCache<P>::CommitAsyncFlushes() {
- // This is intentionally passing the value by copy
- committed_downloads.push_front(uncommitted_downloads);
- uncommitted_downloads.clear();
+bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
+ return false;
}
template <class P>
-void BufferCache<P>::PopAsyncFlushes() {
- if (committed_downloads.empty()) {
- return;
- }
- auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); });
- const std::span<const BufferId> download_ids = committed_downloads.back();
- if (download_ids.empty()) {
+void BufferCache<P>::CommitAsyncFlushesHigh() {
+ AccumulateFlushes();
+ if (committed_ranges.empty()) {
return;
}
MICROPROFILE_SCOPE(GPU_DownloadMemory);
@@ -577,20 +812,43 @@ void BufferCache<P>::PopAsyncFlushes() {
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
- for (const BufferId buffer_id : download_ids) {
- slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) {
- downloads.push_back({
- BufferCopy{
- .src_offset = range_offset,
- .dst_offset = total_size_bytes,
- .size = range_size,
- },
- buffer_id,
+ for (const IntervalSet& intervals : committed_ranges) {
+ for (auto& interval : intervals) {
+ const std::size_t size = interval.upper() - interval.lower();
+ const VAddr cpu_addr = interval.lower();
+ ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+ boost::container::small_vector<BufferCopy, 1> copies;
+ buffer.ForEachDownloadRangeAndClear(
+ cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ const VAddr buffer_addr = buffer.CpuAddr();
+ const auto add_download = [&](VAddr start, VAddr end) {
+ const u64 new_offset = start - buffer_addr;
+ const u64 new_size = end - start;
+ downloads.push_back({
+ BufferCopy{
+ .src_offset = new_offset,
+ .dst_offset = total_size_bytes,
+ .size = new_size,
+ },
+ buffer_id,
+ });
+ // Align up to avoid cache conflicts
+ constexpr u64 align = 256ULL;
+ constexpr u64 mask = ~(align - 1ULL);
+ total_size_bytes += (new_size + align - 1) & mask;
+ largest_copy = std::max(largest_copy, new_size);
+ };
+
+ const VAddr start_address = buffer_addr + range_offset;
+ const VAddr end_address = start_address + range_size;
+ ForEachWrittenRange(start_address, range_size, add_download);
+ const IntervalType subtract_interval{start_address, end_address};
+ common_ranges.subtract(subtract_interval);
+ });
});
- total_size_bytes += range_size;
- largest_copy = std::max(largest_copy, range_size);
- });
+ }
}
+ committed_ranges.clear();
if (downloads.empty()) {
return;
}
@@ -623,6 +881,19 @@ void BufferCache<P>::PopAsyncFlushes() {
}
template <class P>
+void BufferCache<P>::CommitAsyncFlushes() {
+ if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) {
+ CommitAsyncFlushesHigh();
+ } else {
+ uncommitted_ranges.clear();
+ committed_ranges.clear();
+ }
+}
+
+template <class P>
+void BufferCache<P>::PopAsyncFlushes() {}
+
+template <class P>
bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
for (u64 page = addr >> PAGE_BITS; page < page_end;) {
@@ -642,6 +913,46 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
}
template <class P>
+bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
+ const VAddr end_addr = addr + size;
+ const u64 page_end = Common::DivCeil(end_addr, PAGE_SIZE);
+ for (u64 page = addr >> PAGE_BITS; page < page_end;) {
+ const BufferId buffer_id = page_table[page];
+ if (!buffer_id) {
+ ++page;
+ continue;
+ }
+ Buffer& buffer = slot_buffers[buffer_id];
+ const VAddr buf_start_addr = buffer.CpuAddr();
+ const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
+ if (buf_start_addr < end_addr && addr < buf_end_addr) {
+ return true;
+ }
+ page = Common::DivCeil(end_addr, PAGE_SIZE);
+ }
+ return false;
+}
+
+template <class P>
+bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
+ const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
+ for (u64 page = addr >> PAGE_BITS; page < page_end;) {
+ const BufferId image_id = page_table[page];
+ if (!image_id) {
+ ++page;
+ continue;
+ }
+ Buffer& buffer = slot_buffers[image_id];
+ if (buffer.IsRegionCpuModified(addr, size)) {
+ return true;
+ }
+ const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
+ page = Common::DivCeil(end_addr, PAGE_SIZE);
+ }
+ return false;
+}
+
+template <class P>
void BufferCache<P>::BindHostIndexBuffer() {
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
TouchBuffer(buffer);
@@ -649,7 +960,9 @@ void BufferCache<P>::BindHostIndexBuffer() {
const u32 size = index_buffer.size;
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
- runtime.BindIndexBuffer(buffer, offset, size);
+ const u32 new_offset = offset + maxwell3d.regs.index_array.first *
+ maxwell3d.regs.index_array.FormatSizeInBytes();
+ runtime.BindIndexBuffer(buffer, new_offset, size);
} else {
runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
@@ -683,7 +996,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
dirty = std::exchange(dirty_uniform_buffers[stage], 0);
}
u32 binding_index = 0;
- ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+ ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
const bool needs_bind = ((dirty >> index) & 1) != 0;
BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
@@ -697,7 +1010,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
bool needs_bind) {
const Binding& binding = uniform_buffers[stage][index];
const VAddr cpu_addr = binding.cpu_addr;
- const u32 size = binding.size;
+ const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer);
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
@@ -707,8 +1020,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
if constexpr (IS_OPENGL) {
if (runtime.HasFastBufferSubData()) {
// Fast path for Nvidia
- if (!HasFastUniformBufferBound(stage, binding_index)) {
+ const bool should_fast_bind =
+ !HasFastUniformBufferBound(stage, binding_index) ||
+ uniform_buffer_binding_sizes[stage][binding_index] != size;
+ if (should_fast_bind) {
// We only have to bind when the currently bound buffer is not the fast version
+ fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+ uniform_buffer_binding_sizes[stage][binding_index] = size;
runtime.BindFastUniformBuffer(stage, binding_index, size);
}
const auto span = ImmediateBufferWithData(cpu_addr, size);
@@ -716,8 +1034,10 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
return;
}
}
- fast_bound_uniform_buffers[stage] |= 1U << binding_index;
-
+ if constexpr (IS_OPENGL) {
+ fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+ uniform_buffer_binding_sizes[stage][binding_index] = size;
+ }
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
@@ -730,14 +1050,27 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
}
++uniform_cache_shots[0];
- if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
- // Skip binding if it's not needed and if the bound buffer is not the fast version
- // This exists to avoid instances where the fast buffer is bound and a GPU write happens
+ // Skip binding if it's not needed and if the bound buffer is not the fast version
+ // This exists to avoid instances where the fast buffer is bound and a GPU write happens
+ needs_bind |= HasFastUniformBufferBound(stage, binding_index);
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size;
+ }
+ if (!needs_bind) {
return;
}
- fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
-
const u32 offset = buffer.Offset(cpu_addr);
+ if constexpr (IS_OPENGL) {
+ // Fast buffer will be unbound
+ fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
+
+ // Mark the index as dirty if offset doesn't match
+ const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
+ dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
+ }
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+ uniform_buffer_binding_sizes[stage][binding_index] = size;
+ }
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
} else {
@@ -767,6 +1100,28 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
}
template <class P>
+void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
+ ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
+ const TextureBufferBinding& binding = texture_buffers[stage][index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ const PixelFormat format = binding.format;
+ if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+ if (((image_texture_buffers[stage] >> index) & 1) != 0) {
+ runtime.BindImageBuffer(buffer, offset, size, format);
+ } else {
+ runtime.BindTextureBuffer(buffer, offset, size, format);
+ }
+ } else {
+ runtime.BindTextureBuffer(buffer, offset, size, format);
+ }
+ });
+}
+
+template <class P>
void BufferCache<P>::BindHostTransformFeedbackBuffers() {
if (maxwell3d.regs.tfb_enabled == 0) {
return;
@@ -788,13 +1143,14 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
// Mark all uniform buffers as dirty
dirty_uniform_buffers.fill(~u32{0});
+ fast_bound_uniform_buffers.fill(0);
}
u32 binding_index = 0;
- ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+ ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
const Binding& binding = compute_uniform_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer);
- const u32 size = binding.size;
+ const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
SynchronizeBuffer(buffer, binding.cpu_addr, size);
const u32 offset = buffer.Offset(binding.cpu_addr);
@@ -829,6 +1185,28 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
}
template <class P>
+void BufferCache<P>::BindHostComputeTextureBuffers() {
+ ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
+ const TextureBufferBinding& binding = compute_texture_buffers[index];
+ Buffer& buffer = slot_buffers[binding.buffer_id];
+ const u32 size = binding.size;
+ SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+ const u32 offset = buffer.Offset(binding.cpu_addr);
+ const PixelFormat format = binding.format;
+ if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+ if (((image_compute_texture_buffers >> index) & 1) != 0) {
+ runtime.BindImageBuffer(buffer, offset, size, format);
+ } else {
+ runtime.BindTextureBuffer(buffer, offset, size, format);
+ }
+ } else {
+ runtime.BindTextureBuffer(buffer, offset, size, format);
+ }
+ });
+}
+
+template <class P>
void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
if (is_indexed) {
UpdateIndexBuffer();
@@ -838,6 +1216,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
UpdateUniformBuffers(stage);
UpdateStorageBuffers(stage);
+ UpdateTextureBuffers(stage);
}
}
@@ -845,6 +1224,7 @@ template <class P>
void BufferCache<P>::DoUpdateComputeBuffers() {
UpdateComputeUniformBuffers();
UpdateComputeStorageBuffers();
+ UpdateComputeTextureBuffers();
}
template <class P>
@@ -863,7 +1243,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
const GPUVAddr gpu_addr_end = index_array.EndAddress();
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
- const u32 draw_size = index_array.count * index_array.FormatSizeInBytes();
+ const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
const u32 size = std::min(address_size, draw_size);
if (size == 0 || !cpu_addr) {
index_buffer = NULL_BINDING;
@@ -914,7 +1294,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
template <class P>
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
- ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+ ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
Binding& binding = uniform_buffers[stage][index];
if (binding.buffer_id) {
// Already updated
@@ -945,6 +1325,18 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
}
template <class P>
+void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
+ ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
+ Binding& binding = texture_buffers[stage][index];
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ // Mark buffer as written if needed
+ if (((written_texture_buffers[stage] >> index) & 1) != 0) {
+ MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
+ }
+ });
+}
+
+template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffers() {
if (maxwell3d.regs.tfb_enabled == 0) {
return;
@@ -975,7 +1367,7 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
template <class P>
void BufferCache<P>::UpdateComputeUniformBuffers() {
- ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+ ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
Binding& binding = compute_uniform_buffers[index];
binding = NULL_BINDING;
const auto& launch_desc = kepler_compute.launch_description;
@@ -996,11 +1388,22 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
// Resolve buffer
Binding& binding = compute_storage_buffers[index];
- const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
- binding.buffer_id = buffer_id;
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
// Mark as written if needed
if (((written_compute_storage_buffers >> index) & 1) != 0) {
- MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+ MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
+ }
+ });
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeTextureBuffers() {
+ ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
+ Binding& binding = compute_texture_buffers[index];
+ binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+ // Mark as written if needed
+ if (((written_compute_texture_buffers >> index) & 1) != 0) {
+ MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
}
});
}
@@ -1010,16 +1413,16 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
Buffer& buffer = slot_buffers[buffer_id];
buffer.MarkRegionAsGpuModified(cpu_addr, size);
- const bool is_accuracy_high = Settings::IsGPULevelHigh();
+ const IntervalType base_interval{cpu_addr, cpu_addr + size};
+ common_ranges.add(base_interval);
+
+ const bool is_accuracy_high =
+ Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
- if (!is_accuracy_high || !is_async) {
+ if (!is_async && !is_accuracy_high) {
return;
}
- if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) {
- // Already inserted
- return;
- }
- uncommitted_downloads.push_back(buffer_id);
+ uncommitted_ranges.add(base_interval);
}
template <class P>
@@ -1103,7 +1506,6 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
if (!copies.empty()) {
runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies);
}
- ReplaceBufferDownloads(overlap_id, new_buffer_id);
DeleteBuffer(overlap_id);
}
@@ -1244,14 +1646,29 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
boost::container::small_vector<BufferCopy, 1> copies;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
- buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
- copies.push_back(BufferCopy{
- .src_offset = range_offset,
- .dst_offset = total_size_bytes,
- .size = range_size,
- });
- total_size_bytes += range_size;
- largest_copy = std::max(largest_copy, range_size);
+ buffer.ForEachDownloadRangeAndClear(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+ const VAddr buffer_addr = buffer.CpuAddr();
+ const auto add_download = [&](VAddr start, VAddr end) {
+ const u64 new_offset = start - buffer_addr;
+ const u64 new_size = end - start;
+ copies.push_back(BufferCopy{
+ .src_offset = new_offset,
+ .dst_offset = total_size_bytes,
+ .size = new_size,
+ });
+ // Align up to avoid cache conflicts
+ constexpr u64 align = 256ULL;
+ constexpr u64 mask = ~(align - 1ULL);
+ total_size_bytes += (new_size + align - 1) & mask;
+ largest_copy = std::max(largest_copy, new_size);
+ };
+
+ const VAddr start_address = buffer_addr + range_offset;
+ const VAddr end_address = start_address + range_size;
+ ForEachWrittenRange(start_address, range_size, add_download);
+ const IntervalType subtract_interval{start_address, end_address};
+ ClearDownload(subtract_interval);
+ common_ranges.subtract(subtract_interval);
});
if (total_size_bytes == 0) {
return;
@@ -1316,21 +1733,10 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
}
template <class P>
-void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) {
- const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) {
- std::ranges::replace(buffers, old_buffer_id, new_buffer_id);
- if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) {
- buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end());
- }
- };
- replace(uncommitted_downloads);
- std::ranges::for_each(committed_downloads, replace);
-}
-
-template <class P>
void BufferCache<P>::NotifyBufferDeletion() {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
dirty_uniform_buffers.fill(~u32{0});
+ uniform_buffer_binding_sizes.fill({});
}
auto& flags = maxwell3d.dirty.flags;
flags[Dirty::IndexBuffer] = true;
@@ -1349,21 +1755,34 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
if (!cpu_addr || size == 0) {
return NULL_BINDING;
}
- // HACK(Rodrigo): This is the number of bytes bound in host beyond the guest API's range.
- // It exists due to some games like Astral Chain operate out of bounds.
- // Binding the whole map range would be technically correct, but games have large maps that make
- // this approach unaffordable for now.
- static constexpr u32 arbitrary_extra_bytes = 0xc000;
- const u32 bytes_to_map_end = static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr));
const Binding binding{
.cpu_addr = *cpu_addr,
- .size = std::min(size + arbitrary_extra_bytes, bytes_to_map_end),
+ .size = size,
.buffer_id = BufferId{},
};
return binding;
}
template <class P>
+typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
+ GPUVAddr gpu_addr, u32 size, PixelFormat format) {
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ TextureBufferBinding binding;
+ if (!cpu_addr || size == 0) {
+ binding.cpu_addr = 0;
+ binding.size = 0;
+ binding.buffer_id = NULL_BUFFER_ID;
+ binding.format = PixelFormat::Invalid;
+ } else {
+ binding.cpu_addr = *cpu_addr;
+ binding.size = size;
+ binding.buffer_id = BufferId{};
+ binding.format = format;
+ }
+ return binding;
+}
+
+template <class P>
std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) {
u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
if (IsRangeGranular(cpu_addr, size) ||
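The buffer_cache.h changes above replace the per-buffer download lists with boost::icl interval sets (common_ranges, uncommitted_ranges, committed_ranges). A self-contained sketch of the interval arithmetic they rely on, with made-up addresses: add() coalesces overlapping ranges and subtract() punches holes, which is exactly how ClearDownload and MarkWrittenBuffer maintain the tracked CPU ranges.

    #include <boost/icl/interval_set.hpp>
    #include <cstdint>
    #include <iostream>

    using VAddr = std::uint64_t;
    using IntervalSet = boost::icl::interval_set<VAddr>;
    using IntervalType = IntervalSet::interval_type;

    int main() {
        IntervalSet written;
        written.add(IntervalType{0x1000, 0x2000});      // [0x1000, 0x2000)
        written.add(IntervalType{0x1800, 0x3000});      // coalesced to [0x1000, 0x3000)
        written.subtract(IntervalType{0x1400, 0x1800}); // split into two ranges
        for (const auto& interval : written) {
            // Prints 1000..1400 and 1800..3000
            std::cout << std::hex << interval.lower() << ".." << interval.upper() << '\n';
        }
    }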
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index ff3db0aee..ffb7c82a1 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -129,28 +129,27 @@ void Vic::Execute() {
const std::size_t surface_width = config.surface_width_minus1 + 1;
const std::size_t surface_height = config.surface_height_minus1 + 1;
- const std::size_t half_width = surface_width / 2;
- const std::size_t half_height = config.surface_height_minus1 / 2;
+ const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
+ const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
+ const std::size_t half_width = frame_width / 2;
+ const std::size_t half_height = frame_height / 2;
const std::size_t aligned_width = (surface_width + 0xff) & ~0xff;
const auto* luma_ptr = frame->data[0];
const auto* chroma_b_ptr = frame->data[1];
const auto* chroma_r_ptr = frame->data[2];
- const auto stride = frame->linesize[0];
- const auto half_stride = frame->linesize[1];
+ const auto stride = static_cast<size_t>(frame->linesize[0]);
+ const auto half_stride = static_cast<size_t>(frame->linesize[1]);
luma_buffer.resize(aligned_width * surface_height);
- chroma_buffer.resize(aligned_width * half_height);
+ chroma_buffer.resize(aligned_width * surface_height / 2);
// Populate luma buffer
- for (std::size_t y = 0; y < surface_height - 1; ++y) {
+ for (std::size_t y = 0; y < frame_height; ++y) {
const std::size_t src = y * stride;
const std::size_t dst = y * aligned_width;
-
- const std::size_t size = surface_width;
-
- for (std::size_t offset = 0; offset < size; ++offset) {
- luma_buffer[dst + offset] = luma_ptr[src + offset];
+ for (std::size_t x = 0; x < frame_width; ++x) {
+ luma_buffer[dst + x] = luma_ptr[src + x];
}
}
gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
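The loop now clamps to the decoded frame's dimensions, so frames smaller than the configured surface no longer read past the end of the AVFrame planes, and the chroma buffer is sized from the surface height rather than from a stale half-height. A small sketch of the 256-byte width alignment used above, with hypothetical widths:

    #include <cstddef>

    // Same alignment as `(surface_width + 0xff) & ~0xff` above.
    constexpr std::size_t AlignWidth(std::size_t width) {
        return (width + 0xff) & ~std::size_t{0xff};
    }
    static_assert(AlignWidth(1280) == 1280); // already a multiple of 256
    static_assert(AlignWidth(1000) == 1024); // rounded up to the next multiple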
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index 7149af290..b1be065c3 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(table, OFF(zeta), NUM(zeta), flag);
}
}
+
+void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) {
+ FillBlock(tables[0], OFF(shader_config[0]),
+ NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders);
+}
} // Anonymous namespace
void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
@@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
SetupIndexBuffer(tables);
SetupDirtyDescriptors(tables);
SetupDirtyRenderTargets(tables);
+ SetupDirtyShaders(tables);
}
} // namespace VideoCommon::Dirty
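SetupDirtyShaders marks the whole shader_config register range so that any write to it raises the new Shaders flag. A minimal sketch of what a FillBlock-style helper is assumed to do, with one flag byte per 32-bit register slot; std::uint8_t stands in for the codebase's u8, and kNumRegs for the real register count:

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t kNumRegs = 0x1000; // hypothetical size
    using Table = std::array<std::uint8_t, kNumRegs>;

    // Tag `num` consecutive register slots starting at `begin` with `flag`.
    void FillBlock(Table& table, std::size_t begin, std::size_t num, std::uint8_t flag) {
        std::fill_n(table.begin() + begin, num, flag);
    }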
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 702688ace..504465d3f 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -36,6 +36,8 @@ enum : u8 {
IndexBuffer,
+ Shaders,
+
LastCommonEntry,
};
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 8b33c04ab..8d28bd884 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -4,6 +4,7 @@
#include "common/cityhash.h"
#include "common/microprofile.h"
+#include "common/settings.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/dma_pusher.h"
@@ -76,8 +77,13 @@ bool DmaPusher::Step() {
// Push buffer non-empty, read a word
command_headers.resize(command_list_header.size);
- gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
- command_list_header.size * sizeof(u32));
+ if (Settings::IsGPULevelHigh()) {
+ gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
+ command_list_header.size * sizeof(u32));
+ } else {
+ gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+ command_list_header.size * sizeof(u32));
+ }
}
for (std::size_t index = 0; index < command_headers.size();) {
const CommandHeader& command_header = command_headers[index];
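ReadBlock synchronizes with pending GPU-side writes before copying (safe but slower), while ReadBlockUnsafe skips that step; gating on IsGPULevelHigh keeps the fast path at the default accuracy level. A sketch of what the settings query is assumed to reduce to (enum and member names are illustrative, not quoted from common/settings.h):

    // Assumed implementation of the accuracy check.
    enum class GPUAccuracy { Normal, High, Extreme };

    bool IsGPULevelHigh(GPUAccuracy level) {
        return level == GPUAccuracy::High || level == GPUAccuracy::Extreme;
    }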
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
deleted file mode 100644
index f46e81bb7..000000000
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <type_traits>
-#include "common/bit_field.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/guest_driver.h"
-#include "video_core/textures/texture.h"
-
-namespace Tegra::Engines {
-
-struct SamplerDescriptor {
- union {
- u32 raw = 0;
- BitField<0, 2, Tegra::Shader::TextureType> texture_type;
- BitField<2, 3, Tegra::Texture::ComponentType> r_type;
- BitField<5, 1, u32> is_array;
- BitField<6, 1, u32> is_buffer;
- BitField<7, 1, u32> is_shadow;
- BitField<8, 3, Tegra::Texture::ComponentType> g_type;
- BitField<11, 3, Tegra::Texture::ComponentType> b_type;
- BitField<14, 3, Tegra::Texture::ComponentType> a_type;
- BitField<17, 7, Tegra::Texture::TextureFormat> format;
- };
-
- bool operator==(const SamplerDescriptor& rhs) const noexcept {
- return raw == rhs.raw;
- }
-
- bool operator!=(const SamplerDescriptor& rhs) const noexcept {
- return !operator==(rhs);
- }
-
- static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
- using Tegra::Shader::TextureType;
- SamplerDescriptor result;
-
- result.format.Assign(tic.format.Value());
- result.r_type.Assign(tic.r_type.Value());
- result.g_type.Assign(tic.g_type.Value());
- result.b_type.Assign(tic.b_type.Value());
- result.a_type.Assign(tic.a_type.Value());
-
- switch (tic.texture_type.Value()) {
- case Tegra::Texture::TextureType::Texture1D:
- result.texture_type.Assign(TextureType::Texture1D);
- return result;
- case Tegra::Texture::TextureType::Texture2D:
- result.texture_type.Assign(TextureType::Texture2D);
- return result;
- case Tegra::Texture::TextureType::Texture3D:
- result.texture_type.Assign(TextureType::Texture3D);
- return result;
- case Tegra::Texture::TextureType::TextureCubemap:
- result.texture_type.Assign(TextureType::TextureCube);
- return result;
- case Tegra::Texture::TextureType::Texture1DArray:
- result.texture_type.Assign(TextureType::Texture1D);
- result.is_array.Assign(1);
- return result;
- case Tegra::Texture::TextureType::Texture2DArray:
- result.texture_type.Assign(TextureType::Texture2D);
- result.is_array.Assign(1);
- return result;
- case Tegra::Texture::TextureType::Texture1DBuffer:
- result.texture_type.Assign(TextureType::Texture1D);
- result.is_buffer.Assign(1);
- return result;
- case Tegra::Texture::TextureType::Texture2DNoMipmap:
- result.texture_type.Assign(TextureType::Texture2D);
- return result;
- case Tegra::Texture::TextureType::TextureCubeArray:
- result.texture_type.Assign(TextureType::TextureCube);
- result.is_array.Assign(1);
- return result;
- default:
- result.texture_type.Assign(TextureType::Texture2D);
- return result;
- }
- }
-};
-static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
-
-class ConstBufferEngineInterface {
-public:
- virtual ~ConstBufferEngineInterface() = default;
- virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
- virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
- virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
- u64 offset) const = 0;
- virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
- virtual u32 GetBoundBuffer() const = 0;
-
- virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
- virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
-};
-
-} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a9b75091e..492b4c5a3 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -8,7 +8,6 @@
#include "core/core.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
@@ -57,53 +56,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
}
}
-u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
- ASSERT(stage == ShaderType::Compute);
- const auto& buffer = launch_description.const_buffer_config[const_buffer];
- u32 result;
- std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
- return result;
-}
-
-SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
- return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
-}
-
-SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
- u64 offset) const {
- ASSERT(stage == ShaderType::Compute);
- const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
- const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
- return AccessSampler(memory_manager.Read<u32>(tex_info_address));
-}
-
-SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
- const Texture::TextureHandle tex_handle{handle};
- const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
- const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
-
- SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
- result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
- return result;
-}
-
-VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
- return rasterizer->AccessGuestDriverProfile();
-}
-
-const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
- return rasterizer->AccessGuestDriverProfile();
-}
-
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
-
- const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
- LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
-
- rasterizer->DispatchCompute(code_addr);
+ rasterizer->DispatchCompute();
}
Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
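With the code address parameter dropped from DispatchCompute, the rasterizer is expected to recover the program start from the engine state itself, exactly as the deleted lines computed it. A hedged sketch (FetchComputeCodeAddress is hypothetical; the field names are the ones removed above):

    // Hypothetical rasterizer-side helper mirroring the deleted computation.
    GPUVAddr FetchComputeCodeAddress(const Tegra::Engines::KeplerCompute& kepler) {
        return kepler.regs.code_loc.Address() + kepler.launch_description.program_start;
    }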
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 7c40cba38..f8b8d06ac 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -10,10 +10,8 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/engine_upload.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/gpu.h"
#include "video_core/textures/texture.h"
@@ -40,7 +38,7 @@ namespace Tegra::Engines {
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
-class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
+class KeplerCompute final : public EngineInterface {
public:
explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
~KeplerCompute();
@@ -209,23 +207,6 @@ public:
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) override;
- u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
-
- SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
-
- SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
- u64 offset) const override;
-
- SamplerDescriptor AccessSampler(u32 handle) const override;
-
- u32 GetBoundBuffer() const override {
- return regs.tex_cb_index;
- }
-
- VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
-
- const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
-
private:
void ProcessLaunch();
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index aab6b8f7a..b18b8a02a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -8,7 +8,6 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@@ -670,42 +669,4 @@ void Maxwell3D::ProcessClearBuffers() {
rasterizer->Clear();
}
-u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
- ASSERT(stage != ShaderType::Compute);
- const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
- const auto& buffer = shader_stage.const_buffers[const_buffer];
- return memory_manager.Read<u32>(buffer.address + offset);
-}
-
-SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
- return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
-}
-
-SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
- u64 offset) const {
- ASSERT(stage != ShaderType::Compute);
- const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
- const auto& tex_info_buffer = shader.const_buffers[const_buffer];
- const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
- return AccessSampler(memory_manager.Read<u32>(tex_info_address));
-}
-
-SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
- const Texture::TextureHandle tex_handle{handle};
- const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
- const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
-
- SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
- result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
- return result;
-}
-
-VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
- return rasterizer->AccessGuestDriverProfile();
-}
-
-const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
- return rasterizer->AccessGuestDriverProfile();
-}
-
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 335383955..1aa43523a 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -17,11 +17,9 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/engine_upload.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/gpu.h"
#include "video_core/macro/macro.h"
#include "video_core/textures/texture.h"
@@ -49,7 +47,7 @@ namespace Tegra::Engines {
#define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
-class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
+class Maxwell3D final : public EngineInterface {
public:
explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
~Maxwell3D();
@@ -307,10 +305,6 @@ public:
return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
}
- bool IsConstant() const {
- return constant;
- }
-
bool IsValid() const {
return size != Size::Invalid;
}
@@ -912,7 +906,11 @@ public:
u32 fill_rectangle;
- INSERT_PADDING_WORDS_NOINIT(0x8);
+ INSERT_PADDING_WORDS_NOINIT(0x2);
+
+ u32 conservative_raster_enable;
+
+ INSERT_PADDING_WORDS_NOINIT(0x5);
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
@@ -959,7 +957,11 @@ public:
SamplerIndex sampler_index;
- INSERT_PADDING_WORDS_NOINIT(0x25);
+ INSERT_PADDING_WORDS_NOINIT(0x2);
+
+ std::array<u32, 8> gp_passthrough_mask;
+
+ INSERT_PADDING_WORDS_NOINIT(0x1B);
u32 depth_test_enable;
@@ -1152,7 +1154,11 @@ public:
u32 index;
} primitive_restart;
- INSERT_PADDING_WORDS_NOINIT(0x5F);
+ INSERT_PADDING_WORDS_NOINIT(0xE);
+
+ u32 provoking_vertex_last;
+
+ INSERT_PADDING_WORDS_NOINIT(0x50);
struct {
u32 start_addr_high;
@@ -1424,23 +1430,6 @@ public:
void FlushMMEInlineDraw();
- u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
-
- SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
-
- SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
- u64 offset) const override;
-
- SamplerDescriptor AccessSampler(u32 handle) const override;
-
- u32 GetBoundBuffer() const override {
- return regs.tex_cb_index;
- }
-
- VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
-
- const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
-
bool ShouldExecute() const {
return execute_on;
}
@@ -1630,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(render_area, 0x3FD);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
+ASSERT_REG_POSITION(conservative_raster_enable, 0x452);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
@@ -1638,6 +1628,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
ASSERT_REG_POSITION(zeta_depth, 0x48c);
ASSERT_REG_POSITION(sampler_index, 0x48D);
+ASSERT_REG_POSITION(gp_passthrough_mask, 0x490);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -1690,6 +1681,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
+ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
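Each new register has to land on its documented offset, which is what the ASSERT_REG_POSITION lines verify; the check reduces to the offsetof arithmetic of the MAXWELL3D_REG_INDEX macro defined earlier in this header. Equivalent hand-written checks for the three registers added here:

    // Equivalent static checks for the registers introduced in this change.
    static_assert(MAXWELL3D_REG_INDEX(conservative_raster_enable) == 0x452);
    static_assert(MAXWELL3D_REG_INDEX(gp_passthrough_mask) == 0x490);
    static_assert(MAXWELL3D_REG_INDEX(provoking_vertex_last) == 0x5A1);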
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 2ee980bab..c7ec1eac9 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "common/settings.h"
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
@@ -12,6 +13,9 @@
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
+MICROPROFILE_DECLARE(GPU_DMAEngine);
+MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128));
+
namespace Tegra::Engines {
using namespace Texture;
@@ -21,6 +25,10 @@ MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
MaxwellDMA::~MaxwellDMA() = default;
+void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
+}
+
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
@@ -39,12 +47,12 @@ void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
}
void MaxwellDMA::Launch() {
+ MICROPROFILE_SCOPE(GPU_DMAEngine);
LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in),
static_cast<GPUVAddr>(regs.offset_out));
// TODO(Subv): Perform more research and implement all features of this engine.
const LaunchDMA& launch = regs.launch_dma;
- ASSERT(launch.remap_enable == 0);
ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
@@ -77,11 +85,31 @@ void MaxwellDMA::CopyPitchToPitch() {
// When the `multi_line_enable` bit is disabled, the copy is performed as if we were copying a 1D
// buffer of length `line_length_in`.
// Otherwise we copy a 2D image of dimensions (line_length_in, line_count).
+ auto& accelerate = rasterizer->AccessAccelerateDMA();
if (!regs.launch_dma.multi_line_enable) {
- memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in);
+ const bool is_buffer_clear = regs.launch_dma.remap_enable != 0 &&
+ regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
+ // TODO: allow multisized components.
+ if (is_buffer_clear) {
+ ASSERT(regs.remap_const.component_size_minus_one == 3);
+ accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
+ std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
+ memory_manager.WriteBlockUnsafe(regs.offset_out,
+ reinterpret_cast<u8*>(tmp_buffer.data()),
+ regs.line_length_in * sizeof(u32));
+ return;
+ }
+ UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
+ if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
+ std::vector<u8> tmp_buffer(regs.line_length_in);
+ memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in);
+ memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in);
+ }
return;
}
+ UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
+
// Perform a line-by-line copy.
// We're going to take a subrect of size (line_length_in, line_count) from the source rectangle.
// There is no need to manually flush/invalidate the regions because CopyBlock does that for us.
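The clear fast path only handles 4-byte components (component_size_minus_one == 3) whose destination X source is the constant A register, so line_length_in counts 32-bit words rather than bytes. A worked sketch of the size arithmetic with hypothetical values:

    #include <cstdint>

    // Hypothetical values: a line_length_in of 0x100 words clears 0x400 bytes.
    constexpr std::uint32_t component_size_minus_one = 3;
    constexpr std::uint32_t component_size = component_size_minus_one + 1; // 4 bytes
    constexpr std::uint64_t line_length_in = 0x100;                        // in words
    static_assert(line_length_in * component_size == 0x400);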
@@ -99,12 +127,14 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
// Optimized path for micro copies.
const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
- if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) {
+ if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
+ regs.src_params.height > GOB_SIZE_Y) {
FastCopyBlockLinearToPitch();
return;
}
// Deswizzle the input and copy it over.
+ UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
const Parameters& src_params = regs.src_params;
const u32 width = src_params.width;
@@ -134,6 +164,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
void MaxwellDMA::CopyPitchToBlockLinear() {
UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
+ UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
const auto& dst_params = regs.dst_params;
const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index c77f02a22..d3329b0f8 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -21,8 +21,20 @@ namespace Tegra {
class MemoryManager;
}
+namespace VideoCore {
+class RasterizerInterface;
+}
+
namespace Tegra::Engines {
+class AccelerateDMAInterface {
+public:
+ /// Attempt an accelerated copy of `amount` bytes from src_address to dest_address;
+ /// returns true if the copy was handled, false to fall back to a CPU block copy.
+ virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0;
+
+ /// Attempt an accelerated fill of `amount` 32-bit words at src_address with value;
+ /// returns true if the clear was handled by the backend.
+ virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0;
+};
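A minimal conforming implementation can simply decline both hooks; BufferCopy returning false makes Launch() fall back to the read/write block copy shown earlier, while the clear path writes guest memory regardless of the return value. NullAccelerateDMA is an illustrative stub, not a class from this change:

    // Illustrative stub implementation; always defers to the CPU paths.
    class NullAccelerateDMA final : public Tegra::Engines::AccelerateDMAInterface {
    public:
        bool BufferCopy(GPUVAddr, GPUVAddr, u64) override {
            return false; // not accelerated
        }
        bool BufferClear(GPUVAddr, u64, u32) override {
            return false; // not accelerated
        }
    };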
+
/**
* This engine is known as gk104_copy. Documentation can be found in:
* https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
@@ -187,6 +199,8 @@ public:
};
static_assert(sizeof(RemapConst) == 12);
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_);
~MaxwellDMA() override;
@@ -213,6 +227,7 @@ private:
Core::System& system;
MemoryManager& memory_manager;
+ VideoCore::RasterizerInterface* rasterizer;
std::vector<u8> read_buffer;
std::vector<u8> write_buffer;
@@ -240,7 +255,9 @@ private:
u32 pitch_out;
u32 line_length_in;
u32 line_count;
- u32 reserved06[0xb8];
+ u32 reserved06[0xb6];
+ u32 remap_consta_value;
+ u32 remap_constb_value;
RemapConst remap_const;
Parameters dst_params;
u32 reserved07[0x1];
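Shrinking reserved06 from 0xb8 to 0xb6 words exactly pays for the two new u32 fields, so RemapConst and everything after it keep their old register offsets. The invariant as a static check:

    // Two reserved words were converted into the two new remap constant registers.
    static_assert(0xb6 + 2 == 0xb8, "remap constants must not shift later registers");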
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
deleted file mode 100644
index 8b45f1b62..000000000
--- a/src/video_core/engines/shader_bytecode.h
+++ /dev/null
@@ -1,2298 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <bitset>
-#include <optional>
-#include <tuple>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/bit_field.h"
-#include "common/common_types.h"
-
-namespace Tegra::Shader {
-
-struct Register {
- /// Number of registers
- static constexpr std::size_t NumRegisters = 256;
-
- /// Register 255 is special cased to always be 0
- static constexpr std::size_t ZeroIndex = 255;
-
- enum class Size : u64 {
- Byte = 0,
- Short = 1,
- Word = 2,
- Long = 3,
- };
-
- constexpr Register() = default;
-
- constexpr Register(u64 value_) : value(value_) {}
-
- [[nodiscard]] constexpr operator u64() const {
- return value;
- }
-
- template <typename T>
- [[nodiscard]] constexpr u64 operator-(const T& oth) const {
- return value - oth;
- }
-
- template <typename T>
- [[nodiscard]] constexpr u64 operator&(const T& oth) const {
- return value & oth;
- }
-
- [[nodiscard]] constexpr u64 operator&(const Register& oth) const {
- return value & oth.value;
- }
-
- [[nodiscard]] constexpr u64 operator~() const {
- return ~value;
- }
-
- [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const {
- elem = (value + elem) & 3;
- return (value & ~3) + elem;
- }
-
-private:
- u64 value{};
-};
-
-enum class AttributeSize : u64 {
- Word = 0,
- DoubleWord = 1,
- TripleWord = 2,
- QuadWord = 3,
-};
-
-union Attribute {
- Attribute() = default;
-
- constexpr explicit Attribute(u64 value_) : value(value_) {}
-
- enum class Index : u64 {
- LayerViewportPointSize = 6,
- Position = 7,
- Attribute_0 = 8,
- Attribute_31 = 39,
- FrontColor = 40,
- FrontSecondaryColor = 41,
- BackColor = 42,
- BackSecondaryColor = 43,
- ClipDistances0123 = 44,
- ClipDistances4567 = 45,
- PointCoord = 46,
- // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
- // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
- // shader.
- TessCoordInstanceIDVertexID = 47,
- TexCoord_0 = 48,
- TexCoord_7 = 55,
- // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment
- // shader. It is unknown what the other values contain.
- FrontFacing = 63,
- };
-
- union {
- BitField<20, 10, u64> immediate;
- BitField<22, 2, u64> element;
- BitField<24, 6, Index> index;
- BitField<31, 1, u64> patch;
- BitField<47, 3, AttributeSize> size;
-
- [[nodiscard]] bool IsPhysical() const {
- return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
- }
- } fmt20;
-
- union {
- BitField<30, 2, u64> element;
- BitField<32, 6, Index> index;
- } fmt28;
-
- BitField<39, 8, u64> reg;
- u64 value{};
-};
-
-union Sampler {
- Sampler() = default;
-
- constexpr explicit Sampler(u64 value_) : value(value_) {}
-
- enum class Index : u64 {
- Sampler_0 = 8,
- };
-
- BitField<36, 13, Index> index;
- u64 value{};
-};
-
-union Image {
- Image() = default;
-
- constexpr explicit Image(u64 value_) : value{value_} {}
-
- BitField<36, 13, u64> index;
- u64 value;
-};
-
-} // namespace Tegra::Shader
-
-namespace std {
-
-// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330.
-template <>
-struct make_unsigned<Tegra::Shader::Attribute> {
- using type = Tegra::Shader::Attribute;
-};
-
-template <>
-struct make_unsigned<Tegra::Shader::Register> {
- using type = Tegra::Shader::Register;
-};
-
-} // namespace std
-
-namespace Tegra::Shader {
-
-enum class Pred : u64 {
- UnusedIndex = 0x7,
- NeverExecute = 0xF,
-};
-
-enum class PredCondition : u64 {
- F = 0, // Always false
- LT = 1, // Ordered less than
- EQ = 2, // Ordered equal
- LE = 3, // Ordered less than or equal
- GT = 4, // Ordered greater than
- NE = 5, // Ordered not equal
- GE = 6, // Ordered greater than or equal
- NUM = 7, // Ordered
- NAN_ = 8, // Unordered
- LTU = 9, // Unordered less than
- EQU = 10, // Unordered equal
- LEU = 11, // Unordered less than or equal
- GTU = 12, // Unordered greater than
- NEU = 13, // Unordered not equal
- GEU = 14, // Unordered greater than or equal
- T = 15, // Always true
-};
-
-enum class PredOperation : u64 {
- And = 0,
- Or = 1,
- Xor = 2,
-};
-
-enum class LogicOperation : u64 {
- And = 0,
- Or = 1,
- Xor = 2,
- PassB = 3,
-};
-
-enum class SubOp : u64 {
- Cos = 0x0,
- Sin = 0x1,
- Ex2 = 0x2,
- Lg2 = 0x3,
- Rcp = 0x4,
- Rsq = 0x5,
- Sqrt = 0x8,
-};
-
-enum class F2iRoundingOp : u64 {
- RoundEven = 0,
- Floor = 1,
- Ceil = 2,
- Trunc = 3,
-};
-
-enum class F2fRoundingOp : u64 {
- None = 0,
- Pass = 3,
- Round = 8,
- Floor = 9,
- Ceil = 10,
- Trunc = 11,
-};
-
-enum class AtomicOp : u64 {
- Add = 0,
- Min = 1,
- Max = 2,
- Inc = 3,
- Dec = 4,
- And = 5,
- Or = 6,
- Xor = 7,
- Exch = 8,
- SafeAdd = 10,
-};
-
-enum class GlobalAtomicType : u64 {
- U32 = 0,
- S32 = 1,
- U64 = 2,
- F32_FTZ_RN = 3,
- F16x2_FTZ_RN = 4,
- S64 = 5,
-};
-
-enum class UniformType : u64 {
- UnsignedByte = 0,
- SignedByte = 1,
- UnsignedShort = 2,
- SignedShort = 3,
- Single = 4,
- Double = 5,
- Quad = 6,
- UnsignedQuad = 7,
-};
-
-enum class StoreType : u64 {
- Unsigned8 = 0,
- Signed8 = 1,
- Unsigned16 = 2,
- Signed16 = 3,
- Bits32 = 4,
- Bits64 = 5,
- Bits128 = 6,
-};
-
-enum class AtomicType : u64 {
- U32 = 0,
- S32 = 1,
- U64 = 2,
- S64 = 3,
-};
-
-enum class IMinMaxExchange : u64 {
- None = 0,
- XLo = 1,
- XMed = 2,
- XHi = 3,
-};
-
-enum class VideoType : u64 {
- Size16_Low = 0,
- Size16_High = 1,
- Size32 = 2,
- Invalid = 3,
-};
-
-enum class VmadShr : u64 {
- Shr7 = 1,
- Shr15 = 2,
-};
-
-enum class VmnmxType : u64 {
- Bits8,
- Bits16,
- Bits32,
-};
-
-enum class VmnmxOperation : u64 {
- Mrg_16H = 0,
- Mrg_16L = 1,
- Mrg_8B0 = 2,
- Mrg_8B2 = 3,
- Acc = 4,
- Min = 5,
- Max = 6,
- Nop = 7,
-};
-
-enum class XmadMode : u64 {
- None = 0,
- CLo = 1,
- CHi = 2,
- CSfu = 3,
- CBcc = 4,
-};
-
-enum class IAdd3Mode : u64 {
- None = 0,
- RightShift = 1,
- LeftShift = 2,
-};
-
-enum class IAdd3Height : u64 {
- None = 0,
- LowerHalfWord = 1,
- UpperHalfWord = 2,
-};
-
-enum class FlowCondition : u64 {
- Always = 0xF,
- Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
-};
-
-enum class ConditionCode : u64 {
- F = 0,
- LT = 1,
- EQ = 2,
- LE = 3,
- GT = 4,
- NE = 5,
- GE = 6,
- Num = 7,
- Nan = 8,
- LTU = 9,
- EQU = 10,
- LEU = 11,
- GTU = 12,
- NEU = 13,
- GEU = 14,
- T = 15,
- OFF = 16,
- LO = 17,
- SFF = 18,
- LS = 19,
- HI = 20,
- SFT = 21,
- HS = 22,
- OFT = 23,
- CSM_TA = 24,
- CSM_TR = 25,
- CSM_MX = 26,
- FCSM_TA = 27,
- FCSM_TR = 28,
- FCSM_MX = 29,
- RLE = 30,
- RGT = 31,
-};
-
-enum class PredicateResultMode : u64 {
- None = 0x0,
- NotZero = 0x3,
-};
-
-enum class TextureType : u64 {
- Texture1D = 0,
- Texture2D = 1,
- Texture3D = 2,
- TextureCube = 3,
-};
-
-enum class TextureQueryType : u64 {
- Dimension = 1,
- TextureType = 2,
- SamplePosition = 5,
- Filter = 16,
- LevelOfDetail = 18,
- Wrap = 20,
- BorderColor = 22,
-};
-
-enum class TextureProcessMode : u64 {
- None = 0,
- LZ = 1, // Load LOD of zero.
- LB = 2, // Load Bias.
- LL = 3, // Load LOD.
- LBA = 6, // Load Bias. The A is unknown; it does not appear to differ from LB.
- LLA = 7 // Load LOD. The A is unknown; it does not appear to differ from LL.
-};
-
-enum class TextureMiscMode : u64 {
- DC,
- AOFFI, // Uses Offset
- NDV,
- NODEP,
- MZ,
- PTP,
-};
-
-enum class SurfaceDataMode : u64 {
- P = 0,
- D_BA = 1,
-};
-
-enum class OutOfBoundsStore : u64 {
- Ignore = 0,
- Clamp = 1,
- Trap = 2,
-};
-
-enum class ImageType : u64 {
- Texture1D = 0,
- TextureBuffer = 1,
- Texture1DArray = 2,
- Texture2D = 3,
- Texture2DArray = 4,
- Texture3D = 5,
-};
-
-enum class IsberdMode : u64 {
- None = 0,
- Patch = 1,
- Prim = 2,
- Attr = 3,
-};
-
-enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };
-
-enum class MembarType : u64 {
- CTA = 0,
- GL = 1,
- SYS = 2,
- VC = 3,
-};
-
-enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 };
-
-enum class HalfType : u64 {
- H0_H1 = 0,
- F32 = 1,
- H0_H0 = 2,
- H1_H1 = 3,
-};
-
-enum class HalfMerge : u64 {
- H0_H1 = 0,
- F32 = 1,
- Mrg_H0 = 2,
- Mrg_H1 = 3,
-};
-
-enum class HalfPrecision : u64 {
- None = 0,
- FTZ = 1,
- FMZ = 2,
-};
-
-enum class R2pMode : u64 {
- Pr = 0,
- Cc = 1,
-};
-
-enum class IpaInterpMode : u64 {
- Pass = 0,
- Multiply = 1,
- Constant = 2,
- Sc = 3,
-};
-
-enum class IpaSampleMode : u64 {
- Default = 0,
- Centroid = 1,
- Offset = 2,
-};
-
-enum class LmemLoadCacheManagement : u64 {
- Default = 0,
- LU = 1,
- CI = 2,
- CV = 3,
-};
-
-enum class StoreCacheManagement : u64 {
- Default = 0,
- CG = 1,
- CS = 2,
- WT = 3,
-};
-
-struct IpaMode {
- IpaInterpMode interpolation_mode;
- IpaSampleMode sampling_mode;
-
- [[nodiscard]] bool operator==(const IpaMode& a) const {
- return std::tie(interpolation_mode, sampling_mode) ==
- std::tie(a.interpolation_mode, a.sampling_mode);
- }
- [[nodiscard]] bool operator!=(const IpaMode& a) const {
- return !operator==(a);
- }
- [[nodiscard]] bool operator<(const IpaMode& a) const {
- return std::tie(interpolation_mode, sampling_mode) <
- std::tie(a.interpolation_mode, a.sampling_mode);
- }
-};
-
-enum class SystemVariable : u64 {
- LaneId = 0x00,
- VirtCfg = 0x02,
- VirtId = 0x03,
- Pm0 = 0x04,
- Pm1 = 0x05,
- Pm2 = 0x06,
- Pm3 = 0x07,
- Pm4 = 0x08,
- Pm5 = 0x09,
- Pm6 = 0x0a,
- Pm7 = 0x0b,
- OrderingTicket = 0x0f,
- PrimType = 0x10,
- InvocationId = 0x11,
- Ydirection = 0x12,
- ThreadKill = 0x13,
- ShaderType = 0x14,
- DirectBeWriteAddressLow = 0x15,
- DirectBeWriteAddressHigh = 0x16,
- DirectBeWriteEnabled = 0x17,
- MachineId0 = 0x18,
- MachineId1 = 0x19,
- MachineId2 = 0x1a,
- MachineId3 = 0x1b,
- Affinity = 0x1c,
- InvocationInfo = 0x1d,
- WscaleFactorXY = 0x1e,
- WscaleFactorZ = 0x1f,
- Tid = 0x20,
- TidX = 0x21,
- TidY = 0x22,
- TidZ = 0x23,
- CtaParam = 0x24,
- CtaIdX = 0x25,
- CtaIdY = 0x26,
- CtaIdZ = 0x27,
- NtId = 0x28,
- CirQueueIncrMinusOne = 0x29,
- Nlatc = 0x2a,
- SmSpaVersion = 0x2c,
- MultiPassShaderInfo = 0x2d,
- LwinHi = 0x2e,
- SwinHi = 0x2f,
- SwinLo = 0x30,
- SwinSz = 0x31,
- SmemSz = 0x32,
- SmemBanks = 0x33,
- LwinLo = 0x34,
- LwinSz = 0x35,
- LmemLosz = 0x36,
- LmemHioff = 0x37,
- EqMask = 0x38,
- LtMask = 0x39,
- LeMask = 0x3a,
- GtMask = 0x3b,
- GeMask = 0x3c,
- RegAlloc = 0x3d,
- CtxAddr = 0x3e, // .fmask = F_SM50
- BarrierAlloc = 0x3e, // .fmask = F_SM60
- GlobalErrorStatus = 0x40,
- WarpErrorStatus = 0x42,
- WarpErrorStatusClear = 0x43,
- PmHi0 = 0x48,
- PmHi1 = 0x49,
- PmHi2 = 0x4a,
- PmHi3 = 0x4b,
- PmHi4 = 0x4c,
- PmHi5 = 0x4d,
- PmHi6 = 0x4e,
- PmHi7 = 0x4f,
- ClockLo = 0x50,
- ClockHi = 0x51,
- GlobalTimerLo = 0x52,
- GlobalTimerHi = 0x53,
- HwTaskId = 0x60,
- CircularQueueEntryIndex = 0x61,
- CircularQueueEntryAddressLow = 0x62,
- CircularQueueEntryAddressHigh = 0x63,
-};
-
-enum class PhysicalAttributeDirection : u64 {
- Input = 0,
- Output = 1,
-};
-
-enum class VoteOperation : u64 {
- All = 0, // allThreadsNV
- Any = 1, // anyThreadNV
- Eq = 2, // allThreadsEqualNV
-};
-
-enum class ImageAtomicOperationType : u64 {
- U32 = 0,
- S32 = 1,
- U64 = 2,
- F32 = 3,
- S64 = 5,
- SD32 = 6,
- SD64 = 7,
-};
-
-enum class ImageAtomicOperation : u64 {
- Add = 0,
- Min = 1,
- Max = 2,
- Inc = 3,
- Dec = 4,
- And = 5,
- Or = 6,
- Xor = 7,
- Exch = 8,
-};
-
-enum class ShuffleOperation : u64 {
- Idx = 0, // shuffleNV
- Up = 1, // shuffleUpNV
- Down = 2, // shuffleDownNV
- Bfly = 3, // shuffleXorNV
-};
-
-enum class ShfType : u64 {
- Bits32 = 0,
- U64 = 2,
- S64 = 3,
-};
-
-enum class ShfXmode : u64 {
- None = 0,
- HI = 1,
- X = 2,
- XHI = 3,
-};
-
-union Instruction {
- constexpr Instruction& operator=(const Instruction& instr) {
- value = instr.value;
- return *this;
- }
-
- constexpr Instruction(u64 value_) : value{value_} {}
- constexpr Instruction(const Instruction& instr) : value(instr.value) {}
-
- [[nodiscard]] constexpr bool Bit(u64 offset) const {
- return ((value >> offset) & 1) != 0;
- }
-
- BitField<0, 8, Register> gpr0;
- BitField<8, 8, Register> gpr8;
- union {
- BitField<16, 4, Pred> full_pred;
- BitField<16, 3, u64> pred_index;
- } pred;
- BitField<19, 1, u64> negate_pred;
- BitField<20, 8, Register> gpr20;
- BitField<20, 4, SubOp> sub_op;
- BitField<28, 8, Register> gpr28;
- BitField<39, 8, Register> gpr39;
- BitField<48, 16, u64> opcode;
-
- union {
- BitField<8, 5, ConditionCode> cc;
- BitField<13, 1, u64> trigger;
- } nop;
-
- union {
- BitField<48, 2, VoteOperation> operation;
- BitField<45, 3, u64> dest_pred;
- BitField<39, 3, u64> value;
- BitField<42, 1, u64> negate_value;
- } vote;
-
- union {
- BitField<30, 2, ShuffleOperation> operation;
- BitField<48, 3, u64> pred48;
- BitField<28, 1, u64> is_index_imm;
- BitField<29, 1, u64> is_mask_imm;
- BitField<20, 5, u64> index_imm;
- BitField<34, 13, u64> mask_imm;
- } shfl;
-
- union {
- BitField<44, 1, u64> ftz;
- BitField<39, 2, u64> tab5cb8_2;
- BitField<38, 1, u64> ndv;
- BitField<47, 1, u64> cc;
- BitField<28, 8, u64> swizzle;
- } fswzadd;
-
- union {
- BitField<8, 8, Register> gpr;
- BitField<20, 24, s64> offset;
- } gmem;
-
- union {
- BitField<20, 16, u64> imm20_16;
- BitField<20, 19, u64> imm20_19;
- BitField<20, 32, s64> imm20_32;
- BitField<45, 1, u64> negate_b;
- BitField<46, 1, u64> abs_a;
- BitField<48, 1, u64> negate_a;
- BitField<49, 1, u64> abs_b;
- BitField<50, 1, u64> saturate_d;
- BitField<56, 1, u64> negate_imm;
-
- union {
- BitField<39, 3, u64> pred;
- BitField<42, 1, u64> negate_pred;
- } fmnmx;
-
- union {
- BitField<39, 1, u64> invert_a;
- BitField<40, 1, u64> invert_b;
- BitField<41, 2, LogicOperation> operation;
- BitField<44, 2, PredicateResultMode> pred_result_mode;
- BitField<48, 3, Pred> pred48;
- } lop;
-
- union {
- BitField<53, 2, LogicOperation> operation;
- BitField<55, 1, u64> invert_a;
- BitField<56, 1, u64> invert_b;
- } lop32i;
-
- union {
- BitField<28, 8, u64> imm_lut28;
- BitField<48, 8, u64> imm_lut48;
-
- [[nodiscard]] u32 GetImmLut28() const {
- return static_cast<u32>(imm_lut28);
- }
-
- [[nodiscard]] u32 GetImmLut48() const {
- return static_cast<u32>(imm_lut48);
- }
- } lop3;
-
- [[nodiscard]] u16 GetImm20_16() const {
- return static_cast<u16>(imm20_16);
- }
-
- [[nodiscard]] u32 GetImm20_19() const {
- u32 imm{static_cast<u32>(imm20_19)};
- imm <<= 12;
- imm |= negate_imm ? 0x80000000 : 0;
- return imm;
- }
-
- [[nodiscard]] u32 GetImm20_32() const {
- return static_cast<u32>(imm20_32);
- }
-
- [[nodiscard]] s32 GetSignedImm20_20() const {
- const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
- // Sign extend the 20-bit value.
- const auto mask = 1U << (20 - 1);
- return static_cast<s32>((immediate ^ mask) - mask);
- }
- } alu;
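GetSignedImm20_20 above sign-extends a 20-bit immediate with the xor-subtract idiom. A self-contained sketch of the same trick, generalized over the field width:

    #include <cstdint>

    // Sign-extend the low `bits` of `value` using the xor-subtract idiom above.
    constexpr std::int32_t SignExtend(std::uint32_t value, unsigned bits) {
        const std::uint32_t mask = 1U << (bits - 1);
        return static_cast<std::int32_t>((value ^ mask) - mask);
    }
    static_assert(SignExtend(0xFFFFF, 20) == -1);      // all-ones 20-bit field
    static_assert(SignExtend(0x7FFFF, 20) == 0x7FFFF); // positive values pass through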
-
- union {
- BitField<38, 1, u64> idx;
- BitField<51, 1, u64> saturate;
- BitField<52, 2, IpaSampleMode> sample_mode;
- BitField<54, 2, IpaInterpMode> interp_mode;
- } ipa;
-
- union {
- BitField<39, 2, u64> tab5cb8_2;
- BitField<41, 3, u64> postfactor;
- BitField<44, 2, u64> tab5c68_0;
- BitField<48, 1, u64> negate_b;
- } fmul;
-
- union {
- BitField<55, 1, u64> saturate;
- } fmul32;
-
- union {
- BitField<52, 1, u64> generates_cc;
- } op_32;
-
- union {
- BitField<48, 1, u64> is_signed;
- } shift;
-
- union {
- BitField<39, 1, u64> wrap;
- } shr;
-
- union {
- BitField<37, 2, ShfType> type;
- BitField<48, 2, ShfXmode> xmode;
- BitField<50, 1, u64> wrap;
- BitField<20, 6, u64> immediate;
- } shf;
-
- union {
- BitField<39, 5, u64> shift_amount;
- BitField<48, 1, u64> negate_b;
- BitField<49, 1, u64> negate_a;
- } alu_integer;
-
- union {
- BitField<43, 1, u64> x;
- } iadd;
-
- union {
- BitField<39, 1, u64> ftz;
- BitField<32, 1, u64> saturate;
- BitField<49, 2, HalfMerge> merge;
-
- BitField<44, 1, u64> abs_a;
- BitField<47, 2, HalfType> type_a;
-
- BitField<30, 1, u64> abs_b;
- BitField<28, 2, HalfType> type_b;
-
- BitField<35, 2, HalfType> type_c;
- } alu_half;
-
- union {
- BitField<39, 2, HalfPrecision> precision;
- BitField<39, 1, u64> ftz;
- BitField<52, 1, u64> saturate;
- BitField<49, 2, HalfMerge> merge;
-
- BitField<43, 1, u64> negate_a;
- BitField<44, 1, u64> abs_a;
- BitField<47, 2, HalfType> type_a;
- } alu_half_imm;
-
- union {
- BitField<29, 1, u64> first_negate;
- BitField<20, 9, u64> first;
-
- BitField<56, 1, u64> second_negate;
- BitField<30, 9, u64> second;
-
- [[nodiscard]] u32 PackImmediates() const {
- // Immediates are half floats shifted.
- constexpr u32 imm_shift = 6;
- return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift)));
- }
- } half_imm;
-
- union {
- union {
- BitField<37, 2, HalfPrecision> precision;
- BitField<32, 1, u64> saturate;
-
- BitField<31, 1, u64> negate_b;
- BitField<30, 1, u64> negate_c;
- BitField<35, 2, HalfType> type_c;
- } rr;
-
- BitField<57, 2, HalfPrecision> precision;
- BitField<52, 1, u64> saturate;
-
- BitField<49, 2, HalfMerge> merge;
-
- BitField<47, 2, HalfType> type_a;
-
- BitField<56, 1, u64> negate_b;
- BitField<28, 2, HalfType> type_b;
-
- BitField<51, 1, u64> negate_c;
- BitField<53, 2, HalfType> type_reg39;
- } hfma2;
-
- union {
- BitField<40, 1, u64> invert;
- } popc;
-
- union {
- BitField<41, 1, u64> sh;
- BitField<40, 1, u64> invert;
- BitField<48, 1, u64> is_signed;
- } flo;
-
- union {
- BitField<39, 3, u64> pred;
- BitField<42, 1, u64> neg_pred;
- } sel;
-
- union {
- BitField<39, 3, u64> pred;
- BitField<42, 1, u64> negate_pred;
- BitField<43, 2, IMinMaxExchange> exchange;
- BitField<48, 1, u64> is_signed;
- } imnmx;
-
- union {
- BitField<31, 2, IAdd3Height> height_c;
- BitField<33, 2, IAdd3Height> height_b;
- BitField<35, 2, IAdd3Height> height_a;
- BitField<37, 2, IAdd3Mode> mode;
- BitField<49, 1, u64> neg_c;
- BitField<50, 1, u64> neg_b;
- BitField<51, 1, u64> neg_a;
- } iadd3;
-
- union {
- BitField<54, 1, u64> saturate;
- BitField<56, 1, u64> negate_a;
- } iadd32i;
-
- union {
- BitField<53, 1, u64> negate_b;
- BitField<54, 1, u64> abs_a;
- BitField<56, 1, u64> negate_a;
- BitField<57, 1, u64> abs_b;
- } fadd32i;
-
- union {
- BitField<40, 1, u64> brev;
- BitField<47, 1, u64> rd_cc;
- BitField<48, 1, u64> is_signed;
- } bfe;
-
- union {
- BitField<48, 3, u64> pred48;
-
- union {
- BitField<20, 20, u64> entry_a;
- BitField<39, 5, u64> entry_b;
- BitField<45, 1, u64> neg;
- BitField<46, 1, u64> uses_cc;
- } imm;
-
- union {
- BitField<20, 14, u64> cb_index;
- BitField<34, 5, u64> cb_offset;
- BitField<56, 1, u64> neg;
- BitField<57, 1, u64> uses_cc;
- } hi;
-
- union {
- BitField<20, 14, u64> cb_index;
- BitField<34, 5, u64> cb_offset;
- BitField<39, 5, u64> entry_a;
- BitField<45, 1, u64> neg;
- BitField<46, 1, u64> uses_cc;
- } rz;
-
- union {
- BitField<39, 5, u64> entry_a;
- BitField<45, 1, u64> neg;
- BitField<46, 1, u64> uses_cc;
- } r1;
-
- union {
- BitField<28, 8, u64> entry_a;
- BitField<37, 1, u64> neg;
- BitField<38, 1, u64> uses_cc;
- } r2;
-
- } lea;
-
- union {
- BitField<0, 5, FlowCondition> cond;
- } flow;
-
- union {
- BitField<47, 1, u64> cc;
- BitField<48, 1, u64> negate_b;
- BitField<49, 1, u64> negate_c;
- BitField<51, 2, u64> tab5980_1;
- BitField<53, 2, u64> tab5980_0;
- } ffma;
-
- union {
- BitField<48, 3, UniformType> type;
- BitField<44, 2, u64> unknown;
- } ld_c;
-
- union {
- BitField<48, 3, StoreType> type;
- } ldst_sl;
-
- union {
- BitField<44, 2, u64> unknown;
- } ld_l;
-
- union {
- BitField<44, 2, StoreCacheManagement> cache_management;
- } st_l;
-
- union {
- BitField<48, 3, UniformType> type;
- BitField<46, 2, u64> cache_mode;
- } ldg;
-
- union {
- BitField<48, 3, UniformType> type;
- BitField<46, 2, u64> cache_mode;
- } stg;
-
- union {
- BitField<23, 3, AtomicOp> operation;
- BitField<48, 1, u64> extended;
- BitField<20, 3, GlobalAtomicType> type;
- } red;
-
- union {
- BitField<52, 4, AtomicOp> operation;
- BitField<49, 3, GlobalAtomicType> type;
- BitField<28, 20, s64> offset;
- } atom;
-
- union {
- BitField<52, 4, AtomicOp> operation;
- BitField<28, 2, AtomicType> type;
- BitField<30, 22, s64> offset;
-
- [[nodiscard]] s32 GetImmediateOffset() const {
- return static_cast<s32>(offset << 2);
- }
- } atoms;
-
- union {
- BitField<32, 1, PhysicalAttributeDirection> direction;
- BitField<47, 3, AttributeSize> size;
- BitField<20, 11, u64> address;
- } al2p;
-
- union {
- BitField<53, 3, UniformType> type;
- BitField<52, 1, u64> extended;
- } generic;
-
- union {
- BitField<0, 3, u64> pred0;
- BitField<3, 3, u64> pred3;
- BitField<6, 1, u64> neg_b;
- BitField<7, 1, u64> abs_a;
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred;
- BitField<43, 1, u64> neg_a;
- BitField<44, 1, u64> abs_b;
- BitField<45, 2, PredOperation> op;
- BitField<47, 1, u64> ftz;
- BitField<48, 4, PredCondition> cond;
- } fsetp;
-
- union {
- BitField<0, 3, u64> pred0;
- BitField<3, 3, u64> pred3;
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred;
- BitField<45, 2, PredOperation> op;
- BitField<48, 1, u64> is_signed;
- BitField<49, 3, PredCondition> cond;
- } isetp;
-
- union {
- BitField<48, 1, u64> is_signed;
- BitField<49, 3, PredCondition> cond;
- } icmp;
-
- union {
- BitField<0, 3, u64> pred0;
- BitField<3, 3, u64> pred3;
- BitField<12, 3, u64> pred12;
- BitField<15, 1, u64> neg_pred12;
- BitField<24, 2, PredOperation> cond;
- BitField<29, 3, u64> pred29;
- BitField<32, 1, u64> neg_pred29;
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred39;
- BitField<45, 2, PredOperation> op;
- } psetp;
-
- union {
- BitField<43, 4, PredCondition> cond;
- BitField<45, 2, PredOperation> op;
- BitField<3, 3, u64> pred3;
- BitField<0, 3, u64> pred0;
- BitField<39, 3, u64> pred39;
- } vsetp;
-
- union {
- BitField<12, 3, u64> pred12;
- BitField<15, 1, u64> neg_pred12;
- BitField<24, 2, PredOperation> cond;
- BitField<29, 3, u64> pred29;
- BitField<32, 1, u64> neg_pred29;
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred39;
- BitField<44, 1, u64> bf;
- BitField<45, 2, PredOperation> op;
- } pset;
-
- union {
- BitField<0, 3, u64> pred0;
- BitField<3, 3, u64> pred3;
- BitField<8, 5, ConditionCode> cc; // flag in cc
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred39;
- BitField<45, 4, PredOperation> op; // op with pred39
- } csetp;
-
- union {
- BitField<6, 1, u64> ftz;
- BitField<45, 2, PredOperation> op;
- BitField<3, 3, u64> pred3;
- BitField<0, 3, u64> pred0;
- BitField<43, 1, u64> negate_a;
- BitField<44, 1, u64> abs_a;
- BitField<47, 2, HalfType> type_a;
- union {
- BitField<35, 4, PredCondition> cond;
- BitField<49, 1, u64> h_and;
- BitField<31, 1, u64> negate_b;
- BitField<30, 1, u64> abs_b;
- BitField<28, 2, HalfType> type_b;
- } reg;
- union {
- BitField<56, 1, u64> negate_b;
- BitField<54, 1, u64> abs_b;
- } cbuf;
- union {
- BitField<49, 4, PredCondition> cond;
- BitField<53, 1, u64> h_and;
- } cbuf_and_imm;
- BitField<42, 1, u64> neg_pred;
- BitField<39, 3, u64> pred39;
- } hsetp2;
-
- union {
- BitField<40, 1, R2pMode> mode;
- BitField<41, 2, u64> byte;
- BitField<20, 7, u64> immediate_mask;
- } p2r_r2p;
-
- union {
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred;
- BitField<43, 1, u64> neg_a;
- BitField<44, 1, u64> abs_b;
- BitField<45, 2, PredOperation> op;
- BitField<48, 4, PredCondition> cond;
- BitField<52, 1, u64> bf;
- BitField<53, 1, u64> neg_b;
- BitField<54, 1, u64> abs_a;
- BitField<55, 1, u64> ftz;
- } fset;
-
- union {
- BitField<47, 1, u64> ftz;
- BitField<48, 4, PredCondition> cond;
- } fcmp;
-
- union {
- BitField<49, 1, u64> bf;
- BitField<35, 3, PredCondition> cond;
- BitField<50, 1, u64> ftz;
- BitField<45, 2, PredOperation> op;
- BitField<43, 1, u64> negate_a;
- BitField<44, 1, u64> abs_a;
- BitField<47, 2, HalfType> type_a;
- BitField<31, 1, u64> negate_b;
- BitField<30, 1, u64> abs_b;
- BitField<28, 2, HalfType> type_b;
- BitField<42, 1, u64> neg_pred;
- BitField<39, 3, u64> pred39;
- } hset2;
-
- union {
- BitField<39, 3, u64> pred39;
- BitField<42, 1, u64> neg_pred;
- BitField<44, 1, u64> bf;
- BitField<45, 2, PredOperation> op;
- BitField<48, 1, u64> is_signed;
- BitField<49, 3, PredCondition> cond;
- } iset;
-
- union {
- BitField<45, 1, u64> negate_a;
- BitField<49, 1, u64> abs_a;
- BitField<10, 2, Register::Size> src_size;
- BitField<13, 1, u64> is_input_signed;
- BitField<8, 2, Register::Size> dst_size;
- BitField<12, 1, u64> is_output_signed;
-
- union {
- BitField<39, 2, u64> tab5cb8_2;
- } i2f;
-
- union {
- BitField<39, 2, F2iRoundingOp> rounding;
- } f2i;
-
- union {
- BitField<39, 4, u64> rounding;
- // H0, H1 extract for F16 missing
- BitField<41, 1, u64> selector; // Guessed, as some games set it. TODO: reverse this value.
- [[nodiscard]] F2fRoundingOp GetRoundingMode() const {
- constexpr u64 rounding_mask = 0x0B;
- return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask);
- }
- } f2f;
-
- union {
- BitField<41, 2, u64> selector;
- } int_src;
-
- union {
- BitField<41, 1, u64> selector;
- } float_src;
- } conversion;
-
- union {
- BitField<28, 1, u64> array;
- BitField<29, 2, TextureType> texture_type;
- BitField<31, 4, u64> component_mask;
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 1, u64> dc_flag;
- BitField<54, 1, u64> aoffi_flag;
- BitField<55, 3, TextureProcessMode> process_mode;
-
- [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
- return ((1ULL << component) & component_mask) != 0;
- }
-
- [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
- return process_mode;
- }
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::DC:
- return dc_flag != 0;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- case TextureMiscMode::AOFFI:
- return aoffi_flag != 0;
- default:
- break;
- }
- return false;
- }
- } tex;
-
- union {
- BitField<28, 1, u64> array;
- BitField<29, 2, TextureType> texture_type;
- BitField<31, 4, u64> component_mask;
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 1, u64> dc_flag;
- BitField<36, 1, u64> aoffi_flag;
- BitField<37, 3, TextureProcessMode> process_mode;
-
- [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
- return ((1ULL << component) & component_mask) != 0;
- }
-
- [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
- return process_mode;
- }
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::DC:
- return dc_flag != 0;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- case TextureMiscMode::AOFFI:
- return aoffi_flag != 0;
- default:
- break;
- }
- return false;
- }
- } tex_b;
-
- union {
- BitField<22, 6, TextureQueryType> query_type;
- BitField<31, 4, u64> component_mask;
- BitField<49, 1, u64> nodep_flag;
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- default:
- break;
- }
- return false;
- }
-
- [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
- return ((1ULL << component) & component_mask) != 0;
- }
- } txq;
-
- union {
- BitField<28, 1, u64> array;
- BitField<29, 2, TextureType> texture_type;
- BitField<31, 4, u64> component_mask;
- BitField<35, 1, u64> ndv_flag;
- BitField<49, 1, u64> nodep_flag;
-
- [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
- return ((1ULL << component) & component_mask) != 0;
- }
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::NDV:
- return (ndv_flag != 0);
- case TextureMiscMode::NODEP:
- return (nodep_flag != 0);
- default:
- break;
- }
- return false;
- }
- } tmml;
-
- union {
- BitField<28, 1, u64> array;
- BitField<29, 2, TextureType> texture_type;
- BitField<35, 1, u64> ndv_flag;
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 1, u64> dc_flag;
- BitField<54, 2, u64> offset_mode;
- BitField<56, 2, u64> component;
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::NDV:
- return ndv_flag != 0;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- case TextureMiscMode::DC:
- return dc_flag != 0;
- case TextureMiscMode::AOFFI:
- return offset_mode == 1;
- case TextureMiscMode::PTP:
- return offset_mode == 2;
- default:
- break;
- }
- return false;
- }
- } tld4;
-
- union {
- BitField<35, 1, u64> ndv_flag;
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 1, u64> dc_flag;
- BitField<33, 2, u64> offset_mode;
- BitField<37, 2, u64> component;
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::NDV:
- return ndv_flag != 0;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- case TextureMiscMode::DC:
- return dc_flag != 0;
- case TextureMiscMode::AOFFI:
- return offset_mode == 1;
- case TextureMiscMode::PTP:
- return offset_mode == 2;
- default:
- break;
- }
- return false;
- }
- } tld4_b;
-
- union {
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 1, u64> dc_flag;
- BitField<51, 1, u64> aoffi_flag;
- BitField<52, 2, u64> component;
- BitField<55, 1, u64> fp16_flag;
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::DC:
- return dc_flag != 0;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- case TextureMiscMode::AOFFI:
- return aoffi_flag != 0;
- default:
- break;
- }
- return false;
- }
- } tld4s;
-
- union {
- BitField<0, 8, Register> gpr0;
- BitField<28, 8, Register> gpr28;
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 3, u64> component_mask_selector;
- BitField<53, 4, u64> texture_info;
- BitField<59, 1, u64> fp32_flag;
-
- [[nodiscard]] TextureType GetTextureType() const {
- // The TEXS instruction has a weird encoding for the texture type.
- if (texture_info == 0) {
- return TextureType::Texture1D;
- }
- if (texture_info >= 1 && texture_info <= 9) {
- return TextureType::Texture2D;
- }
- if (texture_info >= 10 && texture_info <= 11) {
- return TextureType::Texture3D;
- }
- if (texture_info >= 12 && texture_info <= 13) {
- return TextureType::TextureCube;
- }
-
- LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
- UNREACHABLE();
- return TextureType::Texture1D;
- }
-
- [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
- switch (texture_info) {
- case 0:
- case 2:
- case 6:
- case 8:
- case 9:
- case 11:
- return TextureProcessMode::LZ;
- case 3:
- case 5:
- case 13:
- return TextureProcessMode::LL;
- default:
- break;
- }
- return TextureProcessMode::None;
- }
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::DC:
- return (texture_info >= 4 && texture_info <= 6) || texture_info == 9;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- default:
- break;
- }
- return false;
- }
-
- [[nodiscard]] bool IsArrayTexture() const {
- // TEXS only supports Texture2D arrays.
- return texture_info >= 7 && texture_info <= 9;
- }
-
- [[nodiscard]] bool HasTwoDestinations() const {
- return gpr28.Value() != Register::ZeroIndex;
- }
-
- [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
- static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
- {},
- {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
- {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
- {0x7, 0xb, 0xd, 0xe, 0xf},
- }};
-
- std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
- index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
-
- u32 mask = mask_lut[index][component_mask_selector];
- // A mask of 0 means this instruction uses an unimplemented mask.
- ASSERT(mask != 0);
- return ((1ull << component) & mask) != 0;
- }
- } texs;
-
- union {
- BitField<28, 1, u64> is_array;
- BitField<29, 2, TextureType> texture_type;
- BitField<35, 1, u64> aoffi;
- BitField<49, 1, u64> nodep_flag;
- BitField<50, 1, u64> ms; // Multisample?
- BitField<54, 1, u64> cl;
- BitField<55, 1, u64> process_mode;
-
- [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
- return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
- }
- } tld;
-
- union {
- BitField<49, 1, u64> nodep_flag;
- BitField<53, 4, u64> texture_info;
- BitField<59, 1, u64> fp32_flag;
-
- [[nodiscard]] TextureType GetTextureType() const {
- // The TLDS instruction has a weird encoding for the texture type.
- if (texture_info <= 1) {
- return TextureType::Texture1D;
- }
- if (texture_info == 2 || texture_info == 8 || texture_info == 12 ||
- (texture_info >= 4 && texture_info <= 6)) {
- return TextureType::Texture2D;
- }
- if (texture_info == 7) {
- return TextureType::Texture3D;
- }
-
- LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
- UNREACHABLE();
- return TextureType::Texture1D;
- }
-
- [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
- if (texture_info == 1 || texture_info == 5 || texture_info == 12) {
- return TextureProcessMode::LL;
- }
- return TextureProcessMode::LZ;
- }
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::AOFFI:
- return texture_info == 12 || texture_info == 4;
- case TextureMiscMode::MZ:
- return texture_info == 5;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- default:
- break;
- }
- return false;
- }
-
- [[nodiscard]] bool IsArrayTexture() const {
- // TLDS only supports Texture2D arrays.
- return texture_info == 8;
- }
- } tlds;
-
- union {
- BitField<28, 1, u64> is_array;
- BitField<29, 2, TextureType> texture_type;
- BitField<35, 1, u64> aoffi_flag;
- BitField<49, 1, u64> nodep_flag;
-
- [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
- switch (mode) {
- case TextureMiscMode::AOFFI:
- return aoffi_flag != 0;
- case TextureMiscMode::NODEP:
- return nodep_flag != 0;
- default:
- break;
- }
- return false;
- }
-
- } txd;
-
- union {
- BitField<24, 2, StoreCacheManagement> cache_management;
- BitField<33, 3, ImageType> image_type;
- BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
- BitField<51, 1, u64> is_immediate;
- BitField<52, 1, SurfaceDataMode> mode;
-
- BitField<20, 3, StoreType> store_data_layout;
- BitField<20, 4, u64> component_mask_selector;
-
- [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
- ASSERT(mode == SurfaceDataMode::P);
- constexpr u8 R = 0b0001;
- constexpr u8 G = 0b0010;
- constexpr u8 B = 0b0100;
- constexpr u8 A = 0b1000;
- constexpr std::array<u8, 16> mask = {
- 0, (R), (G), (R | G), (B), (R | B),
- (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A),
- (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
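- // Note: entry i of this table equals i (R=1, G=2, B=4, A=8); it is
- // spelled out to make the component composition explicit.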
- return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
- }
-
- [[nodiscard]] StoreType GetStoreDataLayout() const {
- ASSERT(mode == SurfaceDataMode::D_BA);
- return store_data_layout;
- }
- } suldst;
-
- union {
- BitField<28, 1, u64> is_ba;
- BitField<51, 3, ImageAtomicOperationType> operation_type;
- BitField<33, 3, ImageType> image_type;
- BitField<29, 4, ImageAtomicOperation> operation;
- BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
- } suatom_d;
-
- union {
- BitField<20, 24, u64> target;
- BitField<5, 1, u64> constant_buffer;
-
- [[nodiscard]] s32 GetBranchTarget() const {
- // Sign extend the branch target offset
- const auto mask = 1U << (24 - 1);
- const auto target_value = static_cast<u32>(target);
- constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
-
- // The branch offset is relative to the next instruction and is stored in bytes, so
- // divide it by the size of an instruction and add 1 to it.
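- // For example, a raw 24-bit target of 0xFFFFF8 sign-extends to -8 bytes;
- // -8 / 8 + 1 = 0, i.e. a branch back to the BRA instruction itself.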
- return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
- }
- } bra;
-
- union {
- BitField<20, 24, u64> target;
- BitField<5, 1, u64> constant_buffer;
-
- [[nodiscard]] s32 GetBranchExtend() const {
- // Sign extend the branch target offset
- const auto mask = 1U << (24 - 1);
- const auto target_value = static_cast<u32>(target);
- constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
-
- // The branch offset is relative to the next instruction and is stored in bytes, so
- // divide it by the size of an instruction and add 1 to it.
- return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
- }
- } brx;
-
- union {
- BitField<39, 1, u64> emit; // EmitVertex
- BitField<40, 1, u64> cut; // EndPrimitive
- } out;
-
- union {
- BitField<31, 1, u64> skew;
- BitField<32, 1, u64> o;
- BitField<33, 2, IsberdMode> mode;
- BitField<47, 2, IsberdShift> shift;
- } isberd;
-
- union {
- BitField<8, 2, MembarType> type;
- BitField<0, 2, MembarUnknown> unknown;
- } membar;
-
- union {
- BitField<48, 1, u64> signed_a;
- BitField<38, 1, u64> is_byte_chunk_a;
- BitField<36, 2, VideoType> type_a;
- BitField<36, 2, u64> byte_height_a;
-
- BitField<49, 1, u64> signed_b;
- BitField<50, 1, u64> use_register_b;
- BitField<30, 1, u64> is_byte_chunk_b;
- BitField<28, 2, VideoType> type_b;
- BitField<28, 2, u64> byte_height_b;
- } video;
-
- union {
- BitField<51, 2, VmadShr> shr;
- BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
- BitField<47, 1, u64> cc;
- } vmad;
-
- union {
- BitField<54, 1, u64> is_dest_signed;
- BitField<48, 1, u64> is_src_a_signed;
- BitField<49, 1, u64> is_src_b_signed;
- BitField<37, 2, u64> src_format_a;
- BitField<29, 2, u64> src_format_b;
- BitField<56, 1, u64> mx;
- BitField<55, 1, u64> sat;
- BitField<36, 2, u64> selector_a;
- BitField<28, 2, u64> selector_b;
- BitField<50, 1, u64> is_op_b_register;
- BitField<51, 3, VmnmxOperation> operation;
-
- [[nodiscard]] VmnmxType SourceFormatA() const {
- switch (src_format_a) {
- case 0b11:
- return VmnmxType::Bits32;
- case 0b10:
- return VmnmxType::Bits16;
- default:
- return VmnmxType::Bits8;
- }
- }
-
- [[nodiscard]] VmnmxType SourceFormatB() const {
- switch (src_format_b) {
- case 0b11:
- return VmnmxType::Bits32;
- case 0b10:
- return VmnmxType::Bits16;
- default:
- return VmnmxType::Bits8;
- }
- }
- } vmnmx;
-
- union {
- BitField<20, 16, u64> imm20_16;
- BitField<35, 1, u64> high_b_rr; // used on RR
- BitField<36, 1, u64> product_shift_left;
- BitField<37, 1, u64> merge_37;
- BitField<48, 1, u64> sign_a;
- BitField<49, 1, u64> sign_b;
- BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC
- BitField<50, 3, XmadMode> mode;
- BitField<52, 1, u64> high_b;
- BitField<53, 1, u64> high_a;
- BitField<55, 1, u64> product_shift_left_second; // used on CR
- BitField<56, 1, u64> merge_56;
- } xmad;
-
- union {
- BitField<20, 14, u64> shifted_offset;
- BitField<34, 5, u64> index;
-
- [[nodiscard]] u64 GetOffset() const {
- return shifted_offset * 4;
- }
- } cbuf34;
-
- union {
- BitField<20, 16, s64> offset;
- BitField<36, 5, u64> index;
-
- [[nodiscard]] s64 GetOffset() const {
- return offset;
- }
- } cbuf36;
-
- // Unsure about the size of this one.
- // It's always used with a gpr0, so any size should be fine.
- BitField<20, 8, SystemVariable> sys20;
-
- BitField<47, 1, u64> generates_cc;
- BitField<61, 1, u64> is_b_imm;
- BitField<60, 1, u64> is_b_gpr;
- BitField<59, 1, u64> is_c_gpr;
- BitField<20, 24, s64> smem_imm;
- BitField<0, 5, ConditionCode> flow_condition_code;
-
- Attribute attribute;
- Sampler sampler;
- Image image;
-
- u64 value;
-};
-static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
-static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout");
-
-class OpCode {
-public:
- enum class Id {
- KIL,
- SSY,
- SYNC,
- BRK,
- DEPBAR,
- VOTE,
- VOTE_VTG,
- SHFL,
- FSWZADD,
- BFE_C,
- BFE_R,
- BFE_IMM,
- BFI_RC,
- BFI_IMM_R,
- BRA,
- BRX,
- PBK,
- LD_A,
- LD_L,
- LD_S,
- LD_C,
- LD, // Load from generic memory
- LDG, // Load from global memory
- ST_A,
- ST_L,
- ST_S,
- ST, // Store in generic memory
- STG, // Store in global memory
- RED, // Reduction operation
- ATOM, // Atomic operation on global memory
- ATOMS, // Atomic operation on shared memory
- AL2P, // Transforms attribute memory into physical memory
- TEX,
- TEX_B, // Texture Load Bindless
- TXQ, // Texture Query
- TXQ_B, // Texture Query Bindless
- TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
- TLD, // Texture Load
- TLDS, // Texture Load with scalar/non-vec4 source/destinations
- TLD4, // Texture Gather 4
- TLD4_B, // Texture Gather 4 Bindless
- TLD4S, // Texture Load 4 with scalar/non-vec4 source/destinations
- TMML_B, // Texture Mip Map Level
- TMML, // Texture Mip Map Level
- TXD, // Texture Gradient/Load with Derivatives
- TXD_B, // Texture Gradient/Load with Derivatives Bindless
- SUST, // Surface Store
- SULD, // Surface Load
- SUATOM, // Surface Atomic Operation
- EXIT,
- NOP,
- IPA,
- OUT_R, // Emit vertex/primitive
- ISBERD,
- BAR,
- MEMBAR,
- VMAD,
- VSETP,
- VMNMX,
- FFMA_IMM, // Fused Multiply and Add
- FFMA_CR,
- FFMA_RC,
- FFMA_RR,
- FADD_C,
- FADD_R,
- FADD_IMM,
- FADD32I,
- FMUL_C,
- FMUL_R,
- FMUL_IMM,
- FMUL32_IMM,
- IADD_C,
- IADD_R,
- IADD_IMM,
- IADD3_C, // Add 3 Integers
- IADD3_R,
- IADD3_IMM,
- IADD32I,
- ISCADD_C, // Scale and Add
- ISCADD_R,
- ISCADD_IMM,
- FLO_R,
- FLO_C,
- FLO_IMM,
- LEA_R1,
- LEA_R2,
- LEA_RZ,
- LEA_IMM,
- LEA_HI,
- HADD2_C,
- HADD2_R,
- HADD2_IMM,
- HMUL2_C,
- HMUL2_R,
- HMUL2_IMM,
- HFMA2_CR,
- HFMA2_RC,
- HFMA2_RR,
- HFMA2_IMM_R,
- HSETP2_C,
- HSETP2_R,
- HSETP2_IMM,
- HSET2_C,
- HSET2_R,
- HSET2_IMM,
- POPC_C,
- POPC_R,
- POPC_IMM,
- SEL_C,
- SEL_R,
- SEL_IMM,
- ICMP_RC,
- ICMP_R,
- ICMP_CR,
- ICMP_IMM,
- FCMP_RR,
- FCMP_RC,
- FCMP_IMMR,
- MUFU, // Multi-Function Operator
- RRO_C, // Range Reduction Operator
- RRO_R,
- RRO_IMM,
- F2F_C,
- F2F_R,
- F2F_IMM,
- F2I_C,
- F2I_R,
- F2I_IMM,
- I2F_C,
- I2F_R,
- I2F_IMM,
- I2I_C,
- I2I_R,
- I2I_IMM,
- LOP_C,
- LOP_R,
- LOP_IMM,
- LOP32I,
- LOP3_C,
- LOP3_R,
- LOP3_IMM,
- MOV_C,
- MOV_R,
- MOV_IMM,
- S2R,
- MOV32_IMM,
- SHL_C,
- SHL_R,
- SHL_IMM,
- SHR_C,
- SHR_R,
- SHR_IMM,
- SHF_RIGHT_R,
- SHF_RIGHT_IMM,
- SHF_LEFT_R,
- SHF_LEFT_IMM,
- FMNMX_C,
- FMNMX_R,
- FMNMX_IMM,
- IMNMX_C,
- IMNMX_R,
- IMNMX_IMM,
- FSETP_C, // Set Predicate
- FSETP_R,
- FSETP_IMM,
- FSET_C,
- FSET_R,
- FSET_IMM,
- ISETP_C,
- ISETP_IMM,
- ISETP_R,
- ISET_R,
- ISET_C,
- ISET_IMM,
- PSETP,
- PSET,
- CSETP,
- R2P_IMM,
- P2R_IMM,
- XMAD_IMM,
- XMAD_CR,
- XMAD_RC,
- XMAD_RR,
- };
-
- enum class Type {
- Trivial,
- Arithmetic,
- ArithmeticImmediate,
- ArithmeticInteger,
- ArithmeticIntegerImmediate,
- ArithmeticHalf,
- ArithmeticHalfImmediate,
- Bfe,
- Bfi,
- Shift,
- Ffma,
- Hfma2,
- Flow,
- Synch,
- Warp,
- Memory,
- Texture,
- Image,
- FloatSet,
- FloatSetPredicate,
- IntegerSet,
- IntegerSetPredicate,
- HalfSet,
- HalfSetPredicate,
- PredicateSetPredicate,
- PredicateSetRegister,
- RegisterSetPredicate,
- Conversion,
- Video,
- Xmad,
- Unknown,
- };
-
- /// Returns whether an opcode has an execution predicate field or not (i.e., whether it can
- /// be conditionally executed).
- [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) {
- // TODO(Subv): Add the rest of unpredicated instructions.
- return opcode != Id::SSY && opcode != Id::PBK;
- }
-
- class Matcher {
- public:
- constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_)
- : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {}
-
- [[nodiscard]] constexpr const char* GetName() const {
- return name;
- }
-
- [[nodiscard]] constexpr u16 GetMask() const {
- return mask;
- }
-
- [[nodiscard]] constexpr Id GetId() const {
- return id;
- }
-
- [[nodiscard]] constexpr Type GetType() const {
- return type;
- }
-
- /**
- * Tests to see if the given instruction is the instruction this matcher represents.
- * @param instruction The instruction to test
- * @returns true if the given instruction matches.
- */
- [[nodiscard]] constexpr bool Matches(u16 instruction) const {
- return (instruction & mask) == expected;
- }
-
- private:
- const char* name;
- u16 mask;
- u16 expected;
- Id id;
- Type type;
- };
-
- using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>;
- [[nodiscard]] static DecodeResult Decode(Instruction instr) {
- static const auto table{GetDecodeTable()};
-
- const auto matches_instruction = [instr](const auto& matcher) {
- return matcher.Matches(static_cast<u16>(instr.opcode));
- };
-
- auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
- return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter)
- : std::nullopt;
- }
-
-private:
- struct Detail {
- private:
- static constexpr std::size_t opcode_bitsize = 16;
-
- /**
- * Generates the mask and the expected value after masking from a given bitstring.
- * A '0' in a bitstring indicates that a zero must be present at that bit position.
- * A '1' in a bitstring indicates that a one must be present at that bit position.
- * Any other character (such as '-') is ignored and acts as a wildcard.
- */
- [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) {
- u16 mask = 0, expect = 0;
- for (std::size_t i = 0; i < opcode_bitsize; i++) {
- const std::size_t bit_position = opcode_bitsize - i - 1;
- switch (bitstring[i]) {
- case '0':
- mask |= static_cast<u16>(1U << bit_position);
- break;
- case '1':
- expect |= static_cast<u16>(1U << bit_position);
- mask |= static_cast<u16>(1U << bit_position);
- break;
- default:
- // Ignore
- break;
- }
- }
- return std::make_pair(mask, expect);
- }
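-
- // Worked example: the ST pattern "101-------------" yields mask 0xE000 and
- // expected 0xA000, so any opcode whose top three bits are 101 matches.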
-
- public:
- /// Creates a matcher that can match and parse instructions based on bitstring.
- [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op,
- Type type, const char* const name) {
- const auto [mask, expected] = GetMaskAndExpect(bitstring);
- return Matcher(name, mask, expected, op, type);
- }
- };
-
- [[nodiscard]] static std::vector<Matcher> GetDecodeTable() {
- std::vector<Matcher> table = {
-#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
- INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
- INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
- INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
- INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
- INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
- INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
- INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
- INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
- INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
- INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
- INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
- INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
- INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
- INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
- INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
- INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
- INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
- INST("100-------------", Id::LD, Type::Memory, "LD"),
- INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
- INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
- INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
- INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
- INST("101-------------", Id::ST, Type::Memory, "ST"),
- INST("1110111011011---", Id::STG, Type::Memory, "STG"),
- INST("1110101111111---", Id::RED, Type::Memory, "RED"),
- INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
- INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
- INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
- INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
- INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
- INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
- INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
- INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
- INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
- INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
- INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
- INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
- INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
- INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
- INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
- INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
- INST("11011110001110--", Id::TXD, Type::Texture, "TXD"),
- INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
- INST("11101011000-----", Id::SULD, Type::Image, "SULD"),
- INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
- INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
- INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
- INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
- INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
- INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
- INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
- INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
- INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
- INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
- INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
- INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
- INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
- INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"),
- INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"),
- INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"),
- INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"),
- INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"),
- INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
- INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
- INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
- INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"),
- INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
- INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
- INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
- INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"),
- INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"),
- INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"),
- INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
- INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
- INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
- INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
- INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"),
- INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"),
- INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"),
- INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
- INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
- INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
- INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"),
- INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"),
- INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"),
- INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"),
- INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"),
- INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"),
- INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"),
- INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
- INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
- INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
- INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
- INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
- INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"),
- INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"),
- INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"),
- INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"),
- INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"),
- INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"),
- INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"),
- INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
- INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
- INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
- INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
- INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
- INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
- INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
- INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
- INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
- INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
- INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
- INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
- INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
- INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
- INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
- INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"),
- INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
- INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
- INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
- INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"),
- INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"),
- INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"),
- INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
- INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
- INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
- INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
- INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
- INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
- INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
- INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
- INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"),
- INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"),
- INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"),
- INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
- INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
- INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
- INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"),
- INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
- INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
- INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
- INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
- INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
- INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
- INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
- INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"),
- INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
- INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
- INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
- INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
- INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
- INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
- INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"),
- INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"),
- INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"),
- INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
- INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
- INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
- INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
- INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"),
- INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"),
- INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"),
- INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"),
- INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"),
- INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"),
- INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"),
- INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"),
- INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"),
- INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"),
- INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
- INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
- INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
- INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
- INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
- INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
- INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
- INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
- INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
- INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"),
- INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
- INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
- INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
- INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"),
- };
-#undef INST
- std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
- // If a matcher has more bits in its mask it is more specific, so it
- // should come first.
- return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count();
- });
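-
- // e.g. the broad LD pattern "100-------------" (3 mask bits) is tried only
- // after more specific 13-bit patterns such as LD_C "1110111110010---".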
-
- return table;
- }
-};
-
-} // namespace Tegra::Shader
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
deleted file mode 100644
index e0d7b89c5..000000000
--- a/src/video_core/engines/shader_header.h
+++ /dev/null
@@ -1,158 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <optional>
-
-#include "common/bit_field.h"
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-
-namespace Tegra::Shader {
-
-enum class OutputTopology : u32 {
- PointList = 1,
- LineStrip = 6,
- TriangleStrip = 7,
-};
-
-enum class PixelImap : u8 {
- Unused = 0,
- Constant = 1,
- Perspective = 2,
- ScreenLinear = 3,
-};
-
-// Documentation in:
-// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
-struct Header {
- union {
- BitField<0, 5, u32> sph_type;
- BitField<5, 5, u32> version;
- BitField<10, 4, u32> shader_type;
- BitField<14, 1, u32> mrt_enable;
- BitField<15, 1, u32> kills_pixels;
- BitField<16, 1, u32> does_global_store;
- BitField<17, 4, u32> sass_version;
- BitField<21, 5, u32> reserved;
- BitField<26, 1, u32> does_load_or_store;
- BitField<27, 1, u32> does_fp64;
- BitField<28, 4, u32> stream_out_mask;
- } common0;
-
- union {
- BitField<0, 24, u32> shader_local_memory_low_size;
- BitField<24, 8, u32> per_patch_attribute_count;
- } common1;
-
- union {
- BitField<0, 24, u32> shader_local_memory_high_size;
- BitField<24, 8, u32> threads_per_input_primitive;
- } common2;
-
- union {
- BitField<0, 24, u32> shader_local_memory_crs_size;
- BitField<24, 4, OutputTopology> output_topology;
- BitField<28, 4, u32> reserved;
- } common3;
-
- union {
- BitField<0, 12, u32> max_output_vertices;
- BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
- BitField<20, 4, u32> reserved;
- BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
- } common4;
-
- union {
- struct {
- INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
- INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
- INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
- INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
- union {
- BitField<0, 8, u16> clip_distances;
- BitField<8, 1, u16> point_sprite_s;
- BitField<9, 1, u16> point_sprite_t;
- BitField<10, 1, u16> fog_coordinate;
- BitField<12, 1, u16> tessellation_eval_point_u;
- BitField<13, 1, u16> tessellation_eval_point_v;
- BitField<14, 1, u16> instance_id;
- BitField<15, 1, u16> vertex_id;
- };
- INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
- INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
- INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
- INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB
- INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
- INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
- INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC
- INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
- INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
- } vtg;
-
- struct {
- INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
- INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
-
- union {
- BitField<0, 2, PixelImap> x;
- BitField<2, 2, PixelImap> y;
- BitField<4, 2, PixelImap> z;
- BitField<6, 2, PixelImap> w;
- u8 raw;
- } imap_generic_vector[32];
-
- INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
- INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
- INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
- INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
-
- struct {
- u32 target;
- union {
- BitField<0, 1, u32> sample_mask;
- BitField<1, 1, u32> depth;
- BitField<2, 30, u32> reserved;
- };
- } omap;
-
- bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
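- // Each render target owns four consecutive mask bits, one per component;
- // e.g. render_target 1, component 2 tests bit 6 of omap.target.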
- const u32 bit = render_target * 4 + component;
- return omap.target & (1 << bit);
- }
-
- PixelImap GetPixelImap(u32 attribute) const {
- const auto get_index = [this, attribute](u32 index) {
- return static_cast<PixelImap>(
- (imap_generic_vector[attribute].raw >> (index * 2)) & 3);
- };
-
- std::optional<PixelImap> result;
- for (u32 component = 0; component < 4; ++component) {
- const PixelImap index = get_index(component);
- if (index == PixelImap::Unused) {
- continue;
- }
- if (result && result != index) {
- LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
- }
- result = index;
- }
- return result.value_or(PixelImap::Unused);
- }
- } ps;
-
- std::array<u32, 0xF> raw;
- };
-
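- // e.g. shader_local_memory_low_size = 0x1000 with
- // shader_local_memory_high_size = 0x2 gives 0x2001000 bytes.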
- u64 GetLocalMemorySize() const {
- return (common1.shader_local_memory_low_size |
- (common2.shader_local_memory_high_size << 24));
- }
-};
-static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
-
-} // namespace Tegra::Shader
diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h
deleted file mode 100644
index 49ce5cde5..000000000
--- a/src/video_core/engines/shader_type.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace Tegra::Engines {
-
-enum class ShaderType : u32 {
- Vertex = 0,
- TesselationControl = 1,
- TesselationEval = 2,
- Geometry = 3,
- Fragment = 4,
- Compute = 5,
-};
-static constexpr std::size_t MaxShaderTypes = 6;
-
-} // namespace Tegra::Engines
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index f055b61e9..34dc6c596 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -8,6 +8,7 @@
#include <queue>
#include "common/common_types.h"
+#include "common/settings.h"
#include "core/core.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
@@ -53,6 +54,12 @@ public:
delayed_destruction_ring.Tick();
}
+ // Unlike other fences, this one doesn't queue a fence; it only accumulates pending
+ // buffer cache flushes.
+ void SignalOrdering() {
+ std::scoped_lock lock{buffer_cache.mutex};
+ buffer_cache.AccumulateFlushes();
+ }
+
void SignalSemaphore(GPUVAddr addr, u32 value) {
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 35cc561be..ff024f530 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -50,6 +50,7 @@ void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
maxwell_3d->BindRasterizer(rasterizer);
fermi_2d->BindRasterizer(rasterizer);
kepler_compute->BindRasterizer(rasterizer);
+ maxwell_dma->BindRasterizer(rasterizer);
}
Engines::Maxwell3D& GPU::Maxwell3D() {
@@ -268,11 +269,13 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequence:
- case BufferMethods::RefCnt:
case BufferMethods::UnkCacheFlush:
case BufferMethods::WrcacheFlush:
case BufferMethods::FenceValue:
break;
+ case BufferMethods::RefCnt:
+ rasterizer->SignalReference();
+ break;
case BufferMethods::FenceAction:
ProcessFenceActionMethod();
break;
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
deleted file mode 100644
index f058f2744..000000000
--- a/src/video_core/guest_driver.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <limits>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/guest_driver.h"
-
-namespace VideoCore {
-
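- // Deduces the texture handler size from the deltas between bound offsets.
- // e.g. offsets {0, 2, 4} have a minimum delta of 2, deducing a size of
- // min_texture_handler_size * 2 = 8 bytes; deltas above 2 deduce nothing.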
-void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
- if (texture_handler_size) {
- return;
- }
- const std::size_t size = bound_offsets.size();
- if (size < 2) {
- return;
- }
- std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
- u32 min_val = std::numeric_limits<u32>::max();
- for (std::size_t i = 1; i < size; ++i) {
- if (bound_offsets[i] == bound_offsets[i - 1]) {
- continue;
- }
- const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
- min_val = std::min(min_val, new_min);
- }
- if (min_val > 2) {
- return;
- }
- texture_handler_size = min_texture_handler_size * min_val;
-}
-
-} // namespace VideoCore
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
deleted file mode 100644
index 21e569ba1..000000000
--- a/src/video_core/guest_driver.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <optional>
-#include <vector>
-
-#include "common/common_types.h"
-
-namespace VideoCore {
-
-/**
- * The GuestDriverProfile class is used to learn about the guest GPU driver's behavior and to
- * collect information needed by HLE methods that are impossible to avoid, such as shader
- * tracking, since they amount to Entscheidungsproblems (undecidable in the general case).
- */
-class GuestDriverProfile {
-public:
- explicit GuestDriverProfile() = default;
- explicit GuestDriverProfile(std::optional<u32> texture_handler_size_)
- : texture_handler_size{texture_handler_size_} {}
-
- void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
-
- u32 GetTextureHandlerSize() const {
- return texture_handler_size.value_or(default_texture_handler_size);
- }
-
- bool IsTextureHandlerSizeKnown() const {
- return texture_handler_size.has_value();
- }
-
-private:
- // Minimum size of a texture handler that any driver can use.
- static constexpr u32 min_texture_handler_size = 4;
-
- // The default follows the Vulkan and OpenGL standards, but Nvidia GPUs can easily use
- // 4 bytes instead. Thus, certain drivers may shrink the size.
- static constexpr u32 default_texture_handler_size = 8;
-
- std::optional<u32> texture_handler_size = default_texture_handler_size;
-};
-
-} // namespace VideoCore
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index d2b9d5f2b..882eff880 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,7 +69,6 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
} else {
UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
}
-
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
for (const auto& map : submapped_ranges) {
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 0cec4225b..b094fc064 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -11,11 +11,13 @@
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
-#include "video_core/guest_driver.h"
namespace Tegra {
class MemoryManager;
+namespace Engines {
+class AccelerateDMAInterface;
}
+} // namespace Tegra
namespace VideoCore {
@@ -42,7 +44,7 @@ public:
virtual void Clear() = 0;
/// Dispatches a compute shader invocation
- virtual void DispatchCompute(GPUVAddr code_addr) = 0;
+ virtual void DispatchCompute() = 0;
/// Resets the counter of a query
virtual void ResetCounter(QueryType type) = 0;
@@ -63,6 +65,9 @@ public:
/// Signal a GPU based syncpoint as a fence
virtual void SignalSyncPoint(u32 value) = 0;
+ /// Signal a GPU based reference as a point
+ virtual void SignalReference() = 0;
+
/// Release all pending fences.
virtual void ReleaseFences() = 0;
@@ -116,6 +121,8 @@ public:
return false;
}
+ [[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0;
+
/// Attempt to use a faster method to display the framebuffer to screen
[[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
@@ -128,18 +135,5 @@ public:
/// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const DiskResourceLoadCallback& callback) {}
-
- /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
- [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() {
- return guest_driver_profile;
- }
-
- /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
- [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const {
- return guest_driver_profile;
- }
-
-private:
- GuestDriverProfile guest_driver_profile{};
};
} // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
deleted file mode 100644
index e8d8d2aa5..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ /dev/null
@@ -1,2124 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <variant>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_arb_decompiler.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
-// GLASM lacks booleans, so they have to be implemented as integers.
-// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
-// select between two values, because -1 will be evaluated as true and 0 as false.
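-// For example, a select under this convention is a single instruction:
-//   CMP.S result, predicate, value_if_true, value_if_false;
-// because CMP.S picks the second operand when the first is negative (-1).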
-
-namespace OpenGL {
-
-namespace {
-
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using namespace VideoCommon::Shader;
-using Operation = const OperationNode&;
-
-constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
-
-char Swizzle(std::size_t component) {
- static constexpr std::string_view SWIZZLE{"xyzw"};
- return SWIZZLE.at(component);
-}
-
-constexpr bool IsGenericAttribute(Attribute::Index index) {
- return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
-}
-
-u32 GetGenericAttributeIndex(Attribute::Index index) {
- ASSERT(IsGenericAttribute(index));
- return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
-}
-
-std::string_view Modifiers(Operation operation) {
- const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
- if (meta && meta->precise) {
- return ".PREC";
- }
- return "";
-}
-
-std::string_view GetInputFlags(PixelImap attribute) {
- switch (attribute) {
- case PixelImap::Perspective:
- return "";
- case PixelImap::Constant:
- return "FLAT ";
- case PixelImap::ScreenLinear:
- return "NOPERSPECTIVE ";
- case PixelImap::Unused:
- break;
- }
- UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
- return {};
-}
-
-std::string_view ImageType(Tegra::Shader::ImageType image_type) {
- switch (image_type) {
- case Tegra::Shader::ImageType::Texture1D:
- return "1D";
- case Tegra::Shader::ImageType::TextureBuffer:
- return "BUFFER";
- case Tegra::Shader::ImageType::Texture1DArray:
- return "ARRAY1D";
- case Tegra::Shader::ImageType::Texture2D:
- return "2D";
- case Tegra::Shader::ImageType::Texture2DArray:
- return "ARRAY2D";
- case Tegra::Shader::ImageType::Texture3D:
- return "3D";
- }
- UNREACHABLE();
- return {};
-}
-
-std::string_view StackName(MetaStackClass stack) {
- switch (stack) {
- case MetaStackClass::Ssy:
- return "SSY";
- case MetaStackClass::Pbk:
- return "PBK";
- }
- UNREACHABLE();
- return "";
-}
-
-std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
- switch (topology) {
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
- return "POINTS";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
- return "LINES";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
- return "LINES_ADJACENCY";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
- return "TRIANGLES";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
- return "TRIANGLES_ADJACENCY";
- default:
- UNIMPLEMENTED_MSG("topology={}", topology);
- return "POINTS";
- }
-}
-
-std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
- switch (topology) {
- case Tegra::Shader::OutputTopology::PointList:
- return "POINTS";
- case Tegra::Shader::OutputTopology::LineStrip:
- return "LINE_STRIP";
- case Tegra::Shader::OutputTopology::TriangleStrip:
- return "TRIANGLE_STRIP";
- default:
- UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
- return "points";
- }
-}
-
-std::string_view StageInputName(ShaderType stage) {
- switch (stage) {
- case ShaderType::Vertex:
- case ShaderType::Geometry:
- return "vertex";
- case ShaderType::Fragment:
- return "fragment";
- case ShaderType::Compute:
- return "invocation";
- default:
- UNREACHABLE();
- return "";
- }
-}
-
-std::string TextureType(const MetaTexture& meta) {
- if (meta.sampler.is_buffer) {
- return "BUFFER";
- }
- std::string type;
- if (meta.sampler.is_shadow) {
- type += "SHADOW";
- }
- if (meta.sampler.is_array) {
- type += "ARRAY";
- }
- type += [&meta] {
- switch (meta.sampler.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return "1D";
- case Tegra::Shader::TextureType::Texture2D:
- return "2D";
- case Tegra::Shader::TextureType::Texture3D:
- return "3D";
- case Tegra::Shader::TextureType::TextureCube:
- return "CUBE";
- }
- UNREACHABLE();
- return "2D";
- }();
- return type;
-}
-
-class ARBDecompiler final {
-public:
- explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
- ShaderType stage_, std::string_view identifier);
-
- std::string Code() const {
- return shader_source;
- }
-
-private:
- void DefineGlobalMemory();
-
- void DeclareHeader();
- void DeclareVertex();
- void DeclareGeometry();
- void DeclareFragment();
- void DeclareCompute();
- void DeclareInputAttributes();
- void DeclareOutputAttributes();
- void DeclareLocalMemory();
- void DeclareGlobalMemory();
- void DeclareConstantBuffers();
- void DeclareRegisters();
- void DeclareTemporaries();
- void DeclarePredicates();
- void DeclareInternalFlags();
-
- void InitializeVariables();
-
- void DecompileAST();
- void DecompileBranchMode();
-
- void VisitAST(const ASTNode& node);
- std::string VisitExpression(const Expr& node);
-
- void VisitBlock(const NodeBlock& bb);
-
- std::string Visit(const Node& node);
-
- std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
- std::string BuildAoffi(Operation);
- std::string GlobalMemoryPointer(const GmemNode& gmem);
- void Exit();
-
- std::string Assign(Operation);
- std::string Select(Operation);
- std::string FClamp(Operation);
- std::string FCastHalf0(Operation);
- std::string FCastHalf1(Operation);
- std::string FSqrt(Operation);
- std::string FSwizzleAdd(Operation);
- std::string HAdd2(Operation);
- std::string HMul2(Operation);
- std::string HFma2(Operation);
- std::string HAbsolute(Operation);
- std::string HNegate(Operation);
- std::string HClamp(Operation);
- std::string HCastFloat(Operation);
- std::string HUnpack(Operation);
- std::string HMergeF32(Operation);
- std::string HMergeH0(Operation);
- std::string HMergeH1(Operation);
- std::string HPack2(Operation);
- std::string LogicalAssign(Operation);
- std::string LogicalPick2(Operation);
- std::string LogicalAnd2(Operation);
- std::string FloatOrdered(Operation);
- std::string FloatUnordered(Operation);
- std::string LogicalAddCarry(Operation);
- std::string Texture(Operation);
- std::string TextureGather(Operation);
- std::string TextureQueryDimensions(Operation);
- std::string TextureQueryLod(Operation);
- std::string TexelFetch(Operation);
- std::string TextureGradient(Operation);
- std::string ImageLoad(Operation);
- std::string ImageStore(Operation);
- std::string Branch(Operation);
- std::string BranchIndirect(Operation);
- std::string PushFlowStack(Operation);
- std::string PopFlowStack(Operation);
- std::string Exit(Operation);
- std::string Discard(Operation);
- std::string EmitVertex(Operation);
- std::string EndPrimitive(Operation);
- std::string InvocationId(Operation);
- std::string YNegate(Operation);
- std::string ThreadId(Operation);
- std::string ShuffleIndexed(Operation);
- std::string Barrier(Operation);
- std::string MemoryBarrierGroup(Operation);
- std::string MemoryBarrierGlobal(Operation);
-
- template <const std::string_view& op>
- std::string Unary(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
- return temporary;
- }
-
- template <const std::string_view& op>
- std::string Binary(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
- Visit(operation[1]));
- return temporary;
- }
-
- template <const std::string_view& op>
- std::string Trinary(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
- Visit(operation[1]), Visit(operation[2]));
- return temporary;
- }
-
- template <const std::string_view& op, bool unordered>
- std::string FloatComparison(Operation operation) {
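- // Unordered comparisons must be true when either operand is NaN, while
- // ordered ones must be false; SNE.F is special-cased below because it
- // already reports NaN operands as "not equal".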
- std::string temporary = AllocTemporary();
- AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("MOV.S {} (NE.x), -1;", temporary);
-
- const std::string op_a = Visit(operation[0]);
- const std::string op_b = Visit(operation[1]);
- if constexpr (unordered) {
- AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), -1;", temporary);
- AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), -1;", temporary);
- } else if (op == SNE_F) {
- AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), 0;", temporary);
- AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), 0;", temporary);
- }
- return temporary;
- }
-
- template <const std::string_view& op, bool is_nan>
- std::string HalfComparison(Operation operation) {
- std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- const std::string op_a = Visit(operation[0]);
- const std::string op_b = Visit(operation[1]);
- AddLine("UP2H.F {}, {};", tmp1, op_a);
- AddLine("UP2H.F {}, {};", tmp2, op_b);
- AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
- AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
- AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
- AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
- AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
- if constexpr (is_nan) {
- AddLine("MOVC.F RC.x, {};", op_a);
- AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
- AddLine("MOVC.F RC.x, {};", op_b);
- AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
- }
- return tmp1;
- }
-
- template <const std::string_view& op, const std::string_view& type>
- std::string AtomicImage(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
- const std::size_t num_coords = operation.GetOperandsCount();
- const std::size_t num_values = meta.values.size();
-
- const std::string coord = AllocVectorTemporary();
- const std::string value = AllocVectorTemporary();
- for (std::size_t i = 0; i < num_coords; ++i) {
- AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
- }
- for (std::size_t i = 0; i < num_values; ++i) {
- AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
- }
-
- AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
- image_id, ImageType(meta.image.type));
- return fmt::format("{}.x", coord);
- }
-
- template <const std::string_view& op, const std::string_view& type>
- std::string Atomic(Operation operation) {
- std::string temporary = AllocTemporary();
- std::string address;
- std::string_view opname;
- bool robust = false;
- if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
- address = GlobalMemoryPointer(*gmem);
- opname = "ATOM";
- robust = true;
- } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
- address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
- opname = "ATOMS";
- } else {
- UNREACHABLE();
- return "{0, 0, 0, 0}";
- }
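- // The global memory path is robust: GlobalMemoryPointer is assumed to
- // leave an in-bounds test in the condition register, and out-of-bounds
- // atomics yield 0 instead of performing the operation.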
- if (robust) {
- AddLine("IF NE.x;");
- }
- AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
- if (robust) {
- AddLine("ELSE;");
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("ENDIF;");
- }
- return temporary;
- }
-
- template <char type>
- std::string Negate(Operation operation) {
- std::string temporary = AllocTemporary();
- if constexpr (type == 'F') {
- AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
- } else {
- AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
- }
- return temporary;
- }
-
- template <char type>
- std::string Absolute(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
- return temporary;
- }
-
- template <char type>
- std::string BitfieldInsert(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
- AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
- AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
- Visit(operation[0]));
- return fmt::format("{}.x", temporary);
- }
-
- template <char type>
- std::string BitfieldExtract(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
- AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
- AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
- return fmt::format("{}.x", temporary);
- }
-
- template <char swizzle>
- std::string LocalInvocationId(Operation) {
- return fmt::format("invocation.localid.{}", swizzle);
- }
-
- template <char swizzle>
- std::string WorkGroupId(Operation) {
- return fmt::format("invocation.groupid.{}", swizzle);
- }
-
- template <char c1, char c2>
- std::string ThreadMask(Operation) {
- return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
- }
-
- template <typename... Args>
- void AddExpression(std::string_view text, Args&&... args) {
- shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...);
- }
-
- template <typename... Args>
- void AddLine(std::string_view text, Args&&... args) {
- AddExpression(text, std::forward<Args>(args)...);
- shader_source += '\n';
- }
-
- std::string AllocLongVectorTemporary() {
- max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
- return fmt::format("L{}", num_long_temporaries++);
- }
-
- std::string AllocLongTemporary() {
- return fmt::format("{}.x", AllocLongVectorTemporary());
- }
-
- std::string AllocVectorTemporary() {
- max_temporaries = std::max(max_temporaries, num_temporaries + 1);
- return fmt::format("T{}", num_temporaries++);
- }
-
- std::string AllocTemporary() {
- return fmt::format("{}.x", AllocVectorTemporary());
- }
-
- void ResetTemporaries() noexcept {
- num_temporaries = 0;
- num_long_temporaries = 0;
- }
-
- const Device& device;
- const ShaderIR& ir;
- const Registry& registry;
- const ShaderType stage;
-
- std::size_t num_temporaries = 0;
- std::size_t max_temporaries = 0;
-
- std::size_t num_long_temporaries = 0;
- std::size_t max_long_temporaries = 0;
-
- std::map<GlobalMemoryBase, u32> global_memory_names;
-
- std::string shader_source;
-
- static constexpr std::string_view ADD_F32 = "ADD.F32";
- static constexpr std::string_view ADD_S = "ADD.S";
- static constexpr std::string_view ADD_U = "ADD.U";
- static constexpr std::string_view MUL_F32 = "MUL.F32";
- static constexpr std::string_view MUL_S = "MUL.S";
- static constexpr std::string_view MUL_U = "MUL.U";
- static constexpr std::string_view DIV_F32 = "DIV.F32";
- static constexpr std::string_view DIV_S = "DIV.S";
- static constexpr std::string_view DIV_U = "DIV.U";
- static constexpr std::string_view MAD_F32 = "MAD.F32";
- static constexpr std::string_view RSQ_F32 = "RSQ.F32";
- static constexpr std::string_view COS_F32 = "COS.F32";
- static constexpr std::string_view SIN_F32 = "SIN.F32";
- static constexpr std::string_view EX2_F32 = "EX2.F32";
- static constexpr std::string_view LG2_F32 = "LG2.F32";
- static constexpr std::string_view SLT_F = "SLT.F32";
- static constexpr std::string_view SLT_S = "SLT.S";
- static constexpr std::string_view SLT_U = "SLT.U";
- static constexpr std::string_view SEQ_F = "SEQ.F32";
- static constexpr std::string_view SEQ_S = "SEQ.S";
- static constexpr std::string_view SEQ_U = "SEQ.U";
- static constexpr std::string_view SLE_F = "SLE.F32";
- static constexpr std::string_view SLE_S = "SLE.S";
- static constexpr std::string_view SLE_U = "SLE.U";
- static constexpr std::string_view SGT_F = "SGT.F32";
- static constexpr std::string_view SGT_S = "SGT.S";
- static constexpr std::string_view SGT_U = "SGT.U";
- static constexpr std::string_view SNE_F = "SNE.F32";
- static constexpr std::string_view SNE_S = "SNE.S";
- static constexpr std::string_view SNE_U = "SNE.U";
- static constexpr std::string_view SGE_F = "SGE.F32";
- static constexpr std::string_view SGE_S = "SGE.S";
- static constexpr std::string_view SGE_U = "SGE.U";
- static constexpr std::string_view AND_S = "AND.S";
- static constexpr std::string_view AND_U = "AND.U";
- static constexpr std::string_view TRUNC_F = "TRUNC.F";
- static constexpr std::string_view TRUNC_S = "TRUNC.S";
- static constexpr std::string_view TRUNC_U = "TRUNC.U";
- static constexpr std::string_view SHL_S = "SHL.S";
- static constexpr std::string_view SHL_U = "SHL.U";
- static constexpr std::string_view SHR_S = "SHR.S";
- static constexpr std::string_view SHR_U = "SHR.U";
- static constexpr std::string_view OR_S = "OR.S";
- static constexpr std::string_view OR_U = "OR.U";
- static constexpr std::string_view XOR_S = "XOR.S";
- static constexpr std::string_view XOR_U = "XOR.U";
- static constexpr std::string_view NOT_S = "NOT.S";
- static constexpr std::string_view NOT_U = "NOT.U";
- static constexpr std::string_view BTC_S = "BTC.S";
- static constexpr std::string_view BTC_U = "BTC.U";
- static constexpr std::string_view BTFM_S = "BTFM.S";
- static constexpr std::string_view BTFM_U = "BTFM.U";
- static constexpr std::string_view ROUND_F = "ROUND.F";
- static constexpr std::string_view CEIL_F = "CEIL.F";
- static constexpr std::string_view FLR_F = "FLR.F";
- static constexpr std::string_view I2F_S = "I2F.S";
- static constexpr std::string_view I2F_U = "I2F.U";
- static constexpr std::string_view MIN_F = "MIN.F";
- static constexpr std::string_view MIN_S = "MIN.S";
- static constexpr std::string_view MIN_U = "MIN.U";
- static constexpr std::string_view MAX_F = "MAX.F";
- static constexpr std::string_view MAX_S = "MAX.S";
- static constexpr std::string_view MAX_U = "MAX.U";
- static constexpr std::string_view MOV_U = "MOV.U";
- static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
- static constexpr std::string_view TGALL_U = "TGALL.U";
- static constexpr std::string_view TGANY_U = "TGANY.U";
- static constexpr std::string_view TGEQ_U = "TGEQ.U";
- static constexpr std::string_view EXCH = "EXCH";
- static constexpr std::string_view ADD = "ADD";
- static constexpr std::string_view MIN = "MIN";
- static constexpr std::string_view MAX = "MAX";
- static constexpr std::string_view AND = "AND";
- static constexpr std::string_view OR = "OR";
- static constexpr std::string_view XOR = "XOR";
- static constexpr std::string_view U32 = "U32";
- static constexpr std::string_view S32 = "S32";
-
- static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
- using DecompilerType = std::string (ARBDecompiler::*)(Operation);
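- // Dispatch table indexed by OperationCode; entry order must mirror the enum, since
- // Visit() indexes straight into it with the operation's code.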
- static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
- &ARBDecompiler::Assign,
-
- &ARBDecompiler::Select,
-
- &ARBDecompiler::Binary<ADD_F32>,
- &ARBDecompiler::Binary<MUL_F32>,
- &ARBDecompiler::Binary<DIV_F32>,
- &ARBDecompiler::Trinary<MAD_F32>,
- &ARBDecompiler::Negate<'F'>,
- &ARBDecompiler::Absolute<'F'>,
- &ARBDecompiler::FClamp,
- &ARBDecompiler::FCastHalf0,
- &ARBDecompiler::FCastHalf1,
- &ARBDecompiler::Binary<MIN_F>,
- &ARBDecompiler::Binary<MAX_F>,
- &ARBDecompiler::Unary<COS_F32>,
- &ARBDecompiler::Unary<SIN_F32>,
- &ARBDecompiler::Unary<EX2_F32>,
- &ARBDecompiler::Unary<LG2_F32>,
- &ARBDecompiler::Unary<RSQ_F32>,
- &ARBDecompiler::FSqrt,
- &ARBDecompiler::Unary<ROUND_F>,
- &ARBDecompiler::Unary<FLR_F>,
- &ARBDecompiler::Unary<CEIL_F>,
- &ARBDecompiler::Unary<TRUNC_F>,
- &ARBDecompiler::Unary<I2F_S>,
- &ARBDecompiler::Unary<I2F_U>,
- &ARBDecompiler::FSwizzleAdd,
-
- &ARBDecompiler::Binary<ADD_S>,
- &ARBDecompiler::Binary<MUL_S>,
- &ARBDecompiler::Binary<DIV_S>,
- &ARBDecompiler::Negate<'S'>,
- &ARBDecompiler::Absolute<'S'>,
- &ARBDecompiler::Binary<MIN_S>,
- &ARBDecompiler::Binary<MAX_S>,
-
- &ARBDecompiler::Unary<TRUNC_S>,
- &ARBDecompiler::Unary<MOV_U>,
- &ARBDecompiler::Binary<SHL_S>,
- &ARBDecompiler::Binary<SHR_U>,
- &ARBDecompiler::Binary<SHR_S>,
- &ARBDecompiler::Binary<AND_S>,
- &ARBDecompiler::Binary<OR_S>,
- &ARBDecompiler::Binary<XOR_S>,
- &ARBDecompiler::Unary<NOT_S>,
- &ARBDecompiler::BitfieldInsert<'S'>,
- &ARBDecompiler::BitfieldExtract<'S'>,
- &ARBDecompiler::Unary<BTC_S>,
- &ARBDecompiler::Unary<BTFM_S>,
-
- &ARBDecompiler::Binary<ADD_U>,
- &ARBDecompiler::Binary<MUL_U>,
- &ARBDecompiler::Binary<DIV_U>,
- &ARBDecompiler::Binary<MIN_U>,
- &ARBDecompiler::Binary<MAX_U>,
- &ARBDecompiler::Unary<TRUNC_U>,
- &ARBDecompiler::Unary<MOV_U>,
- &ARBDecompiler::Binary<SHL_U>,
- &ARBDecompiler::Binary<SHR_U>,
- &ARBDecompiler::Binary<SHR_U>,
- &ARBDecompiler::Binary<AND_U>,
- &ARBDecompiler::Binary<OR_U>,
- &ARBDecompiler::Binary<XOR_U>,
- &ARBDecompiler::Unary<NOT_U>,
- &ARBDecompiler::BitfieldInsert<'U'>,
- &ARBDecompiler::BitfieldExtract<'U'>,
- &ARBDecompiler::Unary<BTC_U>,
- &ARBDecompiler::Unary<BTFM_U>,
-
- &ARBDecompiler::HAdd2,
- &ARBDecompiler::HMul2,
- &ARBDecompiler::HFma2,
- &ARBDecompiler::HAbsolute,
- &ARBDecompiler::HNegate,
- &ARBDecompiler::HClamp,
- &ARBDecompiler::HCastFloat,
- &ARBDecompiler::HUnpack,
- &ARBDecompiler::HMergeF32,
- &ARBDecompiler::HMergeH0,
- &ARBDecompiler::HMergeH1,
- &ARBDecompiler::HPack2,
-
- &ARBDecompiler::LogicalAssign,
- &ARBDecompiler::Binary<AND_U>,
- &ARBDecompiler::Binary<OR_U>,
- &ARBDecompiler::Binary<XOR_U>,
- &ARBDecompiler::Unary<NOT_U>,
- &ARBDecompiler::LogicalPick2,
- &ARBDecompiler::LogicalAnd2,
-
- &ARBDecompiler::FloatComparison<SLT_F, false>,
- &ARBDecompiler::FloatComparison<SEQ_F, false>,
- &ARBDecompiler::FloatComparison<SLE_F, false>,
- &ARBDecompiler::FloatComparison<SGT_F, false>,
- &ARBDecompiler::FloatComparison<SNE_F, false>,
- &ARBDecompiler::FloatComparison<SGE_F, false>,
- &ARBDecompiler::FloatOrdered,
- &ARBDecompiler::FloatUnordered,
- &ARBDecompiler::FloatComparison<SLT_F, true>,
- &ARBDecompiler::FloatComparison<SEQ_F, true>,
- &ARBDecompiler::FloatComparison<SLE_F, true>,
- &ARBDecompiler::FloatComparison<SGT_F, true>,
- &ARBDecompiler::FloatComparison<SNE_F, true>,
- &ARBDecompiler::FloatComparison<SGE_F, true>,
-
- &ARBDecompiler::Binary<SLT_S>,
- &ARBDecompiler::Binary<SEQ_S>,
- &ARBDecompiler::Binary<SLE_S>,
- &ARBDecompiler::Binary<SGT_S>,
- &ARBDecompiler::Binary<SNE_S>,
- &ARBDecompiler::Binary<SGE_S>,
-
- &ARBDecompiler::Binary<SLT_U>,
- &ARBDecompiler::Binary<SEQ_U>,
- &ARBDecompiler::Binary<SLE_U>,
- &ARBDecompiler::Binary<SGT_U>,
- &ARBDecompiler::Binary<SNE_U>,
- &ARBDecompiler::Binary<SGE_U>,
-
- &ARBDecompiler::LogicalAddCarry,
-
- &ARBDecompiler::HalfComparison<SLT_F, false>,
- &ARBDecompiler::HalfComparison<SEQ_F, false>,
- &ARBDecompiler::HalfComparison<SLE_F, false>,
- &ARBDecompiler::HalfComparison<SGT_F, false>,
- &ARBDecompiler::HalfComparison<SNE_F, false>,
- &ARBDecompiler::HalfComparison<SGE_F, false>,
- &ARBDecompiler::HalfComparison<SLT_F, true>,
- &ARBDecompiler::HalfComparison<SEQ_F, true>,
- &ARBDecompiler::HalfComparison<SLE_F, true>,
- &ARBDecompiler::HalfComparison<SGT_F, true>,
- &ARBDecompiler::HalfComparison<SNE_F, true>,
- &ARBDecompiler::HalfComparison<SGE_F, true>,
-
- &ARBDecompiler::Texture,
- &ARBDecompiler::Texture,
- &ARBDecompiler::TextureGather,
- &ARBDecompiler::TextureQueryDimensions,
- &ARBDecompiler::TextureQueryLod,
- &ARBDecompiler::TexelFetch,
- &ARBDecompiler::TextureGradient,
-
- &ARBDecompiler::ImageLoad,
- &ARBDecompiler::ImageStore,
-
- &ARBDecompiler::AtomicImage<ADD, U32>,
- &ARBDecompiler::AtomicImage<AND, U32>,
- &ARBDecompiler::AtomicImage<OR, U32>,
- &ARBDecompiler::AtomicImage<XOR, U32>,
- &ARBDecompiler::AtomicImage<EXCH, U32>,
-
- &ARBDecompiler::Atomic<EXCH, U32>,
- &ARBDecompiler::Atomic<ADD, U32>,
- &ARBDecompiler::Atomic<MIN, U32>,
- &ARBDecompiler::Atomic<MAX, U32>,
- &ARBDecompiler::Atomic<AND, U32>,
- &ARBDecompiler::Atomic<OR, U32>,
- &ARBDecompiler::Atomic<XOR, U32>,
-
- &ARBDecompiler::Atomic<EXCH, S32>,
- &ARBDecompiler::Atomic<ADD, S32>,
- &ARBDecompiler::Atomic<MIN, S32>,
- &ARBDecompiler::Atomic<MAX, S32>,
- &ARBDecompiler::Atomic<AND, S32>,
- &ARBDecompiler::Atomic<OR, S32>,
- &ARBDecompiler::Atomic<XOR, S32>,
-
- &ARBDecompiler::Atomic<ADD, U32>,
- &ARBDecompiler::Atomic<MIN, U32>,
- &ARBDecompiler::Atomic<MAX, U32>,
- &ARBDecompiler::Atomic<AND, U32>,
- &ARBDecompiler::Atomic<OR, U32>,
- &ARBDecompiler::Atomic<XOR, U32>,
-
- &ARBDecompiler::Atomic<ADD, S32>,
- &ARBDecompiler::Atomic<MIN, S32>,
- &ARBDecompiler::Atomic<MAX, S32>,
- &ARBDecompiler::Atomic<AND, S32>,
- &ARBDecompiler::Atomic<OR, S32>,
- &ARBDecompiler::Atomic<XOR, S32>,
-
- &ARBDecompiler::Branch,
- &ARBDecompiler::BranchIndirect,
- &ARBDecompiler::PushFlowStack,
- &ARBDecompiler::PopFlowStack,
- &ARBDecompiler::Exit,
- &ARBDecompiler::Discard,
-
- &ARBDecompiler::EmitVertex,
- &ARBDecompiler::EndPrimitive,
-
- &ARBDecompiler::InvocationId,
- &ARBDecompiler::YNegate,
- &ARBDecompiler::LocalInvocationId<'x'>,
- &ARBDecompiler::LocalInvocationId<'y'>,
- &ARBDecompiler::LocalInvocationId<'z'>,
- &ARBDecompiler::WorkGroupId<'x'>,
- &ARBDecompiler::WorkGroupId<'y'>,
- &ARBDecompiler::WorkGroupId<'z'>,
-
- &ARBDecompiler::Unary<TGBALLOT_U>,
- &ARBDecompiler::Unary<TGALL_U>,
- &ARBDecompiler::Unary<TGANY_U>,
- &ARBDecompiler::Unary<TGEQ_U>,
-
- &ARBDecompiler::ThreadId,
- &ARBDecompiler::ThreadMask<'e', 'q'>,
- &ARBDecompiler::ThreadMask<'g', 'e'>,
- &ARBDecompiler::ThreadMask<'g', 't'>,
- &ARBDecompiler::ThreadMask<'l', 'e'>,
- &ARBDecompiler::ThreadMask<'l', 't'>,
- &ARBDecompiler::ShuffleIndexed,
-
- &ARBDecompiler::Barrier,
- &ARBDecompiler::MemoryBarrierGroup,
- &ARBDecompiler::MemoryBarrierGlobal,
- };
-};
-
-ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
- ShaderType stage_, std::string_view identifier)
- : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
- DefineGlobalMemory();
-
- AddLine("TEMP RC;");
- AddLine("TEMP FSWZA[4];");
- AddLine("TEMP FSWZB[4];");
- if (ir.IsDecompiled()) {
- DecompileAST();
- } else {
- DecompileBranchMode();
- }
- AddLine("END");
-
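- // The body is emitted first so declarations, which depend on usage gathered during
- // emission (e.g. max_temporaries), can be generated afterwards and prepended here.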
- const std::string code = std::move(shader_source);
- DeclareHeader();
- DeclareVertex();
- DeclareGeometry();
- DeclareFragment();
- DeclareCompute();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareLocalMemory();
- DeclareGlobalMemory();
- DeclareConstantBuffers();
- DeclareRegisters();
- DeclareTemporaries();
- DeclarePredicates();
- DeclareInternalFlags();
-
- shader_source += code;
-}
-
-std::string_view HeaderStageName(ShaderType stage) {
- switch (stage) {
- case ShaderType::Vertex:
- return "vp";
- case ShaderType::Geometry:
- return "gp";
- case ShaderType::Fragment:
- return "fp";
- case ShaderType::Compute:
- return "cp";
- default:
- UNREACHABLE();
- return "";
- }
-}
-
-void ARBDecompiler::DefineGlobalMemory() {
- u32 binding = 0;
- for (const auto& pair : ir.GetGlobalMemory()) {
- const GlobalMemoryBase base = pair.first;
- global_memory_names.emplace(base, binding);
- ++binding;
- }
-}
-
-void ARBDecompiler::DeclareHeader() {
- AddLine("!!NV{}5.0", HeaderStageName(stage));
- // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
- AddLine("OPTION NV_internal;");
- AddLine("OPTION NV_gpu_program_fp64;");
- AddLine("OPTION NV_shader_thread_group;");
- if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
- AddLine("OPTION NV_shader_thread_shuffle;");
- }
- if (stage == ShaderType::Vertex) {
- if (device.HasNvViewportArray2()) {
- AddLine("OPTION NV_viewport_array2;");
- }
- }
- if (stage == ShaderType::Fragment) {
- AddLine("OPTION ARB_draw_buffers;");
- }
- if (device.HasImageLoadFormatted()) {
- AddLine("OPTION EXT_shader_image_load_formatted;");
- }
-}
-
-void ARBDecompiler::DeclareVertex() {
- if (stage != ShaderType::Vertex) {
- return;
- }
- AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
-}
-
-void ARBDecompiler::DeclareGeometry() {
- if (stage != ShaderType::Geometry) {
- return;
- }
- const auto& info = registry.GetGraphicsInfo();
- const auto& header = ir.GetHeader();
- AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
- AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
- AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
- AddLine("ATTRIB vertex_position = vertex.position;");
-}
-
-void ARBDecompiler::DeclareFragment() {
- if (stage != ShaderType::Fragment) {
- return;
- }
- AddLine("OUTPUT result_color7 = result.color[7];");
- AddLine("OUTPUT result_color6 = result.color[6];");
- AddLine("OUTPUT result_color5 = result.color[5];");
- AddLine("OUTPUT result_color4 = result.color[4];");
- AddLine("OUTPUT result_color3 = result.color[3];");
- AddLine("OUTPUT result_color2 = result.color[2];");
- AddLine("OUTPUT result_color1 = result.color[1];");
- AddLine("OUTPUT result_color0 = result.color;");
-}
-
-void ARBDecompiler::DeclareCompute() {
- if (stage != ShaderType::Compute) {
- return;
- }
- const ComputeInfo& info = registry.GetComputeInfo();
- AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
- info.workgroup_size[2]);
- if (info.shared_memory_size_in_words == 0) {
- return;
- }
- const u32 limit = device.GetMaxComputeSharedMemorySize();
- u32 size_in_bytes = info.shared_memory_size_in_words * 4;
- if (size_in_bytes > limit) {
- LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
- size_in_bytes, limit);
- size_in_bytes = limit;
- }
-
- AddLine("SHARED_MEMORY {};", size_in_bytes);
- AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
-}
-
-void ARBDecompiler::DeclareInputAttributes() {
- if (stage == ShaderType::Compute) {
- return;
- }
- const std::string_view stage_name = StageInputName(stage);
- for (const auto attribute : ir.GetInputAttributes()) {
- if (!IsGenericAttribute(attribute)) {
- continue;
- }
- const u32 index = GetGenericAttributeIndex(attribute);
-
- std::string_view suffix;
- if (stage == ShaderType::Fragment) {
- const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
- if (input_mode == PixelImap::Unused) {
- // Skip just this attribute; returning here would drop every later declaration
- continue;
- }
- suffix = GetInputFlags(input_mode);
- }
- AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
- index);
- }
-}
-
-void ARBDecompiler::DeclareOutputAttributes() {
- if (stage == ShaderType::Compute) {
- return;
- }
- for (const auto attribute : ir.GetOutputAttributes()) {
- if (!IsGenericAttribute(attribute)) {
- continue;
- }
- const u32 index = GetGenericAttributeIndex(attribute);
- AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
- }
-}
-
-void ARBDecompiler::DeclareLocalMemory() {
- u64 size = 0;
- if (stage == ShaderType::Compute) {
- size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
- } else {
- size = ir.GetHeader().GetLocalMemorySize();
- }
- if (size == 0) {
- return;
- }
- const u64 element_count = Common::AlignUp(size, 4) / 4;
- AddLine("TEMP lmem[{}];", element_count);
-}
-
-void ARBDecompiler::DeclareGlobalMemory() {
- const size_t num_entries = ir.GetGlobalMemory().size();
- if (num_entries > 0) {
- AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
- }
-}
-
-void ARBDecompiler::DeclareConstantBuffers() {
- u32 binding = 0;
- for (const auto& cbuf : ir.GetConstantBuffers()) {
- AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
- ++binding;
- }
-}
-
-void ARBDecompiler::DeclareRegisters() {
- for (const u32 gpr : ir.GetRegisters()) {
- AddLine("TEMP R{};", gpr);
- }
-}
-
-void ARBDecompiler::DeclareTemporaries() {
- for (std::size_t i = 0; i < max_temporaries; ++i) {
- AddLine("TEMP T{};", i);
- }
- for (std::size_t i = 0; i < max_long_temporaries; ++i) {
- AddLine("LONG TEMP L{};", i);
- }
-}
-
-void ARBDecompiler::DeclarePredicates() {
- for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
- AddLine("TEMP P{};", static_cast<u64>(pred));
- }
-}
-
-void ARBDecompiler::DeclareInternalFlags() {
- for (const char* name : INTERNAL_FLAG_NAMES) {
- AddLine("TEMP {};", name);
- }
-}
-
-void ARBDecompiler::InitializeVariables() {
- AddLine("MOV.F32 FSWZA[0], -1;");
- AddLine("MOV.F32 FSWZA[1], 1;");
- AddLine("MOV.F32 FSWZA[2], -1;");
- AddLine("MOV.F32 FSWZA[3], 0;");
- AddLine("MOV.F32 FSWZB[0], -1;");
- AddLine("MOV.F32 FSWZB[1], -1;");
- AddLine("MOV.F32 FSWZB[2], 1;");
- AddLine("MOV.F32 FSWZB[3], -1;");
-
- if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
- AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
- }
- for (const auto attribute : ir.GetOutputAttributes()) {
- if (!IsGenericAttribute(attribute)) {
- continue;
- }
- const u32 index = GetGenericAttributeIndex(attribute);
- AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
- }
- for (const u32 gpr : ir.GetRegisters()) {
- AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
- }
- for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
- AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
- }
-}
-
-void ARBDecompiler::DecompileAST() {
- const u32 num_flow_variables = ir.GetASTNumVariables();
- for (u32 i = 0; i < num_flow_variables; ++i) {
- AddLine("TEMP F{};", i);
- }
- for (u32 i = 0; i < num_flow_variables; ++i) {
- AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
- }
-
- InitializeVariables();
-
- VisitAST(ir.GetASTProgram());
-}
-
-void ARBDecompiler::DecompileBranchMode() {
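- // Unstructured control flow is lowered to a dispatch loop: PC holds the address of
- // the current basic block and the SSY/PBK arrays emulate the hardware flow stacks.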
- static constexpr u32 FLOW_STACK_SIZE = 20;
- if (!ir.IsFlowStackDisabled()) {
- AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
- AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
- AddLine("TEMP SSY_TOP;");
- AddLine("TEMP PBK_TOP;");
- }
-
- AddLine("TEMP PC;");
-
- if (!ir.IsFlowStackDisabled()) {
- AddLine("MOV.U SSY_TOP.x, 0;");
- AddLine("MOV.U PBK_TOP.x, 0;");
- }
-
- InitializeVariables();
-
- const auto basic_block_end = ir.GetBasicBlocks().end();
- auto basic_block_it = ir.GetBasicBlocks().begin();
- const u32 first_address = basic_block_it->first;
- AddLine("MOV.U PC.x, {};", first_address);
-
- AddLine("REP;");
-
- std::size_t num_blocks = 0;
- while (basic_block_it != basic_block_end) {
- const auto& [address, bb] = *basic_block_it;
- ++num_blocks;
-
- AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
- AddLine("IF NE.x;");
-
- VisitBlock(bb);
-
- ++basic_block_it;
-
- if (basic_block_it != basic_block_end) {
- const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
- if (!op || op->GetCode() != OperationCode::Branch) {
- const u32 next_address = basic_block_it->first;
- AddLine("MOV.U PC.x, {};", next_address);
- AddLine("CONT;");
- }
- }
-
- AddLine("ELSE;");
- }
- AddLine("RET;");
- while (num_blocks--) {
- AddLine("ENDIF;");
- }
-
- AddLine("ENDREP;");
-}
-
-void ARBDecompiler::VisitAST(const ASTNode& node) {
- if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
- for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
- const std::string condition = VisitExpression(if_then->condition);
- ResetTemporaries();
-
- AddLine("MOVC.U RC.x, {};", condition);
- AddLine("IF NE.x;");
- for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- AddLine("ENDIF;");
- } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
- AddLine("ELSE;");
- for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
- VisitBlock(decoded->nodes);
- } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
- AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
- ResetTemporaries();
- } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
- const std::string condition = VisitExpression(do_while->condition);
- ResetTemporaries();
- AddLine("REP;");
- for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- AddLine("MOVC.U RC.x, {};", condition);
- AddLine("BRK (NE.x);");
- AddLine("ENDREP;");
- } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
- const bool is_true = ExprIsTrue(ast_return->condition);
- if (!is_true) {
- AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
- AddLine("IF NE.x;");
- ResetTemporaries();
- }
- if (ast_return->kills) {
- AddLine("KIL TR;");
- } else {
- Exit();
- }
- if (!is_true) {
- AddLine("ENDIF;");
- }
- } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
- if (ExprIsTrue(ast_break->condition)) {
- AddLine("BRK;");
- } else {
- AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
- AddLine("BRK (NE.x);");
- ResetTemporaries();
- }
- } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
- // Nothing to do
- } else {
- UNREACHABLE();
- }
-}
-
-std::string ARBDecompiler::VisitExpression(const Expr& node) {
- if (const auto expr = std::get_if<ExprAnd>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
- VisitExpression(expr->operand2));
- return result;
- }
- if (const auto expr = std::get_if<ExprOr>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
- VisitExpression(expr->operand2));
- return result;
- }
- if (const auto expr = std::get_if<ExprNot>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
- return result;
- }
- if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
- return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
- }
- if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
- return Visit(ir.GetConditionCode(expr->cc));
- }
- if (const auto expr = std::get_if<ExprVar>(&*node)) {
- return fmt::format("F{}.x", expr->var_index);
- }
- if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
- return expr->value ? "0xffffffff" : "0";
- }
- if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
- return result;
- }
- UNREACHABLE();
- return "0";
-}
-
-void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
- for (const auto& node : bb) {
- Visit(node);
- }
-}
-
-std::string ARBDecompiler::Visit(const Node& node) {
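- // Emits any code the node requires and returns a scalar operand string (e.g. "R0.x")
- // that callers can substitute into an instruction.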
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- if (const auto amend_index = operation->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index));
- }
- const std::size_t index = static_cast<std::size_t>(operation->GetCode());
- if (index >= OPERATION_DECOMPILERS.size()) {
- UNREACHABLE_MSG("Out of bounds operation: {}", index);
- return {};
- }
- const auto decompiler = OPERATION_DECOMPILERS[index];
- if (decompiler == nullptr) {
- UNREACHABLE_MSG("Undefined operation: {}", index);
- return {};
- }
- return (this->*decompiler)(*operation);
- }
-
- if (const auto gpr = std::get_if<GprNode>(&*node)) {
- const u32 index = gpr->GetIndex();
- if (index == Register::ZeroIndex) {
- return "{0, 0, 0, 0}.x";
- }
- return fmt::format("R{}.x", index);
- }
-
- if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
- return fmt::format("CV{}.x", cv->GetIndex());
- }
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
- std::string temporary = AllocTemporary();
- AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
- return temporary;
- }
-
- if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
- std::string temporary = AllocTemporary();
- switch (const auto index = predicate->GetIndex(); index) {
- case Tegra::Shader::Pred::UnusedIndex:
- AddLine("MOV.S {}, -1;", temporary);
- break;
- case Tegra::Shader::Pred::NeverExecute:
- AddLine("MOV.S {}, 0;", temporary);
- break;
- default:
- AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
- break;
- }
- if (predicate->IsNegated()) {
- AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
- }
- return temporary;
- }
-
- if (const auto abuf = std::get_if<AbufNode>(&*node)) {
- if (abuf->IsPhysicalBuffer()) {
- UNIMPLEMENTED_MSG("Physical buffers are not implemented");
- return "{0, 0, 0, 0}.x";
- }
-
- const Attribute::Index index = abuf->GetIndex();
- const u32 element = abuf->GetElement();
- const char swizzle = Swizzle(element);
- switch (index) {
- case Attribute::Index::Position: {
- if (stage == ShaderType::Geometry) {
- return fmt::format("{}_position[{}].{}", StageInputName(stage),
- Visit(abuf->GetBuffer()), swizzle);
- } else {
- return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
- }
- }
- case Attribute::Index::TessCoordInstanceIDVertexID:
- ASSERT(stage == ShaderType::Vertex);
- switch (element) {
- case 2:
- return "vertex.instance";
- case 3:
- return "vertex.id";
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- break;
- case Attribute::Index::PointCoord:
- switch (element) {
- case 0:
- return "fragment.pointcoord.x";
- case 1:
- return "fragment.pointcoord.y";
- }
- UNIMPLEMENTED();
- break;
- case Attribute::Index::FrontFacing: {
- ASSERT(stage == ShaderType::Fragment);
- ASSERT(element == 3);
- const std::string temporary = AllocVectorTemporary();
- AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
- AddLine("MOV.U.CC RC.x, -RC;");
- AddLine("MOV.S {}.x, 0;", temporary);
- AddLine("MOV.S {}.x (NE.x), -1;", temporary);
- return fmt::format("{}.x", temporary);
- }
- default:
- if (IsGenericAttribute(index)) {
- if (stage == ShaderType::Geometry) {
- return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
- Visit(abuf->GetBuffer()), swizzle);
- } else {
- return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
- GetGenericAttributeIndex(index), swizzle);
- }
- }
- UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
- break;
- }
- return "{0, 0, 0, 0}.x";
- }
-
- if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
- std::string offset_string;
- const auto& offset = cbuf->GetOffset();
- if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
- offset_string = std::to_string(imm->GetValue());
- } else {
- offset_string = Visit(offset);
- }
- std::string temporary = AllocTemporary();
- AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
- return temporary;
- }
-
- if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- std::string temporary = AllocTemporary();
- AddLine("MOV {}, 0;", temporary);
- AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
- return temporary;
- }
-
- if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- std::string temporary = Visit(lmem->GetAddress());
- AddLine("SHR.U {}, {}, 2;", temporary, temporary);
- AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
- return temporary;
- }
-
- if (const auto smem = std::get_if<SmemNode>(&*node)) {
- std::string temporary = Visit(smem->GetAddress());
- AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
- return temporary;
- }
-
- if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
- return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
- }
-
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- if (const auto amend_index = conditional->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index));
- }
- AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
- AddLine("IF NE.x;");
- VisitBlock(conditional->GetCode());
- AddLine("ENDIF;");
- return {};
- }
-
- if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
- // Uncommenting this would generate invalid code. GLASM has no '//' comments; '#' starts one.
- // AddLine("// {}", cmt->GetText());
- return {};
- }
-
- UNIMPLEMENTED();
- return {};
-}
-
-std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(meta.sampler.is_indexed);
-
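- // Shadow cube arrays need five coordinate components, one more than a vector holds,
- // so the depth compare value is carried in a second ("extended") register.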
- const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
- meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
- const std::size_t count = operation.GetOperandsCount();
- std::string temporary = AllocVectorTemporary();
- std::size_t i = 0;
- for (; i < count; ++i) {
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
- }
- if (meta.sampler.is_array) {
- AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
- ++i;
- }
- if (meta.sampler.is_shadow) {
- std::string compare = Visit(meta.depth_compare);
- if (is_extended) {
- ASSERT(i == 4);
- std::string extra_coord = AllocVectorTemporary();
- AddLine("MOV.F {}.x, {};", extra_coord, compare);
- return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
- }
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
- ++i;
- }
- return {temporary, temporary, i};
-}
-
-std::string ARBDecompiler::BuildAoffi(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- if (meta.aoffi.empty()) {
- return {};
- }
- const std::string temporary = AllocVectorTemporary();
- std::size_t i = 0;
- for (auto& node : meta.aoffi) {
- AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
- }
- return fmt::format(", offset({})", temporary);
-}
-
-std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
- // Read a bindless SSBO, return its address and set CC accordingly
- // address = c[binding].xy
- // length = c[binding].z
- const u32 binding = global_memory_names.at(gmem.GetDescriptor());
-
- const std::string pointer = AllocLongVectorTemporary();
- std::string temporary = AllocTemporary();
-
- AddLine("PK64.U {}, c[{}];", pointer, binding);
- AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
- Visit(gmem.GetBaseAddress()));
- AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
- AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
- // Compare offset to length and set CC
- AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
- return fmt::format("{}.x", pointer);
-}
-
-void ARBDecompiler::Exit() {
- if (stage != ShaderType::Fragment) {
- AddLine("RET;");
- return;
- }
-
- const auto safe_get_register = [this](u32 reg) -> std::string {
- if (ir.GetRegisters().contains(reg)) {
- return fmt::format("R{}.x", reg);
- }
- return "{0, 0, 0, 0}.x";
- };
-
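- // Copy consecutive guest registers into the enabled components of each render target.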
- const auto& header = ir.GetHeader();
- u32 current_reg = 0;
- for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
- for (u32 component = 0; component < 4; ++component) {
- if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
- continue;
- }
- AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
- safe_get_register(current_reg));
- ++current_reg;
- }
- }
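- // The depth output lives two registers past the last color output, hence the +1.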
- if (header.ps.omap.depth) {
- AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
- }
-
- AddLine("RET;");
-}
-
-std::string ARBDecompiler::Assign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- std::string dest_name;
- if (const auto gpr = std::get_if<GprNode>(&*dest)) {
- if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no-op
- return {};
- }
- dest_name = fmt::format("R{}.x", gpr->GetIndex());
- } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
- const u32 element = abuf->GetElement();
- const char swizzle = Swizzle(element);
- switch (const Attribute::Index index = abuf->GetIndex()) {
- case Attribute::Index::Position:
- dest_name = fmt::format("result.position.{}", swizzle);
- break;
- case Attribute::Index::LayerViewportPointSize:
- switch (element) {
- case 0:
- UNIMPLEMENTED();
- return {};
- case 1:
- case 2:
- if (!device.HasNvViewportArray2()) {
- LOG_ERROR(
- Render_OpenGL,
- "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
- return {};
- }
- dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
- break;
- case 3:
- dest_name = "result.pointsize.x";
- break;
- }
- break;
- case Attribute::Index::ClipDistances0123:
- dest_name = fmt::format("result.clip[{}].x", element);
- break;
- case Attribute::Index::ClipDistances4567:
- dest_name = fmt::format("result.clip[{}].x", element + 4);
- break;
- default:
- if (!IsGenericAttribute(index)) {
- UNREACHABLE();
- return {};
- }
- dest_name =
- fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
- break;
- }
- } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- const std::string address = Visit(lmem->GetAddress());
- AddLine("SHR.U {}, {}, 2;", address, address);
- dest_name = fmt::format("lmem[{}].x", address);
- } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
- AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
- ResetTemporaries();
- return {};
- } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- AddLine("IF NE.x;");
- AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
- AddLine("ENDIF;");
- ResetTemporaries();
- return {};
- } else {
- UNREACHABLE();
- ResetTemporaries();
- return {};
- }
-
- AddLine("MOV.U {}, {};", dest_name, Visit(src));
- ResetTemporaries();
- return {};
-}
-
-std::string ARBDecompiler::Select(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
- Visit(operation[2]));
- return temporary;
-}
-
-std::string ARBDecompiler::FClamp(Operation operation) {
- // 1.0f in hex; replace with std::bit_cast<u32>(1.0f) once C++20 is available
- static constexpr u32 POSITIVE_ONE = 0x3f800000;
-
- std::string temporary = AllocTemporary();
- const Node& value = operation[0];
- const Node& low = operation[1];
- const Node& high = operation[2];
- const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
- const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
- if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
- AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
- } else {
- AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
- AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
- }
- return temporary;
-}
-
-std::string ARBDecompiler::FCastHalf0(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::FCastHalf1(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
- AddLine("MOV {}.x, {}.y;", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::FSqrt(Operation operation) {
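- // GLASM offers no direct square root here, so sqrt(x) is computed as RCP(RSQ(x)).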
- std::string temporary = AllocTemporary();
- AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
- AddLine("RCP.F32 {}, {};", temporary, temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "NV_shader_thread_shuffle is missing. Kepler or better is required.");
- AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
- return fmt::format("{}.x", temporary);
- }
-
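- // Select the per-lane sign factors from the FSWZA/FSWZB tables using the two mask
- // bits that correspond to this thread's position within its quad.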
- AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
- AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
- AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
- AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
- AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
- AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
- AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
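-// The half-precision helpers unpack packed f16 pairs with UP2H, operate on them as two
-// f32 lanes, and repack the result with PK2H.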
-std::string ARBDecompiler::HAdd2(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
- AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HMul2(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
- AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HFma2(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- const std::string tmp3 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
- AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
- AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HAbsolute(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HNegate(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
- AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
- AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
- AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HClamp(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
- AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
- AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
- AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
- AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HCastFloat(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
- AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HUnpack(Operation operation) {
- std::string operand = Visit(operation[0]);
- switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
- case Tegra::Shader::HalfType::H0_H1:
- return operand;
- case Tegra::Shader::HalfType::F32: {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.U {}.x, {};", temporary, operand);
- AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
- }
- case Tegra::Shader::HalfType::H0_H0: {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, operand);
- AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
- }
- case Tegra::Shader::HalfType::H1_H1: {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, operand);
- AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
- }
- }
- UNREACHABLE();
- return "{0, 0, 0, 0}.x";
-}
-
-std::string ARBDecompiler::HMergeF32(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HMergeH0(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
- AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HMergeH1(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
- AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HPack2(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
- AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::LogicalAssign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- std::string target;
-
- if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
- ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
-
- const Tegra::Shader::Pred index = pred->GetIndex();
- switch (index) {
- case Tegra::Shader::Pred::NeverExecute:
- case Tegra::Shader::Pred::UnusedIndex:
- // Writing to these predicates is a no-op
- return {};
- }
- target = fmt::format("P{}.x", static_cast<u64>(index));
- } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
- const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
- target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
- } else {
- UNREACHABLE();
- ResetTemporaries();
- return {};
- }
-
- AddLine("MOV.U {}, {};", target, Visit(src));
- ResetTemporaries();
- return {};
-}
-
-std::string ARBDecompiler::LogicalPick2(Operation operation) {
- std::string temporary = AllocTemporary();
- const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
- AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
- return temporary;
-}
-
-std::string ARBDecompiler::LogicalAnd2(Operation operation) {
- std::string temporary = AllocTemporary();
- const std::string op = Visit(operation[0]);
- AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
- return temporary;
-}
-
-std::string ARBDecompiler::FloatOrdered(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
- AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
- AddLine("MOV.S {}, -1;", temporary);
- AddLine("MOV.S {} (NAN.x), 0;", temporary);
- AddLine("MOV.S {} (NAN.y), 0;", temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::FloatUnordered(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
- AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("MOV.S {} (NAN.x), -1;", temporary);
- AddLine("MOV.S {} (NAN.y), -1;", temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
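- // ADDC.U updates the carry flag; materialize it as a boolean (-1 or 0) through CF.x.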
- std::string temporary = AllocTemporary();
- AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("IF CF.x;");
- AddLine("MOV.S {}, -1;", temporary);
- AddLine("ENDIF;");
- return temporary;
-}
-
-std::string ARBDecompiler::Texture(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [coords, temporary, swizzle] = BuildCoords(operation);
-
- std::string_view opcode = "TEX";
- std::string extra;
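- // TXB and TXL take the bias/LOD in the coordinate's .w when a component is free;
- // otherwise it has to be passed as an extra scalar operand.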
- if (meta.bias) {
- ASSERT(!meta.lod);
- opcode = "TXB";
-
- if (swizzle < 4) {
- AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
- } else {
- const std::string bias = AllocTemporary();
- AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
- extra = fmt::format(" {},", bias);
- }
- }
- if (meta.lod) {
- ASSERT(!meta.bias);
- opcode = "TXL";
-
- if (swizzle < 4) {
- AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
- } else {
- const std::string lod = AllocTemporary();
- AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
- extra = fmt::format(" {},", lod);
- }
- }
-
- AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
- TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureGather(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [coords, temporary, swizzle] = BuildCoords(operation);
-
- std::string comp;
- if (!meta.sampler.is_shadow) {
- const auto& immediate = std::get<ImmediateNode>(*meta.component);
- comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
- }
-
- AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
- TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const std::string temporary = AllocVectorTemporary();
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
-
- ASSERT(!meta.sampler.is_array);
-
- const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
- AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureQueryLod(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const std::string temporary = AllocVectorTemporary();
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
-
- ASSERT(!meta.sampler.is_array);
-
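- // Scale by 256 and truncate to produce the 8.8 fixed-point LOD the guest expects.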
- const std::size_t count = operation.GetOperandsCount();
- for (std::size_t i = 0; i < count; ++i) {
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
- }
- AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
- AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
- AddLine("TRUNC.S {}, {};", temporary, temporary);
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TexelFetch(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [coords, temporary, swizzle] = BuildCoords(operation);
-
- if (!meta.sampler.is_buffer) {
- ASSERT(swizzle < 4);
- AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
- }
- AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
- BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureGradient(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const std::string ddx = AllocVectorTemporary();
- const std::string ddy = AllocVectorTemporary();
- const std::string coord = std::get<1>(BuildCoords(operation));
-
- const std::size_t num_components = meta.derivates.size() / 2;
- for (std::size_t index = 0; index < num_components; ++index) {
- const char swizzle = Swizzle(index);
- AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
- AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
- }
-
- const std::string_view result = coord;
- AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
- TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
- return fmt::format("{}.x", result);
-}
-
-std::string ARBDecompiler::ImageLoad(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
- const std::size_t count = operation.GetOperandsCount();
- const std::string_view type = ImageType(meta.image.type);
-
- const std::string temporary = AllocVectorTemporary();
- for (std::size_t i = 0; i < count; ++i) {
- AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
- }
- AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
- AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::ImageStore(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
- const std::size_t num_coords = operation.GetOperandsCount();
- const std::size_t num_values = meta.values.size();
- const std::string_view type = ImageType(meta.image.type);
-
- const std::string coord = AllocVectorTemporary();
- const std::string value = AllocVectorTemporary();
- for (std::size_t i = 0; i < num_coords; ++i) {
- AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
- }
- for (std::size_t i = 0; i < num_values; ++i) {
- AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
- }
- AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
- return {};
-}
-
-std::string ARBDecompiler::Branch(Operation operation) {
- const auto target = std::get<ImmediateNode>(*operation[0]);
- AddLine("MOV.U PC.x, {};", target.GetValue());
- AddLine("CONT;");
- return {};
-}
-
-std::string ARBDecompiler::BranchIndirect(Operation operation) {
- AddLine("MOV.U PC.x, {};", Visit(operation[0]));
- AddLine("CONT;");
- return {};
-}
-
-std::string ARBDecompiler::PushFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
- const std::string_view stack_name = StackName(stack);
- AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
- AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
- return {};
-}
-
-std::string ARBDecompiler::PopFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- const std::string_view stack_name = StackName(stack);
- AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
- AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
- AddLine("CONT;");
- return {};
-}
-
-std::string ARBDecompiler::Exit(Operation) {
- Exit();
- return {};
-}
-
-std::string ARBDecompiler::Discard(Operation) {
- AddLine("KIL TR;");
- return {};
-}
-
-std::string ARBDecompiler::EmitVertex(Operation) {
- AddLine("EMIT;");
- return {};
-}
-
-std::string ARBDecompiler::EndPrimitive(Operation) {
- AddLine("ENDPRIM;");
- return {};
-}
-
-std::string ARBDecompiler::InvocationId(Operation) {
- return "primitive.invocation";
-}
-
-std::string ARBDecompiler::YNegate(Operation) {
- LOG_WARNING(Render_OpenGL, "(STUBBED)");
- std::string temporary = AllocTemporary();
- AddLine("MOV.F {}, 1;", temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::ThreadId(Operation) {
- return fmt::format("{}.threadid", StageInputName(stage));
-}
-
-std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "NV_shader_thread_shuffle is missing. Kepler or better is required.");
- return Visit(operation[0]);
- }
- const std::string temporary = AllocVectorTemporary();
- AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
- Visit(operation[1]));
- AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::Barrier(Operation) {
- AddLine("BAR;");
- return {};
-}
-
-std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
- AddLine("MEMBAR.CTA;");
- return {};
-}
-
-std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
- AddLine("MEMBAR;");
- return {};
-}
-
-} // Anonymous namespace
-
-std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- Tegra::Engines::ShaderType stage, std::string_view identifier) {
- return ARBDecompiler(device, ir, registry, stage, identifier).Code();
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
deleted file mode 100644
index 6afc87220..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <string>
-#include <string_view>
-
-#include "common/common_types.h"
-
-namespace Tegra::Engines {
-enum class ShaderType : u32;
-}
-
-namespace VideoCommon::Shader {
-class ShaderIR;
-class Registry;
-} // namespace VideoCommon::Shader
-
-namespace OpenGL {
-
-class Device;
-
-std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- Tegra::Engines::ShaderType stage, std::string_view identifier);
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index c225d1fc9..07a995f7d 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,14 +2,18 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <span>
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
namespace {
+using VideoCore::Surface::PixelFormat;
+
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
@@ -21,6 +25,25 @@ constexpr std::array PROGRAM_LUT{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
+
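+// Texture buffers have no SNORM internal formats; fall back to the UNORM equivalent.
+// Buffer::View logs a warning whenever this substitution is taken.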
+[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) {
+ switch (gl_format) {
+ case GL_RGBA8_SNORM:
+ return GL_RGBA8;
+ case GL_R8_SNORM:
+ return GL_R8;
+ case GL_RGBA16_SNORM:
+ return GL_RGBA16;
+ case GL_R16_SNORM:
+ return GL_R16;
+ case GL_RG16_SNORM:
+ return GL_RG16;
+ case GL_RG8_SNORM:
+ return GL_RG8;
+ default:
+ return gl_format;
+ }
+}
} // Anonymous namespace
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
@@ -62,6 +85,30 @@ void Buffer::MakeResident(GLenum access) noexcept {
glMakeNamedBufferResidentNV(buffer.handle, access);
}
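+// Returns a texture buffer view covering (offset, size, format), creating and caching
+// a new one on first use.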
+GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
+ const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
+ return offset == view.offset && size == view.size && format == view.format;
+ })};
+ if (it != views.end()) {
+ return it->texture.handle;
+ }
+ OGLTexture texture;
+ texture.Create(GL_TEXTURE_BUFFER);
+ const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
+ const GLenum texture_format{GetTextureBufferFormat(gl_format)};
+ if (texture_format != gl_format) {
+ LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM.");
+ }
+ glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size);
+ views.push_back({
+ .offset = offset,
+ .size = size,
+ .format = format,
+ .texture = std::move(texture),
+ });
+ return views.back().texture.handle;
+}
+
BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()},
@@ -98,6 +145,12 @@ void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
}
}
+void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {
+ // The clear size is expressed in bytes, and the GL_R32UI internal format requires
+ // the integer pixel format GL_RED_INTEGER rather than GL_RED.
+ glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset),
+ static_cast<GLsizeiptr>(size), GL_RED_INTEGER, GL_UNSIGNED_INT,
+ &value);
+}
+
void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
@@ -138,7 +191,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
static_cast<GLsizeiptr>(size));
} else {
- const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -165,7 +218,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size, bool is_written) {
- if (use_assembly_shaders) {
+ if (use_storage_buffers) {
+ const GLuint base_binding = graphics_base_storage_bindings[stage];
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ } else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
@@ -174,17 +232,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
- } else {
- const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
- const GLuint binding = base_binding + binding_index;
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
- static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size, bool is_written) {
- if (use_assembly_shaders) {
+ if (use_storage_buffers) {
+ if (size != 0) {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ } else {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
+ }
+ } else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
@@ -193,11 +253,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
- } else if (size == 0) {
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
- } else {
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
- static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
@@ -207,4 +262,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
+void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
+ PixelFormat format) {
+ *texture_handles++ = buffer.View(offset, size, format);
+}
+
+void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
+ *image_handles++ = buffer.View(offset, size, format);
+}
+
} // namespace OpenGL
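
Note on `BindTextureBuffer`/`BindImageBuffer`: rather than binding immediately, the runtime writes each view handle through a caller-provided cursor (`SetImagePointers`) so the pipeline can issue one multi-bind for the whole batch afterwards. A caller-side sketch of that contract; the handle counts are illustrative:

```cpp
#include <array>
#include <glad/glad.h>

// Caller-side sketch of the handle-cursor contract (counts assumed).
void BindBatch(OpenGL::BufferCacheRuntime& runtime) {
    std::array<GLuint, 64> textures{};
    std::array<GLuint, 8> images{};
    runtime.SetImagePointers(textures.data(), images.data());
    // ... BindTextureBuffer/BindImageBuffer calls advance the cursors here ...
    glBindTextures(0, 2, textures.data());    // 2 texture handles written (assumed)
    glBindImageTextures(0, 1, images.data()); // 1 image handle written (assumed)
}
```
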
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index d8b20a9af..060d36427 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -32,6 +32,8 @@ public:
void MakeResident(GLenum access) noexcept;
+ [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
+
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
return address;
}
@@ -41,9 +43,17 @@ public:
}
private:
+ struct BufferView {
+ u32 offset;
+ u32 size;
+ VideoCore::Surface::PixelFormat format;
+ OGLTexture texture;
+ };
+
GLuint64EXT address = 0;
OGLBuffer buffer;
GLenum current_residency_access = GL_NONE;
+ std::vector<BufferView> views;
};
class BufferCacheRuntime {
@@ -57,6 +67,8 @@ public:
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies);
+ void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
+
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
@@ -73,17 +85,21 @@ public:
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
+ void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
+ VideoCore::Surface::PixelFormat format);
+
+ void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
+ VideoCore::Surface::PixelFormat format);
+
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
+ const GLuint handle = fast_uniforms[stage][binding_index].handle;
+ const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
if (use_assembly_shaders) {
- const GLuint handle = fast_uniforms[stage][binding_index].handle;
- const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
} else {
- const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
- glBindBufferRange(GL_UNIFORM_BUFFER, binding,
- fast_uniforms[stage][binding_index].handle, 0,
- static_cast<GLsizeiptr>(size));
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size);
}
}
@@ -101,7 +117,7 @@ public:
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
- const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -116,6 +132,27 @@ public:
return has_fast_buffer_sub_data;
}
+ [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept {
+ return !use_assembly_shaders;
+ }
+
+ void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
+ graphics_base_uniform_bindings = bindings;
+ }
+
+ void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
+ graphics_base_storage_bindings = bindings;
+ }
+
+ void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
+ texture_handles = texture_handles_;
+ image_handles = image_handles_;
+ }
+
+ void SetEnableStorageBuffers(bool use_storage_buffers_) {
+ use_storage_buffers = use_storage_buffers_;
+ }
+
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -129,8 +166,15 @@ private:
bool use_assembly_shaders = false;
bool has_unified_vertex_buffers = false;
+ bool use_storage_buffers = false;
+
u32 max_attributes = 0;
+ std::array<GLuint, 5> graphics_base_uniform_bindings{};
+ std::array<GLuint, 5> graphics_base_storage_bindings{};
+ GLuint* texture_handles = nullptr;
+ GLuint* image_handles = nullptr;
+
std::optional<StreamBuffer> stream_buffer;
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
@@ -154,6 +198,7 @@ struct BufferCacheParams {
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = false;
+ static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
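
`BufferCacheParams` acts as a compile-time policy consumed by the shared `VideoCommon::BufferCache` template; `SEPARATE_IMAGE_BUFFER_BINDINGS` is the OpenGL-specific addition in this patch. A sketch of the instantiation; the constructor argument list is an assumption, not taken from this diff (see video_core/buffer_cache/buffer_cache.h for the real one):

```cpp
// Assumed wiring of the alias declared above.
OpenGL::BufferCacheRuntime runtime{device};
OpenGL::BufferCache buffer_cache{rasterizer, maxwell3d, kepler_compute,
                                 gpu_memory,  cpu_memory, runtime};
```
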
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
new file mode 100644
index 000000000..aa1cc592f
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -0,0 +1,209 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+
+#include "common/cityhash.h"
+#include "common/settings.h" // for enum class Settings::ShaderBackend
+#include "video_core/renderer_opengl/gl_compute_pipeline.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+
+namespace OpenGL {
+
+using Shader::ImageBufferDescriptor;
+using Tegra::Texture::TexturePair;
+using VideoCommon::ImageId;
+
+constexpr u32 MAX_TEXTURES = 64;
+constexpr u32 MAX_IMAGES = 16;
+
+template <typename Range>
+u32 AccumulateCount(const Range& range) {
+ u32 num{};
+ for (const auto& desc : range) {
+ num += desc.count;
+ }
+ return num;
+}
+
+size_t ComputePipelineKey::Hash() const noexcept {
+ return static_cast<size_t>(
+ Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
+}
+
+bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept {
+ return std::memcmp(this, &rhs, sizeof *this) == 0;
+}
+
+ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ ProgramManager& program_manager_, const Shader::Info& info_,
+ std::string code, std::vector<u32> code_v)
+ : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
+ kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} {
+ switch (device.GetShaderBackend()) {
+ case Settings::ShaderBackend::GLSL:
+ source_program = CreateProgram(code, GL_COMPUTE_SHADER);
+ break;
+ case Settings::ShaderBackend::GLASM:
+ assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
+ break;
+ case Settings::ShaderBackend::SPIRV:
+ source_program = CreateProgram(code_v, GL_COMPUTE_SHADER);
+ break;
+ }
+ std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
+ uniform_buffer_sizes.begin());
+
+ num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
+ num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
+
+ const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
+ ASSERT(num_textures <= MAX_TEXTURES);
+
+ const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
+ ASSERT(num_images <= MAX_IMAGES);
+
+ const bool is_glasm{assembly_program.handle != 0};
+ const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
+ use_storage_buffers =
+ !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
+ writes_global_memory = !use_storage_buffers &&
+ std::ranges::any_of(info.storage_buffers_descriptors,
+ [](const auto& desc) { return desc.is_written; });
+}
+
+void ComputePipeline::Configure() {
+ buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
+ buffer_cache.UnbindComputeStorageBuffers();
+ size_t ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ ASSERT(desc.count == 1);
+ buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
+ desc.is_written);
+ ++ssbo_index;
+ }
+ texture_cache.SynchronizeComputeDescriptors();
+
+ std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
+ boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
+ std::array<GLuint, MAX_TEXTURES> samplers;
+ std::array<GLuint, MAX_TEXTURES> textures;
+ std::array<GLuint, MAX_IMAGES> images;
+ GLsizei sampler_binding{};
+ GLsizei texture_binding{};
+ GLsizei image_binding{};
+
+ const auto& qmd{kepler_compute.launch_description};
+ const auto& cbufs{qmd.const_buffer_config};
+ const bool via_header_index{qmd.linked_tsc != 0};
+ const auto read_handle{[&](const auto& desc, u32 index) {
+ ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
+ const u32 index_offset{index << desc.size_shift};
+ const u32 offset{desc.cbuf_offset + index_offset};
+ const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
+ if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
+ std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
+ if (desc.has_secondary) {
+ ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
+ const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
+ const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
+ secondary_offset};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ return TexturePair(lhs_raw | rhs_raw, via_header_index);
+ }
+ }
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ }};
+ const auto add_image{[&](const auto& desc) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+ }
+ }};
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+ samplers[sampler_binding++] = 0;
+ }
+ }
+ std::ranges::for_each(info.image_buffer_descriptors, add_image);
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+
+ Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
+ samplers[sampler_binding++] = sampler->Handle();
+ }
+ }
+ std::ranges::for_each(info.image_descriptors, add_image);
+
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+
+ if (assembly_program.handle != 0) {
+ program_manager.BindComputeAssemblyProgram(assembly_program.handle);
+ } else {
+ program_manager.BindComputeProgram(source_program.handle);
+ }
+ buffer_cache.UnbindComputeTextureBuffers();
+ size_t texbuf_index{};
+ const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
+ for (u32 i = 0; i < desc.count; ++i) {
+ bool is_written{false};
+ if constexpr (is_image) {
+ is_written = desc.is_written;
+ }
+ ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
+ buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
+ image_view.BufferSize(), image_view.format,
+ is_written, is_image);
+ ++texbuf_index;
+ }
+ }};
+ std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
+ std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
+
+ buffer_cache.UpdateComputeBuffers();
+
+ buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
+ buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
+ buffer_cache.BindHostComputeBuffers();
+
+ const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
+ texture_binding += num_texture_buffers;
+ image_binding += num_image_buffers;
+
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ textures[texture_binding++] = image_view.Handle(desc.type);
+ }
+ }
+ for (const auto& desc : info.image_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ if (desc.is_written) {
+ texture_cache.MarkModification(image_view.image_id);
+ }
+ images[image_binding++] = image_view.StorageView(desc.type, desc.format);
+ }
+ }
+ if (texture_binding != 0) {
+ ASSERT(texture_binding == sampler_binding);
+ glBindTextures(0, texture_binding, textures.data());
+ glBindSamplers(0, sampler_binding, samplers.data());
+ }
+ if (image_binding != 0) {
+ glBindImageTextures(0, image_binding, images.data());
+ }
+}
+
+} // namespace OpenGL
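
The `read_handle` lambda above reconstructs Maxwell texture handles from const-buffer words, OR-ing two words together when the TIC and TSC indices are stored in separate buffers. For reference, the bit layout it decodes (per `Tegra::Texture::TextureHandle`, also visible in the struct removed from gl_rasterizer.cpp further down):

```cpp
// Bit layout of a 32-bit Maxwell texture handle:
//   bits  0..19: TIC (texture header) index
//   bits 20..31: TSC (sampler header) index
// With via_header_index set, the TIC index doubles as the sampler index.
const u32 raw = gpu_memory.Read<u32>(addr); // addr as computed in read_handle
const auto [tic_index, tsc_index] = TexturePair(raw, via_header_index);
```
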
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
new file mode 100644
index 000000000..50c676365
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -0,0 +1,93 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <type_traits>
+#include <utility>
+
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Tegra::Engines {
+class KeplerCompute;
+}
+
+namespace Shader {
+struct Info;
+}
+
+namespace OpenGL {
+
+class Device;
+class ProgramManager;
+
+struct ComputePipelineKey {
+ u64 unique_hash;
+ u32 shared_memory_size;
+ std::array<u32, 3> workgroup_size;
+
+ size_t Hash() const noexcept;
+
+ bool operator==(const ComputePipelineKey&) const noexcept;
+
+ bool operator!=(const ComputePipelineKey& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+};
+static_assert(std::has_unique_object_representations_v<ComputePipelineKey>);
+static_assert(std::is_trivially_copyable_v<ComputePipelineKey>);
+static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
+
+class ComputePipeline {
+public:
+ explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ ProgramManager& program_manager_, const Shader::Info& info_,
+ std::string code, std::vector<u32> code_v);
+
+ void Configure();
+
+ [[nodiscard]] bool WritesGlobalMemory() const noexcept {
+ return writes_global_memory;
+ }
+
+private:
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ ProgramManager& program_manager;
+
+ Shader::Info info;
+ OGLProgram source_program;
+ OGLAssemblyProgram assembly_program;
+ VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
+
+ u32 num_texture_buffers{};
+ u32 num_image_buffers{};
+
+ bool use_storage_buffers{};
+ bool writes_global_memory{};
+};
+
+} // namespace OpenGL
+
+namespace std {
+template <>
+struct hash<OpenGL::ComputePipelineKey> {
+ size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+} // namespace std
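
The `std::hash` specialization, together with the `memcmp`-based `operator==` that the `has_unique_object_representations` assertion keeps sound, lets the key go straight into a hash map. Illustrative only; the actual cache container is defined in the shader cache, not this header:

```cpp
#include <memory>
#include <unordered_map>

using ComputePipelineCache =
    std::unordered_map<OpenGL::ComputePipelineKey, std::unique_ptr<OpenGL::ComputePipeline>>;
```
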
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 3b00614e7..9692b8e94 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -17,39 +17,17 @@
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/settings.h"
+#include "shader_recompiler/stage.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
namespace {
-// One uniform block is reserved for emulation purposes
-constexpr u32 ReservedUniformBlocks = 1;
-
-constexpr u32 NumStages = 5;
-
constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
};
-constexpr std::array LIMIT_SSBOS = {
- GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
- GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
- GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
-};
-constexpr std::array LIMIT_SAMPLERS = {
- GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
- GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
- GL_MAX_TEXTURE_IMAGE_UNITS,
- GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
-};
-constexpr std::array LIMIT_IMAGES = {
- GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
- GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
- GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
-};
template <typename T>
T GetInteger(GLenum pname) {
@@ -82,81 +60,18 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view
return std::ranges::find(extensions, extension) != extensions.end();
}
-u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
- ASSERT(num >= amount);
- if (limit) {
- amount = std::min(amount, GetInteger<u32>(*limit));
- }
- num -= amount;
- return std::exchange(base, base + amount);
-}
-
-std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
- std::array<u32, Tegra::Engines::MaxShaderTypes> max;
- std::ranges::transform(LIMIT_UBOS, max.begin(),
- [](GLenum pname) { return GetInteger<u32>(pname); });
+std::array<u32, Shader::MaxStageTypes> BuildMaxUniformBuffers() noexcept {
+ std::array<u32, Shader::MaxStageTypes> max;
+ std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger<u32>);
return max;
}
-std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
- std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
-
- static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
- const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
- const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
- const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
-
- u32 num_ubos = total_ubos - ReservedUniformBlocks;
- u32 num_ssbos = total_ssbos;
- u32 num_samplers = total_samplers;
-
- u32 base_ubo = ReservedUniformBlocks;
- u32 base_ssbo = 0;
- u32 base_samplers = 0;
-
- for (std::size_t i = 0; i < NumStages; ++i) {
- const std::size_t stage = stage_swizzle[i];
- bindings[stage] = {
- Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
- Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
- Extract(base_samplers, num_samplers, total_samplers / NumStages,
- LIMIT_SAMPLERS[stage])};
- }
-
- u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
- u32 base_images = 0;
-
- // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
- // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
- // fragment stage, and at least 1 for the rest of the stages.
- // So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
-
- // Reserve at least 4 image bindings on the fragment stage.
- bindings[4].image =
- Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
-
- // This is guaranteed to be at least 1.
- const u32 total_extracted_images = num_images / (NumStages - 1);
-
- // Reserve the other image bindings.
- for (std::size_t i = 0; i < NumStages; ++i) {
- const std::size_t stage = stage_swizzle[i];
- if (stage == 4) {
- continue;
- }
- bindings[stage].image =
- Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
- }
-
- // Compute doesn't care about any of this.
- bindings[5] = {0, 0, 0, 0};
-
- return bindings;
-}
-
bool IsASTCSupported() {
- static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
- static constexpr std::array formats = {
+ static constexpr std::array targets{
+ GL_TEXTURE_2D,
+ GL_TEXTURE_2D_ARRAY,
+ };
+ static constexpr std::array formats{
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
@@ -172,11 +87,10 @@ bool IsASTCSupported() {
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
};
- static constexpr std::array required_support = {
+ static constexpr std::array required_support{
GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
};
-
for (const GLenum target : targets) {
for (const GLenum format : formats) {
for (const GLenum support : required_support) {
@@ -223,14 +137,13 @@ Device::Device() {
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
-
max_uniform_buffers = BuildMaxUniformBuffers();
- base_bindings = BuildBaseBindings();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
+ max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -243,18 +156,30 @@ Device::Device() {
has_precise_bug = TestPreciseBug();
has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
+ has_derivative_control = GLAD_GL_ARB_derivative_control;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);
has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
+ has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough;
+ has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
+ has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
+ has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
+ has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
+ warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
+ need_fastmath_off = is_nvidia;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
- use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
+ shader_backend = Settings::values.shader_backend.GetValue();
+ use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM &&
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
-
+ if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) {
+ LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
+ shader_backend = Settings::ShaderBackend::GLSL;
+ }
// Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
!(is_amd || (is_intel && !is_linux));
@@ -265,11 +190,6 @@ Device::Device() {
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
has_broken_texture_view_formats);
-
- if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
- LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
- }
-
if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) {
LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported");
}
@@ -325,22 +245,6 @@ std::string Device::GetVendorName() const {
return vendor_name;
}
-Device::Device(std::nullptr_t) {
- max_uniform_buffers.fill(std::numeric_limits<u32>::max());
- uniform_buffer_alignment = 4;
- shader_storage_alignment = 4;
- max_vertex_attributes = 16;
- max_varyings = 15;
- max_compute_shared_memory_size = 0x10000;
- has_warp_intrinsics = true;
- has_shader_ballot = true;
- has_vertex_viewport_layer = true;
- has_image_load_formatted = true;
- has_texture_shadow_lod = true;
- has_variable_aoffi = true;
- has_depth_buffer_float = true;
-}
-
bool Device::TestVariableAoffi() {
return TestProgram(R"(#version 430 core
// This is a unit test, please ignore me on apitrace bug reports.
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 2c2b13767..ee992aed4 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -6,34 +6,22 @@
#include <cstddef>
#include "common/common_types.h"
-#include "video_core/engines/shader_type.h"
+#include "shader_recompiler/stage.h"
+
+namespace Settings {
+enum class ShaderBackend : u32;
+} // namespace Settings
namespace OpenGL {
class Device {
public:
- struct BaseBindings {
- u32 uniform_buffer{};
- u32 shader_storage_buffer{};
- u32 sampler{};
- u32 image{};
- };
-
explicit Device();
- explicit Device(std::nullptr_t);
[[nodiscard]] std::string GetVendorName() const;
- u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
- return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
- }
-
- const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
- return base_bindings[stage_index];
- }
-
- const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
- return GetBaseBindings(static_cast<std::size_t>(shader_type));
+ u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept {
+ return max_uniform_buffers[static_cast<size_t>(stage)];
}
size_t GetUniformBufferAlignment() const {
@@ -56,6 +44,10 @@ public:
return max_compute_shared_memory_size;
}
+ u32 GetMaxGLASMStorageBufferBlocks() const {
+ return max_glasm_storage_buffer_blocks;
+ }
+
bool HasWarpIntrinsics() const {
return has_warp_intrinsics;
}
@@ -108,6 +100,10 @@ public:
return has_nv_viewport_array2;
}
+ bool HasDerivativeControl() const {
+ return has_derivative_control;
+ }
+
bool HasDebuggingToolAttached() const {
return has_debugging_tool_attached;
}
@@ -128,18 +124,52 @@ public:
return has_depth_buffer_float;
}
+ bool HasGeometryShaderPassthrough() const {
+ return has_geometry_shader_passthrough;
+ }
+
+ bool HasNvGpuShader5() const {
+ return has_nv_gpu_shader_5;
+ }
+
+ bool HasShaderInt64() const {
+ return has_shader_int64;
+ }
+
+ bool HasAmdShaderHalfFloat() const {
+ return has_amd_shader_half_float;
+ }
+
+ bool HasSparseTexture2() const {
+ return has_sparse_texture_2;
+ }
+
+ bool IsWarpSizePotentiallyLargerThanGuest() const {
+ return warp_size_potentially_larger_than_guest;
+ }
+
+ bool NeedsFastmathOff() const {
+ return need_fastmath_off;
+ }
+
+ Settings::ShaderBackend GetShaderBackend() const {
+ return shader_backend;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
- std::string vendor_name;
- std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
- std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
+ std::array<u32, Shader::MaxStageTypes> max_uniform_buffers{};
size_t uniform_buffer_alignment{};
size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
+ u32 max_glasm_storage_buffer_blocks{};
+
+ Settings::ShaderBackend shader_backend{};
+
bool has_warp_intrinsics{};
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
@@ -153,11 +183,21 @@ private:
bool has_broken_texture_view_formats{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
+ bool has_derivative_control{};
bool has_debugging_tool_attached{};
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
bool use_driver_cache{};
bool has_depth_buffer_float{};
+ bool has_geometry_shader_passthrough{};
+ bool has_nv_gpu_shader_5{};
+ bool has_shader_int64{};
+ bool has_amd_shader_half_float{};
+ bool has_sparse_texture_2{};
+ bool warp_size_potentially_larger_than_guest{};
+ bool need_fastmath_off{};
+
+ std::string vendor_name;
};
} // namespace OpenGL
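
Most of the new capability getters exist so the shader recompiler can build a per-device profile instead of probing GL state itself. A hedged sketch of that consumption; the `Shader::Profile` field names here are assumptions, not taken from this diff:

```cpp
// Assumed field names; see shader_recompiler/profile.h for the real ones.
Shader::Profile MakeProfile(const OpenGL::Device& device) {
    Shader::Profile profile{};
    profile.support_int64 = device.HasShaderInt64();
    profile.support_derivative_control = device.HasDerivativeControl();
    profile.warp_size_potentially_larger_than_guest =
        device.IsWarpSizePotentiallyLargerThanGuest();
    return profile;
}
```
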
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
new file mode 100644
index 000000000..fac0034fb
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -0,0 +1,572 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <string>
+#include <vector>
+
+#include "common/settings.h" // for enum class Settings::ShaderBackend
+#include "common/thread_worker.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
+#include "video_core/shader_notify.h"
+#include "video_core/texture_cache/texture_cache.h"
+
+#if defined(_MSC_VER) && defined(NDEBUG)
+#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
+#else
+#define LAMBDA_FORCEINLINE
+#endif
+
+namespace OpenGL {
+namespace {
+using Shader::ImageBufferDescriptor;
+using Shader::ImageDescriptor;
+using Shader::TextureBufferDescriptor;
+using Shader::TextureDescriptor;
+using Tegra::Texture::TexturePair;
+using VideoCommon::ImageId;
+
+constexpr u32 MAX_TEXTURES = 64;
+constexpr u32 MAX_IMAGES = 8;
+
+template <typename Range>
+u32 AccumulateCount(const Range& range) {
+ u32 num{};
+ for (const auto& desc : range) {
+ num += desc.count;
+ }
+ return num;
+}
+
+GLenum Stage(size_t stage_index) {
+ switch (stage_index) {
+ case 0:
+ return GL_VERTEX_SHADER;
+ case 1:
+ return GL_TESS_CONTROL_SHADER;
+ case 2:
+ return GL_TESS_EVALUATION_SHADER;
+ case 3:
+ return GL_GEOMETRY_SHADER;
+ case 4:
+ return GL_FRAGMENT_SHADER;
+ }
+ UNREACHABLE_MSG("{}", stage_index);
+ return GL_NONE;
+}
+
+GLenum AssemblyStage(size_t stage_index) {
+ switch (stage_index) {
+ case 0:
+ return GL_VERTEX_PROGRAM_NV;
+ case 1:
+ return GL_TESS_CONTROL_PROGRAM_NV;
+ case 2:
+ return GL_TESS_EVALUATION_PROGRAM_NV;
+ case 3:
+ return GL_GEOMETRY_PROGRAM_NV;
+ case 4:
+ return GL_FRAGMENT_PROGRAM_NV;
+ }
+ UNREACHABLE_MSG("{}", stage_index);
+ return GL_NONE;
+}
+
+/// Translates hardware transform feedback indices
+/// @param location Hardware location
+/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
+/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
+std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
+ const u8 index = location / 4;
+ if (index >= 8 && index <= 39) {
+ return {GL_GENERIC_ATTRIB_NV, index - 8};
+ }
+ if (index >= 48 && index <= 55) {
+ return {GL_TEXTURE_COORD_NV, index - 48};
+ }
+ switch (index) {
+ case 7:
+ return {GL_POSITION, 0};
+ case 40:
+ return {GL_PRIMARY_COLOR_NV, 0};
+ case 41:
+ return {GL_SECONDARY_COLOR_NV, 0};
+ case 42:
+ return {GL_BACK_PRIMARY_COLOR_NV, 0};
+ case 43:
+ return {GL_BACK_SECONDARY_COLOR_NV, 0};
+ }
+ UNIMPLEMENTED_MSG("index={}", index);
+ return {GL_POSITION, 0};
+}
+
+template <typename Spec>
+bool Passes(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
+ for (size_t stage = 0; stage < stage_infos.size(); ++stage) {
+ if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) {
+ return false;
+ }
+ const auto& info{stage_infos[stage]};
+ if constexpr (!Spec::has_storage_buffers) {
+ if (!info.storage_buffers_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_texture_buffers) {
+ if (!info.texture_buffer_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_image_buffers) {
+ if (!info.image_buffer_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_images) {
+ if (!info.image_descriptors.empty()) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool);
+
+template <typename Spec, typename... Specs>
+ConfigureFuncPtr FindSpec(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
+ if constexpr (sizeof...(Specs) > 0) {
+ if (!Passes<Spec>(stage_infos, enabled_mask)) {
+ return FindSpec<Specs...>(stage_infos, enabled_mask);
+ }
+ }
+ return GraphicsPipeline::MakeConfigureSpecFunc<Spec>();
+}
+
+struct SimpleVertexFragmentSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
+ static constexpr bool has_storage_buffers = false;
+ static constexpr bool has_texture_buffers = false;
+ static constexpr bool has_image_buffers = false;
+ static constexpr bool has_images = false;
+};
+
+struct SimpleVertexSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false};
+ static constexpr bool has_storage_buffers = false;
+ static constexpr bool has_texture_buffers = false;
+ static constexpr bool has_image_buffers = false;
+ static constexpr bool has_images = false;
+};
+
+struct DefaultSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
+ static constexpr bool has_storage_buffers = true;
+ static constexpr bool has_texture_buffers = true;
+ static constexpr bool has_image_buffers = true;
+ static constexpr bool has_images = true;
+};
+
+ConfigureFuncPtr ConfigureFunc(const std::array<Shader::Info, 5>& infos, u32 enabled_mask) {
+ return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(infos, enabled_mask);
+}
+} // Anonymous namespace
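
The `Spec` structs plus `FindSpec` implement a small compile-time dispatch: specs are tried left to right and the first one whose static limits cover the pipeline wins, with `DefaultSpec` as the always-valid fallback, so common pipelines compile a `ConfigureImpl` that skips the unused descriptor loops entirely. Adding a specialization is just another struct; a hypothetical example:

```cpp
// Hypothetical extra specialization: vertex + geometry + fragment with no
// storage or image resources. It would be listed before DefaultSpec:
//   FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec,
//            VertexGeometryFragmentSpec, DefaultSpec>(infos, enabled_mask);
struct VertexGeometryFragmentSpec {
    static constexpr std::array<bool, 5> enabled_stages{true, false, false, true, true};
    static constexpr bool has_storage_buffers = false;
    static constexpr bool has_texture_buffers = false;
    static constexpr bool has_image_buffers = false;
    static constexpr bool has_images = false;
};
```
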
+
+GraphicsPipeline::GraphicsPipeline(
+ const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker,
+ VideoCore::ShaderNotify* shader_notify, std::array<std::string, 5> sources,
+ std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos,
+ const GraphicsPipelineKey& key_)
+ : texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
+ gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_},
+ state_tracker{state_tracker_}, key{key_} {
+ if (shader_notify) {
+ shader_notify->MarkShaderBuilding();
+ }
+ u32 num_textures{};
+ u32 num_images{};
+ u32 num_storage_buffers{};
+ for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
+ auto& info{stage_infos[stage]};
+ if (infos[stage]) {
+ info = *infos[stage];
+ enabled_stages_mask |= 1u << stage;
+ }
+ if (stage < 4) {
+ base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
+ base_storage_bindings[stage + 1] = base_storage_bindings[stage];
+
+ base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors);
+ base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors);
+ }
+ enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
+ std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
+
+ const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
+ num_texture_buffers[stage] += num_tex_buffer_bindings;
+ num_textures += num_tex_buffer_bindings;
+
+ const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
+ num_image_buffers[stage] += num_img_buffers_bindings;
+ num_images += num_img_buffers_bindings;
+
+ num_textures += AccumulateCount(info.texture_descriptors);
+ num_images += AccumulateCount(info.image_descriptors);
+ num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
+
+ writes_global_memory |= std::ranges::any_of(
+ info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
+ }
+ ASSERT(num_textures <= MAX_TEXTURES);
+ ASSERT(num_images <= MAX_IMAGES);
+
+    // The assembly programs have not been compiled yet at this point (that happens
+    // in the worker lambda below), so key off the device configuration rather than
+    // the still-empty program handles.
+    const bool assembly_shaders{device.UseAssemblyShaders()};
+    use_storage_buffers =
+        !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
+ writes_global_memory &= !use_storage_buffers;
+ configure_func = ConfigureFunc(stage_infos, enabled_stages_mask);
+
+ if (key.xfb_enabled && device.UseAssemblyShaders()) {
+ GenerateTransformFeedbackState();
+ }
+ const bool in_parallel = thread_worker != nullptr;
+ const auto backend = device.GetShaderBackend();
+ auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv),
+ shader_notify, backend, in_parallel](ShaderContext::Context*) mutable {
+ for (size_t stage = 0; stage < 5; ++stage) {
+ switch (backend) {
+ case Settings::ShaderBackend::GLSL:
+ if (!sources[stage].empty()) {
+ source_programs[stage] = CreateProgram(sources[stage], Stage(stage));
+ }
+ break;
+ case Settings::ShaderBackend::GLASM:
+ if (!sources[stage].empty()) {
+ assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
+ if (in_parallel) {
+ // Make sure program is built before continuing when building in parallel
+ glGetString(GL_PROGRAM_ERROR_STRING_NV);
+ }
+ }
+ break;
+ case Settings::ShaderBackend::SPIRV:
+ if (!sources_spirv[stage].empty()) {
+ source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage));
+ }
+ break;
+ }
+ }
+ if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
+            // Make sure programs have been built if we are compiling shaders in parallel
+ for (OGLProgram& program : source_programs) {
+ if (program.handle != 0) {
+ GLint status{};
+ glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
+ }
+ }
+ }
+ if (shader_notify) {
+ shader_notify->MarkShaderComplete();
+ }
+ is_built = true;
+ built_condvar.notify_one();
+ }};
+ if (thread_worker) {
+ thread_worker->QueueWork(std::move(func));
+ } else {
+ func(nullptr);
+ }
+}
+
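
The constructor replaces the old static `Device::BaseBindings` partition with a running prefix sum: each stage's uniform/storage base binding is the previous stage's base plus its descriptor count. A worked miniature of the loop above:

```cpp
// Example: vertex declares 3 UBOs, fragment 2, other stages none.
std::array<u32, 5> bases{};
const std::array<u32, 5> counts{3, 0, 0, 0, 2};
for (size_t stage = 0; stage < 4; ++stage) {
    bases[stage + 1] = bases[stage] + counts[stage];
}
// bases == {0, 3, 3, 3, 3}: vertex UBOs occupy GL bindings 0..2 and
// fragment UBOs start at binding 3.
```
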
+template <typename Spec>
+void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
+ std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
+ std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
+ std::array<GLuint, MAX_TEXTURES> samplers;
+ size_t image_view_index{};
+ GLsizei sampler_binding{};
+
+ texture_cache.SynchronizeGraphicsDescriptors();
+
+ buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
+ buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
+ buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
+ buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
+
+ const auto& regs{maxwell3d.regs};
+ const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
+ const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
+ const Shader::Info& info{stage_infos[stage]};
+ buffer_cache.UnbindGraphicsStorageBuffers(stage);
+ if constexpr (Spec::has_storage_buffers) {
+ size_t ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ ASSERT(desc.count == 1);
+ buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
+ desc.cbuf_offset, desc.is_written);
+ ++ssbo_index;
+ }
+ }
+ const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
+ const auto read_handle{[&](const auto& desc, u32 index) {
+ ASSERT(cbufs[desc.cbuf_index].enabled);
+ const u32 index_offset{index << desc.size_shift};
+ const u32 offset{desc.cbuf_offset + index_offset};
+ const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
+ if constexpr (std::is_same_v<decltype(desc), const TextureDescriptor&> ||
+ std::is_same_v<decltype(desc), const TextureBufferDescriptor&>) {
+ if (desc.has_secondary) {
+ ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
+ const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
+ const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
+ second_offset};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ const u32 raw{lhs_raw | rhs_raw};
+ return TexturePair(raw, via_header_index);
+ }
+ }
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ }};
+ const auto add_image{[&](const auto& desc) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_view_index++] = handle.first;
+ }
+ }};
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_view_index++] = handle.first;
+ samplers[sampler_binding++] = 0;
+ }
+ }
+ }
+ if constexpr (Spec::has_image_buffers) {
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_image(desc);
+ }
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_view_index++] = handle.first;
+
+ Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
+ samplers[sampler_binding++] = sampler->Handle();
+ }
+ }
+ if constexpr (Spec::has_images) {
+ for (const auto& desc : info.image_descriptors) {
+ add_image(desc);
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ config_stage(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ config_stage(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ config_stage(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ config_stage(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ config_stage(4);
+ }
+ const std::span indices_span(image_view_indices.data(), image_view_index);
+ texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+
+ texture_cache.UpdateRenderTargets(false);
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
+
+ ImageId* texture_buffer_index{image_view_ids.data()};
+ const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
+ size_t index{};
+ const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
+ for (u32 i = 0; i < desc.count; ++i) {
+ bool is_written{false};
+ if constexpr (is_image) {
+ is_written = desc.is_written;
+ }
+ ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
+ buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
+ image_view.BufferSize(), image_view.format,
+ is_written, is_image);
+ ++index;
+ ++texture_buffer_index;
+ }
+ }};
+ const Shader::Info& info{stage_infos[stage]};
+ buffer_cache.UnbindGraphicsTextureBuffers(stage);
+
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ add_buffer(desc);
+ }
+ }
+ if constexpr (Spec::has_image_buffers) {
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_buffer(desc);
+ }
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ texture_buffer_index += desc.count;
+ }
+ if constexpr (Spec::has_images) {
+ for (const auto& desc : info.image_descriptors) {
+ texture_buffer_index += desc.count;
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ bind_stage_info(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ bind_stage_info(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ bind_stage_info(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ bind_stage_info(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ bind_stage_info(4);
+ }
+ buffer_cache.UpdateGraphicsBuffers(is_indexed);
+ buffer_cache.BindHostGeometryBuffers(is_indexed);
+
+ if (!is_built.load(std::memory_order::relaxed)) {
+ WaitForBuild();
+ }
+ if (assembly_programs[0].handle != 0) {
+ program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
+ } else {
+ program_manager.BindSourcePrograms(source_programs);
+ }
+ const ImageId* views_it{image_view_ids.data()};
+ GLsizei texture_binding = 0;
+ GLsizei image_binding = 0;
+ std::array<GLuint, MAX_TEXTURES> textures;
+ std::array<GLuint, MAX_IMAGES> images;
+ const auto prepare_stage{[&](size_t stage) {
+ buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
+ buffer_cache.BindHostStageBuffers(stage);
+
+ texture_binding += num_texture_buffers[stage];
+ image_binding += num_image_buffers[stage];
+
+ views_it += num_texture_buffers[stage];
+ views_it += num_image_buffers[stage];
+
+ const auto& info{stage_infos[stage]};
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ textures[texture_binding++] = image_view.Handle(desc.type);
+ }
+ }
+ for (const auto& desc : info.image_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ if (desc.is_written) {
+ texture_cache.MarkModification(image_view.image_id);
+ }
+ images[image_binding++] = image_view.StorageView(desc.type, desc.format);
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ prepare_stage(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ prepare_stage(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ prepare_stage(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ prepare_stage(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ prepare_stage(4);
+ }
+ if (texture_binding != 0) {
+ ASSERT(texture_binding == sampler_binding);
+ glBindTextures(0, texture_binding, textures.data());
+ glBindSamplers(0, sampler_binding, samplers.data());
+ }
+ if (image_binding != 0) {
+ glBindImageTextures(0, image_binding, images.data());
+ }
+}
+
+void GraphicsPipeline::ConfigureTransformFeedbackImpl() const {
+ glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides,
+ xfb_streams.data(), GL_INTERLEAVED_ATTRIBS);
+}
+
+void GraphicsPipeline::GenerateTransformFeedbackState() {
+    // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. The UNIMPLEMENTED
+    // message below will fire when a title needs it.
+ GLint* cursor{xfb_attribs.data()};
+ GLint* current_stream{xfb_streams.data()};
+
+ for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
+ const auto& layout = key.xfb_state.layouts[feedback];
+ UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
+ if (layout.varying_count == 0) {
+ continue;
+ }
+ *current_stream = static_cast<GLint>(feedback);
+ if (current_stream != xfb_streams.data()) {
+            // When moving to the next stream, emit the expected GL_NEXT_BUFFER_NV entry
+ cursor[0] = GL_NEXT_BUFFER_NV;
+ cursor[1] = 0;
+ cursor[2] = 0;
+ cursor += XFB_ENTRY_STRIDE;
+ }
+ ++current_stream;
+
+ const auto& locations = key.xfb_state.varyings[feedback];
+ std::optional<u8> current_index;
+ for (u32 offset = 0; offset < layout.varying_count; ++offset) {
+ const u8 location = locations[offset];
+ const u8 index = location / 4;
+
+ if (current_index == index) {
+ // Increase number of components of the previous attachment
+ ++cursor[-2];
+ continue;
+ }
+ current_index = index;
+
+ std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
+ cursor[1] = 1;
+ cursor += XFB_ENTRY_STRIDE;
+ }
+ }
+ num_xfb_attribs = static_cast<GLsizei>((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE);
+ num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data());
+}
+
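
`GenerateTransformFeedbackState` flattens the guest layout into the three-int entries `{attribute, component_count, index}` consumed by `glTransformFeedbackStreamAttribsNV`. A worked example under an assumed guest layout:

```cpp
// Assumed layout: one buffer capturing gl_Position (locations 28..31, i.e.
// index 7) followed by generic attribute 0 (locations 32..35, index 8).
// The loop above emits the token stream
//   { GL_POSITION, 4, 0,  GL_GENERIC_ATTRIB_NV, 4, 0 }
// giving num_xfb_attribs == 2, each entry XFB_ENTRY_STRIDE (3) ints wide.
```
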
+void GraphicsPipeline::WaitForBuild() {
+ std::unique_lock lock{built_mutex};
+ built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
new file mode 100644
index 000000000..4e28d9a42
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -0,0 +1,169 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstring>
+#include <type_traits>
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/cityhash.h"
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+#include "video_core/transform_feedback.h"
+
+namespace OpenGL {
+
+namespace ShaderContext {
+struct Context;
+}
+
+class Device;
+class ProgramManager;
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
+
+struct GraphicsPipelineKey {
+ std::array<u64, 6> unique_hashes;
+ union {
+ u32 raw;
+ BitField<0, 1, u32> xfb_enabled;
+ BitField<1, 1, u32> early_z;
+ BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
+ BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive;
+ BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing;
+ BitField<10, 1, u32> tessellation_clockwise;
+ };
+ std::array<u32, 3> padding;
+ VideoCommon::TransformFeedbackState xfb_state;
+
+ size_t Hash() const noexcept {
+ return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
+ }
+
+ bool operator==(const GraphicsPipelineKey& rhs) const noexcept {
+ return std::memcmp(this, &rhs, Size()) == 0;
+ }
+
+ bool operator!=(const GraphicsPipelineKey& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+
+ [[nodiscard]] size_t Size() const noexcept {
+ if (xfb_enabled != 0) {
+ return sizeof(GraphicsPipelineKey);
+ } else {
+ return offsetof(GraphicsPipelineKey, padding);
+ }
+ }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
+
+class GraphicsPipeline {
+public:
+ explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_,
+ ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify,
+ std::array<std::string, 5> sources,
+ std::array<std::vector<u32>, 5> sources_spirv,
+ const std::array<const Shader::Info*, 5>& infos,
+ const GraphicsPipelineKey& key_);
+
+ void Configure(bool is_indexed) {
+ configure_func(this, is_indexed);
+ }
+
+ void ConfigureTransformFeedback() const {
+ if (num_xfb_attribs != 0) {
+ ConfigureTransformFeedbackImpl();
+ }
+ }
+
+ [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept {
+ return key;
+ }
+
+ [[nodiscard]] bool WritesGlobalMemory() const noexcept {
+ return writes_global_memory;
+ }
+
+ [[nodiscard]] bool IsBuilt() const noexcept {
+ return is_built.load(std::memory_order::relaxed);
+ }
+
+ template <typename Spec>
+ static auto MakeConfigureSpecFunc() {
+ return [](GraphicsPipeline* pipeline, bool is_indexed) {
+ pipeline->ConfigureImpl<Spec>(is_indexed);
+ };
+ }
+
+private:
+ template <typename Spec>
+ void ConfigureImpl(bool is_indexed);
+
+ void ConfigureTransformFeedbackImpl() const;
+
+ void GenerateTransformFeedbackState();
+
+ void WaitForBuild();
+
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ ProgramManager& program_manager;
+ StateTracker& state_tracker;
+ const GraphicsPipelineKey key;
+
+ void (*configure_func)(GraphicsPipeline*, bool){};
+
+ std::array<OGLProgram, 5> source_programs;
+ std::array<OGLAssemblyProgram, 5> assembly_programs;
+ u32 enabled_stages_mask{};
+
+ std::array<Shader::Info, 5> stage_infos{};
+ std::array<u32, 5> enabled_uniform_buffer_masks{};
+ VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
+ std::array<u32, 5> base_uniform_bindings{};
+ std::array<u32, 5> base_storage_bindings{};
+ std::array<u32, 5> num_texture_buffers{};
+ std::array<u32, 5> num_image_buffers{};
+
+ bool use_storage_buffers{};
+ bool writes_global_memory{};
+
+ static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
+ GLsizei num_xfb_attribs{};
+ GLsizei num_xfb_strides{};
+ std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
+ std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{};
+
+ std::mutex built_mutex;
+ std::condition_variable built_condvar;
+ std::atomic_bool is_built{false};
+};
+
+} // namespace OpenGL
+
+namespace std {
+template <>
+struct hash<OpenGL::GraphicsPipelineKey> {
+ size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+} // namespace std
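
A note on `GraphicsPipelineKey::Size()`: because `xfb_state` is the trailing member, disabling transform feedback shrinks both the hash and the `memcmp` to the leading bytes, keeping the common lookup path cheap. A minimal sketch:

```cpp
OpenGL::GraphicsPipelineKey key{};
key.xfb_enabled.Assign(0);
// Hashes/compares only up to the padding member while XFB is disabled:
const size_t hashed_bytes = key.Size(); // == offsetof(GraphicsPipelineKey, padding)
```
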
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 07ad0e205..41d2b73f4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -23,7 +23,6 @@
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
@@ -40,7 +39,6 @@ namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using GLvec4 = std::array<GLfloat, 4>;
-using Tegra::Engines::ShaderType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
@@ -51,112 +49,11 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
namespace {
-
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-struct TextureHandle {
- constexpr TextureHandle(u32 data, bool via_header_index) {
- const Tegra::Texture::TextureHandle handle{data};
- image = handle.tic_id;
- sampler = via_header_index ? image : handle.tsc_id.Value();
- }
-
- u32 image;
- u32 sampler;
-};
-
-template <typename Engine, typename Entry>
-TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
- ShaderType shader_type, size_t index = 0) {
- if constexpr (std::is_same_v<Entry, SamplerEntry>) {
- if (entry.is_separated) {
- const u32 buffer_1 = entry.buffer;
- const u32 buffer_2 = entry.secondary_buffer;
- const u32 offset_1 = entry.offset;
- const u32 offset_2 = entry.secondary_offset;
- const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
- const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
- return TextureHandle(handle_1 | handle_2, via_header_index);
- }
- }
- if (entry.is_bindless) {
- const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
- return TextureHandle(raw, via_header_index);
- }
- const u32 buffer = engine.GetBoundBuffer();
- const u64 offset = (entry.offset + index) * sizeof(u32);
- return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
-}
-
-/// Translates hardware transform feedback indices
-/// @param location Hardware location
-/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
-/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
-std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
- const u8 index = location / 4;
- if (index >= 8 && index <= 39) {
- return {GL_GENERIC_ATTRIB_NV, index - 8};
- }
- if (index >= 48 && index <= 55) {
- return {GL_TEXTURE_COORD_NV, index - 48};
- }
- switch (index) {
- case 7:
- return {GL_POSITION, 0};
- case 40:
- return {GL_PRIMARY_COLOR_NV, 0};
- case 41:
- return {GL_SECONDARY_COLOR_NV, 0};
- case 42:
- return {GL_BACK_PRIMARY_COLOR_NV, 0};
- case 43:
- return {GL_BACK_SECONDARY_COLOR_NV, 0};
- }
- UNIMPLEMENTED_MSG("index={}", index);
- return {GL_POSITION, 0};
-}
-
void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
-
-ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
- if (entry.is_buffer) {
- return ImageViewType::Buffer;
- }
- switch (entry.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
- case Tegra::Shader::TextureType::Texture2D:
- return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
- case Tegra::Shader::TextureType::Texture3D:
- return ImageViewType::e3D;
- case Tegra::Shader::TextureType::TextureCube:
- return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
- }
- UNREACHABLE();
- return ImageViewType::e2D;
-}
-
-ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
- switch (entry.type) {
- case Tegra::Shader::ImageType::Texture1D:
- return ImageViewType::e1D;
- case Tegra::Shader::ImageType::Texture1DArray:
- return ImageViewType::e1DArray;
- case Tegra::Shader::ImageType::Texture2D:
- return ImageViewType::e2D;
- case Tegra::Shader::ImageType::Texture2DArray:
- return ImageViewType::e2DArray;
- case Tegra::Shader::ImageType::Texture3D:
- return ImageViewType::e3D;
- case Tegra::Shader::ImageType::TextureBuffer:
- return ImageViewType::Buffer;
- }
- UNREACHABLE();
- return ImageViewType::e2D;
-}
-
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -170,14 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
buffer_cache_runtime(device),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
- shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
- query_cache(*this, maxwell3d, gpu_memory),
- fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
- async_shaders(emu_window_) {
- if (device.UseAsynchronousShaders()) {
- async_shaders.AllocateWorkers();
- }
-}
+ shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
+ buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
+ query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
@@ -204,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() {
const auto gl_index = static_cast<GLuint>(index);
// Disable constant attributes.
- if (attrib.IsConstant()) {
+ if (attrib.constant) {
glDisableVertexAttribArray(gl_index);
continue;
}
@@ -244,116 +137,9 @@ void RasterizerOpenGL::SyncVertexInstances() {
}
}
-void RasterizerOpenGL::SetupShaders(bool is_indexed) {
- u32 clip_distances = 0;
-
- std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
- image_view_indices.clear();
- sampler_handles.clear();
-
- texture_cache.SynchronizeGraphicsDescriptors();
-
- for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- const auto& shader_config = maxwell3d.regs.shader_config[index];
- const auto program{static_cast<Maxwell::ShaderProgram>(index)};
-
- // Skip stages that are not enabled
- if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
- switch (program) {
- case Maxwell::ShaderProgram::Geometry:
- program_manager.UseGeometryShader(0);
- break;
- case Maxwell::ShaderProgram::Fragment:
- program_manager.UseFragmentShader(0);
- break;
- default:
- break;
- }
- continue;
- }
-        // These stages are currently not supported in the OpenGL backend.
-        // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
- if (program == Maxwell::ShaderProgram::TesselationControl ||
- program == Maxwell::ShaderProgram::TesselationEval) {
- continue;
- }
-
- Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
- const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
- switch (program) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
- program_manager.UseVertexShader(program_handle);
- break;
- case Maxwell::ShaderProgram::Geometry:
- program_manager.UseGeometryShader(program_handle);
- break;
- case Maxwell::ShaderProgram::Fragment:
- program_manager.UseFragmentShader(program_handle);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
- shader_config.enable.Value(), shader_config.offset);
- break;
- }
-
- // Stage indices are 0 - 5
- const size_t stage = index == 0 ? 0 : index - 1;
- shaders[stage] = shader;
-
- SetupDrawTextures(shader, stage);
- SetupDrawImages(shader, stage);
-
- buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
-
- buffer_cache.UnbindGraphicsStorageBuffers(stage);
- u32 ssbo_index = 0;
- for (const auto& buffer : shader->GetEntries().global_memory_entries) {
- buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
- buffer.cbuf_offset, buffer.is_written);
- ++ssbo_index;
- }
-
- // Workaround for Intel drivers.
- // When a clip distance is enabled but not set in the shader it crops parts of the screen
- // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
- // clip distances only when it's written by a shader stage.
- clip_distances |= shader->GetEntries().clip_distances;
-
- // When VertexA is enabled, we have dual vertex shaders
- if (program == Maxwell::ShaderProgram::VertexA) {
- // VertexB was combined with VertexA, so we skip the VertexB iteration
- ++index;
- }
- }
- SyncClipEnabled(clip_distances);
- maxwell3d.dirty.flags[Dirty::Shaders] = false;
-
- buffer_cache.UpdateGraphicsBuffers(is_indexed);
-
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
-
- buffer_cache.BindHostGeometryBuffers(is_indexed);
-
- size_t image_view_index = 0;
- size_t texture_index = 0;
- size_t image_index = 0;
- for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
- const Shader* const shader = shaders[stage];
- if (!shader) {
- continue;
- }
- buffer_cache.BindHostStageBuffers(stage);
- const auto& base = device.GetBaseBindings(stage);
- BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
- texture_index, image_index);
- }
-}
-
void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
- shader_cache.LoadDiskCache(title_id, stop_loading, callback);
+ shader_cache.LoadDiskResources(title_id, stop_loading, callback);
}
void RasterizerOpenGL::Clear() {
@@ -432,16 +218,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncState();
- // Setup shaders and their used resources.
+ GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
+ if (!pipeline) {
+ return;
+ }
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- SetupShaders(is_indexed);
-
- texture_cache.UpdateRenderTargets(false);
- state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
- program_manager.BindGraphicsPipeline();
+ pipeline->Configure(is_indexed);
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
- BeginTransformFeedback(primitive_mode);
+ BeginTransformFeedback(pipeline, primitive_mode);
const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
const GLsizei num_instances =
@@ -480,35 +265,24 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
num_instances, base_instance);
}
}
-
EndTransformFeedback();
++num_queued_commands;
+ has_written_global_memory |= pipeline->WritesGlobalMemory();
gpu.TickWork();
}
-void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
- Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
-
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- BindComputeTextures(kernel);
-
- const auto& entries = kernel->GetEntries();
- buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
- buffer_cache.UnbindComputeStorageBuffers();
- u32 ssbo_index = 0;
- for (const auto& buffer : entries.global_memory_entries) {
- buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
- buffer.is_written);
- ++ssbo_index;
- }
- buffer_cache.UpdateComputeBuffers();
- buffer_cache.BindHostComputeBuffers();
-
- const auto& launch_desc = kepler_compute.launch_description;
- glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
+void RasterizerOpenGL::DispatchCompute() {
+ ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
+ if (!pipeline) {
+ return;
+ }
+ pipeline->Configure();
+ const auto& qmd{kepler_compute.launch_description};
+ glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
++num_queued_commands;
+ has_written_global_memory |= pipeline->WritesGlobalMemory();
}
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -634,6 +408,13 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
fence_manager.SignalSyncPoint(value);
}
+void RasterizerOpenGL::SignalReference() {
+ if (!gpu.IsAsync()) {
+ return;
+ }
+ fence_manager.SignalOrdering();
+}
+
void RasterizerOpenGL::ReleaseFences() {
if (!gpu.IsAsync()) {
return;
@@ -650,10 +431,11 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
void RasterizerOpenGL::WaitForIdle() {
glMemoryBarrier(GL_ALL_BARRIER_BITS);
+ SignalReference();
}
void RasterizerOpenGL::FragmentBarrier() {
- glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
+ glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
}
void RasterizerOpenGL::TiledCacheBarrier() {
@@ -666,6 +448,13 @@ void RasterizerOpenGL::FlushCommands() {
return;
}
num_queued_commands = 0;
+
+    // Make sure memory stored from the previous GL command stream is visible.
+    // This is only needed with assembly shaders, where we write to GPU memory through raw pointers.
+ if (has_written_global_memory) {
+ has_written_global_memory = false;
+ glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
+ }
glFlush();
}
@@ -693,6 +482,10 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
return true;
}
+Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() {
+ return accelerate_dma;
+}
+
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (framebuffer_addr == 0) {
@@ -709,111 +502,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
- screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
+ screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
}
-void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
- image_view_indices.clear();
- sampler_handles.clear();
-
- texture_cache.SynchronizeComputeDescriptors();
-
- SetupComputeTextures(kernel);
- SetupComputeImages(kernel);
-
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- texture_cache.FillComputeImageViews(indices_span, image_view_ids);
-
- program_manager.BindCompute(kernel->GetHandle());
- size_t image_view_index = 0;
- size_t texture_index = 0;
- size_t image_index = 0;
- BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
-}
-
-void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
- GLuint base_image, size_t& image_view_index,
- size_t& texture_index, size_t& image_index) {
- const GLuint* const samplers = sampler_handles.data() + texture_index;
- const GLuint* const textures = texture_handles.data() + texture_index;
- const GLuint* const images = image_handles.data() + image_index;
-
- const size_t num_samplers = entries.samplers.size();
- for (const auto& sampler : entries.samplers) {
- for (size_t i = 0; i < sampler.size; ++i) {
- const ImageViewId image_view_id = image_view_ids[image_view_index++];
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
- texture_handles[texture_index++] = handle;
- }
- }
- const size_t num_images = entries.images.size();
- for (size_t unit = 0; unit < num_images; ++unit) {
- // TODO: Mark as modified
- const ImageViewId image_view_id = image_view_ids[image_view_index++];
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
- image_handles[image_index] = handle;
- ++image_index;
- }
- if (num_samplers > 0) {
- glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
- glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
- }
- if (num_images > 0) {
- glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
- }
-}
-
-void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
- const bool via_header_index =
- maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : shader->GetEntries().samplers) {
- const auto shader_type = static_cast<ShaderType>(stage_index);
- for (size_t index = 0; index < entry.size; ++index) {
- const auto handle =
- GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
- const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
- sampler_handles.push_back(sampler->Handle());
- image_view_indices.push_back(handle.image);
- }
- }
-}
-
-void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : kernel->GetEntries().samplers) {
- for (size_t i = 0; i < entry.size; ++i) {
- const auto handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
- const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
- sampler_handles.push_back(sampler->Handle());
- image_view_indices.push_back(handle.image);
- }
- }
-}
-
-void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
- const bool via_header_index =
- maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : shader->GetEntries().images) {
- const auto shader_type = static_cast<ShaderType>(stage_index);
- const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : shader->GetEntries().images) {
- const auto handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
- image_view_indices.push_back(handle.image);
- }
-}
-
void RasterizerOpenGL::SyncState() {
SyncViewport();
SyncRasterizeEnable();
@@ -929,7 +622,7 @@ void RasterizerOpenGL::SyncDepthClamp() {
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
auto& flags = maxwell3d.dirty.flags;
- if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
+ if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) {
return;
}
flags[Dirty::ClipDistances] = false;
@@ -1306,68 +999,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
}
-void RasterizerOpenGL::SyncTransformFeedback() {
- // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
- // when this is required.
- const auto& regs = maxwell3d.regs;
-
- static constexpr std::size_t STRIDE = 3;
- std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
- std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
-
- GLint* cursor = attribs.data();
- GLint* current_stream = streams.data();
-
- for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
- const auto& layout = regs.tfb_layouts[feedback];
- UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
- if (layout.varying_count == 0) {
- continue;
- }
-
- *current_stream = static_cast<GLint>(feedback);
- if (current_stream != streams.data()) {
- // When stepping one stream, push the expected token
- cursor[0] = GL_NEXT_BUFFER_NV;
- cursor[1] = 0;
- cursor[2] = 0;
- cursor += STRIDE;
- }
- ++current_stream;
-
- const auto& locations = regs.tfb_varying_locs[feedback];
- std::optional<u8> current_index;
- for (u32 offset = 0; offset < layout.varying_count; ++offset) {
- const u8 location = locations[offset];
- const u8 index = location / 4;
-
- if (current_index == index) {
- // Increase number of components of the previous attachment
- ++cursor[-2];
- continue;
- }
- current_index = index;
-
- std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
- cursor[1] = 1;
- cursor += STRIDE;
- }
- }
-
- const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
- const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
- glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
- GL_INTERLEAVED_ATTRIBS);
-}
-
-void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
+void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) {
const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
- if (device.UseAssemblyShaders()) {
- SyncTransformFeedback();
- }
+ program->ConfigureTransformFeedback();
+
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@@ -1381,11 +1019,21 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
}
void RasterizerOpenGL::EndTransformFeedback() {
- const auto& regs = maxwell3d.regs;
- if (regs.tfb_enabled == 0) {
- return;
+ if (maxwell3d.regs.tfb_enabled != 0) {
+ glEndTransformFeedback();
}
- glEndTransformFeedback();
+}
+
+AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
+
+bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ return buffer_cache.DMACopy(src_address, dest_address, amount);
+}
+
+bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ return buffer_cache.DMAClear(src_address, amount, value);
}
} // namespace OpenGL
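
AccelerateDMA is a thin adapter between the DMA engine and the buffer cache: each entry point takes the cache's mutex and forwards to DMACopy/DMAClear, returning false when the transfer cannot be serviced from cached buffers so the caller can fall back to the generic memory path. A minimal sketch of that shape, using a stand-in cache type rather than the real BufferCache API:

#include <cstdint>
#include <mutex>

// Illustrative stand-in for the real buffer cache.
struct FakeBufferCache {
    std::mutex mutex;
    bool DMACopy(std::uint64_t, std::uint64_t, std::uint64_t) { return true; }
    bool DMAClear(std::uint64_t, std::uint64_t, std::uint32_t) { return true; }
};

class AccelerateDMASketch {
public:
    explicit AccelerateDMASketch(FakeBufferCache& cache_) : cache{cache_} {}

    bool BufferCopy(std::uint64_t src, std::uint64_t dst, std::uint64_t amount) {
        // Lock the cache for the duration of the copy, like the code above.
        std::scoped_lock lock{cache.mutex};
        return cache.DMACopy(src, dst, amount);
    }

    bool BufferClear(std::uint64_t src, std::uint64_t amount, std::uint32_t value) {
        std::scoped_lock lock{cache.mutex};
        return cache.DMAClear(src, amount, value);
    }

private:
    FakeBufferCache& cache;
};

int main() {
    FakeBufferCache cache;
    AccelerateDMASketch dma{cache};
    return dma.BufferCopy(0, 0x1000, 64) ? 0 : 1;
}
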
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 482efed7a..d0397b745 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,6 +19,7 @@
#include "common/common_types.h"
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -27,11 +28,9 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
namespace Core::Memory {
@@ -58,6 +57,18 @@ struct BindlessSSBO {
};
static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
+class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
+public:
+ explicit AccelerateDMA(BufferCache& buffer_cache);
+
+ bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) override;
+
+ bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
+
+private:
+ BufferCache& buffer_cache;
+};
+
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -68,7 +79,7 @@ public:
void Draw(bool is_indexed, bool is_instanced) override;
void Clear() override;
- void DispatchCompute(GPUVAddr code_addr) override;
+ void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -83,6 +94,7 @@ public:
void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
+ void SignalReference() override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
@@ -93,6 +105,7 @@ public:
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
+ Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
@@ -103,36 +116,11 @@ public:
return num_queued_commands > 0;
}
- VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
- return async_shaders;
- }
-
- const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
- return async_shaders;
- }
-
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
- void BindComputeTextures(Shader* kernel);
-
- void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
- size_t& image_view_index, size_t& texture_index, size_t& image_index);
-
- /// Configures the current textures to use for the draw command.
- void SetupDrawTextures(const Shader* shader, size_t stage_index);
-
- /// Configures the textures used in a compute shader.
- void SetupComputeTextures(const Shader* kernel);
-
- /// Configures images in a graphics shader.
- void SetupDrawImages(const Shader* shader, size_t stage_index);
-
- /// Configures images in a compute shader.
- void SetupComputeImages(const Shader* shader);
-
/// Syncs state to match guest's
void SyncState();
@@ -205,18 +193,12 @@ private:
/// Syncs vertex instances to match the guest state
void SyncVertexInstances();
- /// Syncs transform feedback state to match guest state
- /// @note Only valid on assembly shaders
- void SyncTransformFeedback();
-
/// Begin a transform feedback
- void BeginTransformFeedback(GLenum primitive_mode);
+ void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode);
/// End a transform feedback
void EndTransformFeedback();
- void SetupShaders(bool is_indexed);
-
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
@@ -231,12 +213,11 @@ private:
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
- ShaderCacheOpenGL shader_cache;
+ ShaderCache shader_cache;
QueryCache query_cache;
+ AccelerateDMA accelerate_dma;
FenceManagerOpenGL fence_manager;
- VideoCommon::Shader::AsyncShaders async_shaders;
-
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
@@ -244,7 +225,8 @@ private:
std::array<GLuint, MAX_IMAGES> image_handles{};
    /// Number of commands queued to the OpenGL driver. Reset on flush.
- std::size_t num_queued_commands = 0;
+ size_t num_queued_commands = 0;
+ bool has_written_global_memory = false;
u32 last_clip_distance_mask = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 3428e5e21..8695c29e3 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -83,18 +83,6 @@ void OGLSampler::Release() {
handle = 0;
}
-void OGLShader::Create(std::string_view source, GLenum type) {
- if (handle != 0) {
- return;
- }
- if (source.empty()) {
- return;
- }
-
- MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
- handle = GLShader::LoadShader(source, type);
-}
-
void OGLShader::Release() {
if (handle == 0)
return;
@@ -104,21 +92,6 @@ void OGLShader::Release() {
handle = 0;
}
-void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
- const char* frag_shader, bool separable_program,
- bool hint_retrievable) {
- OGLShader vert, geo, frag;
- if (vert_shader)
- vert.Create(vert_shader, GL_VERTEX_SHADER);
- if (geo_shader)
- geo.Create(geo_shader, GL_GEOMETRY_SHADER);
- if (frag_shader)
- frag.Create(frag_shader, GL_FRAGMENT_SHADER);
-
- MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
- Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
-}
-
void OGLProgram::Release() {
if (handle == 0)
return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 552d79db4..b2d5bfd3b 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -8,7 +8,6 @@
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
@@ -128,8 +127,6 @@ public:
return *this;
}
- void Create(std::string_view source, GLenum type);
-
void Release();
GLuint handle = 0;
@@ -151,17 +148,6 @@ public:
return *this;
}
- template <typename... T>
- void Create(bool separable_program, bool hint_retrievable, T... shaders) {
- if (handle != 0)
- return;
- handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
- }
-
- /// Creates a new internal OpenGL resource and stores the handle
- void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
- bool separable_program = false, bool hint_retrievable = false);
-
/// Deletes the internal OpenGL resource
void Release();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 5a01c59ec..8d6cc074c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -3,606 +3,544 @@
// Refer to the license.txt file included.
#include <atomic>
+#include <fstream>
#include <functional>
#include <mutex>
-#include <optional>
#include <string>
#include <thread>
-#include <unordered_set>
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
+#include "common/settings.h"
+#include "common/thread_worker.h"
#include "core/core.h"
-#include "core/frontend/emu_window.h"
+#include "shader_recompiler/backend/glasm/emit_glasm.h"
+#include "shader_recompiler/backend/glsl/emit_glsl.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate_program.h"
+#include "shader_recompiler/profile.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
-#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
#include "video_core/shader_notify.h"
namespace OpenGL {
-
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::GetShaderAddress;
-using VideoCommon::Shader::GetShaderCode;
-using VideoCommon::Shader::GetUniqueIdentifier;
-using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::Registry;
-using VideoCommon::Shader::ShaderIR;
-using VideoCommon::Shader::STAGE_MAIN_OFFSET;
-
namespace {
-
-constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
-
-/// Gets the shader type from a Maxwell program type
-constexpr GLenum GetGLShaderType(ShaderType shader_type) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return GL_VERTEX_SHADER;
- case ShaderType::Geometry:
- return GL_GEOMETRY_SHADER;
- case ShaderType::Fragment:
- return GL_FRAGMENT_SHADER;
- case ShaderType::Compute:
- return GL_COMPUTE_SHADER;
- default:
- return GL_NONE;
- }
+using Shader::Backend::GLASM::EmitGLASM;
+using Shader::Backend::GLSL::EmitGLSL;
+using Shader::Backend::SPIRV::EmitSPIRV;
+using Shader::Maxwell::MergeDualVertexPrograms;
+using Shader::Maxwell::TranslateProgram;
+using VideoCommon::ComputeEnvironment;
+using VideoCommon::FileEnvironment;
+using VideoCommon::GenericEnvironment;
+using VideoCommon::GraphicsEnvironment;
+using VideoCommon::LoadPipelines;
+using VideoCommon::SerializePipeline;
+using Context = ShaderContext::Context;
+
+constexpr u32 CACHE_VERSION = 5;
+
+template <typename Container>
+auto MakeSpan(Container& container) {
+ return std::span(container.data(), container.size());
}
-constexpr const char* GetShaderTypeName(ShaderType shader_type) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return "VS";
- case ShaderType::TesselationControl:
- return "HS";
- case ShaderType::TesselationEval:
- return "DS";
- case ShaderType::Geometry:
- return "GS";
- case ShaderType::Fragment:
- return "FS";
- case ShaderType::Compute:
- return "CS";
- }
- return "UNK";
+Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
+ const Shader::IR::Program& program,
+ const Shader::IR::Program* previous_program,
+ bool glasm_use_storage_buffers, bool use_assembly_shaders) {
+ Shader::RuntimeInfo info;
+ if (previous_program) {
+ info.previous_stage_stores = previous_program->info.stores;
+ } else {
+ // Mark all stores as available for vertex shaders
+ info.previous_stage_stores.mask.set();
+ }
+ switch (program.stage) {
+ case Shader::Stage::VertexB:
+ case Shader::Stage::Geometry:
+ if (!use_assembly_shaders && key.xfb_enabled != 0) {
+ info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ }
+ break;
+ case Shader::Stage::TessellationEval:
+ info.tess_clockwise = key.tessellation_clockwise != 0;
+ info.tess_primitive = [&key] {
+ switch (key.tessellation_primitive) {
+ case Maxwell::TessellationPrimitive::Isolines:
+ return Shader::TessPrimitive::Isolines;
+ case Maxwell::TessellationPrimitive::Triangles:
+ return Shader::TessPrimitive::Triangles;
+ case Maxwell::TessellationPrimitive::Quads:
+ return Shader::TessPrimitive::Quads;
+ }
+ UNREACHABLE();
+ return Shader::TessPrimitive::Triangles;
+ }();
+ info.tess_spacing = [&] {
+ switch (key.tessellation_spacing) {
+ case Maxwell::TessellationSpacing::Equal:
+ return Shader::TessSpacing::Equal;
+ case Maxwell::TessellationSpacing::FractionalOdd:
+ return Shader::TessSpacing::FractionalOdd;
+ case Maxwell::TessellationSpacing::FractionalEven:
+ return Shader::TessSpacing::FractionalEven;
+ }
+ UNREACHABLE();
+ return Shader::TessSpacing::Equal;
+ }();
+ break;
+ case Shader::Stage::Fragment:
+ info.force_early_z = key.early_z != 0;
+ break;
+ default:
+ break;
+ }
+ switch (key.gs_input_topology) {
+ case Maxwell::PrimitiveTopology::Points:
+ info.input_topology = Shader::InputTopology::Points;
+ break;
+ case Maxwell::PrimitiveTopology::Lines:
+ case Maxwell::PrimitiveTopology::LineLoop:
+ case Maxwell::PrimitiveTopology::LineStrip:
+ info.input_topology = Shader::InputTopology::Lines;
+ break;
+ case Maxwell::PrimitiveTopology::Triangles:
+ case Maxwell::PrimitiveTopology::TriangleStrip:
+ case Maxwell::PrimitiveTopology::TriangleFan:
+ case Maxwell::PrimitiveTopology::Quads:
+ case Maxwell::PrimitiveTopology::QuadStrip:
+ case Maxwell::PrimitiveTopology::Polygon:
+ case Maxwell::PrimitiveTopology::Patches:
+ info.input_topology = Shader::InputTopology::Triangles;
+ break;
+ case Maxwell::PrimitiveTopology::LinesAdjacency:
+ case Maxwell::PrimitiveTopology::LineStripAdjacency:
+ info.input_topology = Shader::InputTopology::LinesAdjacency;
+ break;
+ case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+ case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+ info.input_topology = Shader::InputTopology::TrianglesAdjacency;
+ break;
+ }
+ info.glasm_use_storage_buffers = glasm_use_storage_buffers;
+ return info;
}
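
MakeRuntimeInfo initializes tess_primitive and tess_spacing with immediately-invoked lambdas, which lets each field be set from a switch without a mutable temporary leaking into the enclosing scope. The idiom in isolation (the enum and values are illustrative):

enum class Spacing { Equal, FractionalOdd, FractionalEven };

Spacing Translate(int raw) {
    // Immediately-invoked lambda: every path returns a value, so the result
    // can initialize a const variable directly.
    const Spacing spacing = [raw] {
        switch (raw) {
        case 0:
            return Spacing::Equal;
        case 1:
            return Spacing::FractionalOdd;
        default:
            return Spacing::FractionalEven;
        }
    }();
    return spacing;
}

int main() {
    return Translate(1) == Spacing::FractionalOdd ? 0 : 1;
}
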
-constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
- switch (program_type) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
- return ShaderType::Vertex;
- case Maxwell::ShaderProgram::TesselationControl:
- return ShaderType::TesselationControl;
- case Maxwell::ShaderProgram::TesselationEval:
- return ShaderType::TesselationEval;
- case Maxwell::ShaderProgram::Geometry:
- return ShaderType::Geometry;
- case Maxwell::ShaderProgram::Fragment:
- return ShaderType::Fragment;
- }
- return {};
+void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
+ std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) {
+ return VideoCommon::TransformFeedbackState::Layout{
+ .stream = layout.stream,
+ .varying_count = layout.varying_count,
+ .stride = layout.stride,
+ };
+ });
+ state.varyings = regs.tfb_varying_locs;
}
+} // Anonymous namespace
-constexpr GLenum AssemblyEnum(ShaderType shader_type) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return GL_VERTEX_PROGRAM_NV;
- case ShaderType::TesselationControl:
- return GL_TESS_CONTROL_PROGRAM_NV;
- case ShaderType::TesselationEval:
- return GL_TESS_EVALUATION_PROGRAM_NV;
- case ShaderType::Geometry:
- return GL_GEOMETRY_PROGRAM_NV;
- case ShaderType::Fragment:
- return GL_FRAGMENT_PROGRAM_NV;
- case ShaderType::Compute:
- return GL_COMPUTE_PROGRAM_NV;
+ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_,
+ TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_,
+ VideoCore::ShaderNotify& shader_notify_)
+ : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+ emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
+ buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
+ shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
+ profile{
+ .supported_spirv = 0x00010000,
+
+ .unified_descriptor_binding = false,
+ .support_descriptor_aliasing = false,
+ .support_int8 = false,
+ .support_int16 = false,
+ .support_int64 = device.HasShaderInt64(),
+ .support_vertex_instance_id = true,
+ .support_float_controls = false,
+ .support_separate_denorm_behavior = false,
+ .support_separate_rounding_mode = false,
+ .support_fp16_denorm_preserve = false,
+ .support_fp32_denorm_preserve = false,
+ .support_fp16_denorm_flush = false,
+ .support_fp32_denorm_flush = false,
+ .support_fp16_signed_zero_nan_preserve = false,
+ .support_fp32_signed_zero_nan_preserve = false,
+ .support_fp64_signed_zero_nan_preserve = false,
+ .support_explicit_workgroup_layout = false,
+ .support_vote = true,
+ .support_viewport_index_layer_non_geometry =
+ device.HasNvViewportArray2() || device.HasVertexViewportLayer(),
+ .support_viewport_mask = device.HasNvViewportArray2(),
+ .support_typeless_image_loads = device.HasImageLoadFormatted(),
+ .support_demote_to_helper_invocation = false,
+ .support_int64_atomics = false,
+ .support_derivative_control = device.HasDerivativeControl(),
+ .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
+ .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
+ .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
+ .support_gl_texture_shadow_lod = device.HasTextureShadowLod(),
+ .support_gl_warp_intrinsics = false,
+ .support_gl_variable_aoffi = device.HasVariableAoffi(),
+ .support_gl_sparse_textures = device.HasSparseTexture2(),
+ .support_gl_derivative_control = device.HasDerivativeControl(),
+
+ .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
+
+ .lower_left_origin_mode = true,
+ .need_declared_frag_colors = true,
+ .need_fastmath_off = device.NeedsFastmathOff(),
+
+ .has_broken_spirv_clamp = true,
+ .has_broken_unsigned_image_offsets = true,
+ .has_broken_signed_operations = true,
+ .has_broken_fp16_float_controls = false,
+ .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
+ .has_gl_precise_bug = device.HasPreciseBug(),
+ .ignore_nan_fp_comparisons = true,
+ .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
+ },
+ host_info{
+ .support_float16 = false,
+ .support_int64 = device.HasShaderInt64(),
+ } {
+ if (use_asynchronous_shaders) {
+ workers = CreateWorkers();
}
- return {};
}
-std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
- return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
-}
+ShaderCache::~ShaderCache() = default;
-std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
- const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
- const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
- entry.graphics_info, entry.compute_info};
- auto registry = std::make_shared<Registry>(entry.type, info);
- for (const auto& [address, value] : entry.keys) {
- const auto [buffer, offset] = address;
- registry->InsertKey(buffer, offset, value);
- }
- for (const auto& [offset, sampler] : entry.bound_samplers) {
- registry->InsertBoundSampler(offset, sampler);
+void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) {
+ if (title_id == 0) {
+ return;
}
- for (const auto& [key, sampler] : entry.bindless_samplers) {
- const auto [buffer, offset] = key;
- registry->InsertBindlessSampler(buffer, offset, sampler);
+ const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
+ const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)};
+ if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
+ LOG_ERROR(Common_Filesystem, "Failed to create shader cache directories");
+ return;
}
- return registry;
-}
-
-std::unordered_set<GLenum> GetSupportedFormats() {
- GLint num_formats;
- glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
+ shader_cache_filename = base_dir / "opengl.bin";
+
+ if (!workers) {
+ workers = CreateWorkers();
+ }
+ struct {
+ std::mutex mutex;
+ size_t total{};
+ size_t built{};
+ bool has_loaded{};
+ } state;
+
+ const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
+ ComputePipelineKey key;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key));
+ workers->QueueWork(
+ [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
+ ctx->pools.ReleaseContents();
+ auto pipeline{CreateComputePipeline(ctx->pools, key, env)};
+ std::lock_guard lock{state.mutex};
+ if (pipeline) {
+ compute_cache.emplace(key, std::move(pipeline));
+ }
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
+ ++state.total;
+ }};
+ const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
+ GraphicsPipelineKey key;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key));
+ workers->QueueWork(
+ [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable {
+ boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
+ for (auto& env : envs) {
+ env_ptrs.push_back(&env);
+ }
+ ctx->pools.ReleaseContents();
+ auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
+ std::lock_guard lock{state.mutex};
+ if (pipeline) {
+ graphics_cache.emplace(key, std::move(pipeline));
+ }
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
+ ++state.total;
+ }};
+ LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics);
- std::vector<GLint> formats(num_formats);
- glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
+ std::unique_lock lock{state.mutex};
+ callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
+ state.has_loaded = true;
+ lock.unlock();
- std::unordered_set<GLenum> supported_formats;
- for (const GLint format : formats) {
- supported_formats.insert(static_cast<GLenum>(format));
+ workers->WaitForRequests();
+ if (!use_asynchronous_shaders) {
+ workers.reset();
}
- return supported_formats;
}
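
LoadDiskResources captures the stack-local state struct by reference in every queued task, which is safe only because WaitForRequests() blocks until the queue drains before the function returns. A reduced sketch of that lifetime contract, using plain std::thread in place of the thread worker:

#include <cstddef>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

int main() {
    // Shared progress state, mirroring the local `state` struct above. It is
    // captured by reference, which is safe only because every thread is joined
    // (the analogue of WaitForRequests) before `state` goes out of scope.
    struct {
        std::mutex mutex;
        std::size_t total{};
        std::size_t built{};
    } state;

    std::vector<std::thread> workers;
    for (int i = 0; i < 8; ++i) {
        workers.emplace_back([&state] {
            // ... build one pipeline here ...
            std::scoped_lock lock{state.mutex};
            ++state.built;
        });
        ++state.total;
    }
    for (std::thread& thread : workers) {
        thread.join();
    }
    std::printf("built %zu/%zu pipelines\n", state.built, state.total);
    return 0;
}
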
-} // Anonymous namespace
-
-ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
- const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
- if (device.UseDriverCache()) {
- // Ignore hint retrievable if we are using the driver cache
- hint_retrievable = false;
- }
- const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
- LOG_INFO(Render_OpenGL, "{}", shader_id);
-
- auto program = std::make_shared<ProgramHandle>();
-
- if (device.UseAssemblyShaders()) {
- const std::string arb =
- DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
-
- GLuint& arb_prog = program->assembly_program.handle;
-
-// Commented out functions signal OpenGL errors but are compatible with apitrace.
-// Use them only to capture and replay on apitrace.
-#if 0
- glGenProgramsNV(1, &arb_prog);
- glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
- reinterpret_cast<const GLubyte*>(arb.data()));
-#else
- glGenProgramsARB(1, &arb_prog);
- glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
- static_cast<GLsizei>(arb.size()), arb.data());
-#endif
- const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
- if (err && *err) {
- LOG_CRITICAL(Render_OpenGL, "{}", err);
- LOG_INFO(Render_OpenGL, "\n{}", arb);
- }
- } else {
- const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
- OGLShader shader;
- shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
-
- program->source_program.Create(true, hint_retrievable, shader.handle);
- }
-
- return program;
+GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
+ if (!RefreshStages(graphics_key.unique_hashes)) {
+ current_pipeline = nullptr;
+ return nullptr;
+ }
+ const auto& regs{maxwell3d.regs};
+ graphics_key.raw = 0;
+ graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
+ graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
+ ? regs.draw.topology.Value()
+ : Maxwell::PrimitiveTopology{});
+ graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value());
+ graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value());
+ graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
+ graphics_key.xfb_enabled.Assign(regs.tfb_enabled != 0 ? 1 : 0);
+ if (graphics_key.xfb_enabled) {
+ SetXfbState(graphics_key.xfb_state, regs);
+ }
+ if (current_pipeline && graphics_key == current_pipeline->Key()) {
+ return BuiltPipeline(current_pipeline);
+ }
+ return CurrentGraphicsPipelineSlowPath();
}
-Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_,
- ProgramSharedPtr program_, bool is_built_)
- : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
- is_built{is_built_} {
- handle = program->assembly_program.handle;
- if (handle == 0) {
- handle = program->source_program.handle;
+GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() {
+ const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
+ auto& pipeline{pair->second};
+ if (is_new) {
+ pipeline = CreateGraphicsPipeline();
}
- if (is_built) {
- ASSERT(handle != 0);
+ if (!pipeline) {
+ return nullptr;
}
+ current_pipeline = pipeline.get();
+ return BuiltPipeline(current_pipeline);
}
-Shader::~Shader() = default;
-
-GLuint Shader::GetHandle() const {
- DEBUG_ASSERT(registry->IsConsistent());
- return handle;
-}
-
-bool Shader::IsBuilt() const {
- return is_built;
-}
-
-void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
- program->source_program = std::move(new_program);
- handle = program->source_program.handle;
- is_built = true;
-}
-
-void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
- program->assembly_program = std::move(new_program);
- handle = program->assembly_program.handle;
- is_built = true;
-}
-
-std::unique_ptr<Shader> Shader::CreateStageFromMemory(
- const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
- ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
- const auto shader_type = GetShaderType(program_type);
-
- auto& gpu = params.gpu;
- gpu.ShaderNotify().MarkSharderBuilding();
-
- auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
- if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
- const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- // TODO(Rodrigo): Handle VertexA shaders
- // std::optional<ShaderIR> ir_b;
- // if (!code_b.empty()) {
- // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
- // }
- auto program =
- BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
- ShaderDiskCacheEntry entry;
- entry.type = shader_type;
- entry.code = std::move(code);
- entry.code_b = std::move(code_b);
- entry.unique_identifier = params.unique_identifier;
- entry.bound_buffer = registry->GetBoundBuffer();
- entry.graphics_info = registry->GetGraphicsInfo();
- entry.keys = registry->GetKeys();
- entry.bound_samplers = registry->GetBoundSamplers();
- entry.bindless_samplers = registry->GetBindlessSamplers();
- params.disk_cache.SaveEntry(std::move(entry));
-
- gpu.ShaderNotify().MarkShaderComplete();
-
- return std::unique_ptr<Shader>(new Shader(std::move(registry),
- MakeEntries(params.device, ir, shader_type),
- std::move(program), true));
- } else {
- // Required for entries
- const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- auto entries = MakeEntries(params.device, ir, shader_type);
-
- async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
- std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
- COMPILER_SETTINGS, *registry, cpu_addr);
-
- auto program = std::make_shared<ProgramHandle>();
- return std::unique_ptr<Shader>(
- new Shader(std::move(registry), std::move(entries), std::move(program), false));
+GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept {
+ if (pipeline->IsBuilt()) {
+ return pipeline;
}
-}
-
-std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
- ProgramCode code) {
- auto& gpu = params.gpu;
- gpu.ShaderNotify().MarkSharderBuilding();
-
- auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
- const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- const u64 uid = params.unique_identifier;
- auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
-
- ShaderDiskCacheEntry entry;
- entry.type = ShaderType::Compute;
- entry.code = std::move(code);
- entry.unique_identifier = uid;
- entry.bound_buffer = registry->GetBoundBuffer();
- entry.compute_info = registry->GetComputeInfo();
- entry.keys = registry->GetKeys();
- entry.bound_samplers = registry->GetBoundSamplers();
- entry.bindless_samplers = registry->GetBindlessSamplers();
- params.disk_cache.SaveEntry(std::move(entry));
-
- gpu.ShaderNotify().MarkShaderComplete();
-
- return std::unique_ptr<Shader>(new Shader(std::move(registry),
- MakeEntries(params.device, ir, ShaderType::Compute),
- std::move(program)));
-}
-
-std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader) {
- return std::unique_ptr<Shader>(new Shader(
- precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
-}
-
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
- Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_)
- : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
- maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
-
-ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
-
-void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback) {
- disk_cache.BindTitleID(title_id);
- const std::optional transferable = disk_cache.LoadTransferable();
-
- LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
- transferable.has_value() ? transferable->size() : 0);
-
- if (!transferable) {
- return;
+ if (!use_asynchronous_shaders) {
+ return pipeline;
}
-
- std::vector<ShaderDiskCachePrecompiled> gl_cache;
- if (!device.UseAssemblyShaders() && !device.UseDriverCache()) {
- // Only load precompiled cache when we are not using assembly shaders
- gl_cache = disk_cache.LoadPrecompiled();
+    // If the pipeline uses depth, we can assume the game is not rendering something
+    // that will only be used once.
+ if (maxwell3d.regs.zeta_enable) {
+ return nullptr;
}
- const auto supported_formats = GetSupportedFormats();
-
- // Track if precompiled cache was altered during loading to know if we have to
- // serialize the virtual precompiled cache file back to the hard drive
- bool precompiled_cache_altered = false;
-
- // Inform the frontend about shader build initialization
- if (callback) {
- callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
+    // If the game is using a small index count, we can assume these are full-screen quads.
+    // These shaders are usually only used once to build textures, so we can assume they
+    // cannot be built asynchronously.
+ if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+ return pipeline;
}
+ return nullptr;
+}
- std::mutex mutex;
-    std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex
- std::atomic_bool gl_cache_failed = false;
-
- const auto find_precompiled = [&gl_cache](u64 id) {
- return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier);
- };
-
- const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
- std::size_t end) {
- const auto scope = context->Acquire();
-
- for (std::size_t i = begin; i < end; ++i) {
- if (stop_loading.stop_requested()) {
- return;
- }
- const auto& entry = (*transferable)[i];
- const u64 uid = entry.unique_identifier;
- const auto it = find_precompiled(uid);
- const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
-
- const bool is_compute = entry.type == ShaderType::Compute;
- const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
- auto registry = MakeRegistry(entry);
- const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
-
- ProgramSharedPtr program;
- if (precompiled_entry) {
-                // If the shader is precompiled, attempt to load it
- program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
- if (!program) {
- gl_cache_failed = true;
- }
- }
- if (!program) {
- // Otherwise compile it from GLSL
- program = BuildShader(device, entry.type, uid, ir, *registry, true);
- }
-
- PrecompiledShader shader;
- shader.program = std::move(program);
- shader.registry = std::move(registry);
- shader.entries = MakeEntries(device, ir, entry.type);
-
- std::scoped_lock lock{mutex};
- if (callback) {
- callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
- transferable->size());
- }
- runtime_cache.emplace(entry.unique_identifier, std::move(shader));
- }
- };
-
- const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
- const std::size_t bucket_size{transferable->size() / num_workers};
- std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
- std::vector<std::thread> threads(num_workers);
- for (std::size_t i = 0; i < num_workers; ++i) {
- const bool is_last_worker = i + 1 == num_workers;
- const std::size_t start{bucket_size * i};
- const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
-
- // On some platforms the shared context has to be created from the GUI thread
- contexts[i] = emu_window.CreateSharedContext();
- threads[i] = std::thread(worker, contexts[i].get(), start, end);
+ComputePipeline* ShaderCache::CurrentComputePipeline() {
+ const VideoCommon::ShaderInfo* const shader{ComputeShader()};
+ if (!shader) {
+ return nullptr;
}
- for (auto& thread : threads) {
- thread.join();
+ const auto& qmd{kepler_compute.launch_description};
+ const ComputePipelineKey key{
+ .unique_hash = shader->unique_hash,
+ .shared_memory_size = qmd.shared_alloc,
+ .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
+ };
+ const auto [pair, is_new]{compute_cache.try_emplace(key)};
+ auto& pipeline{pair->second};
+ if (!is_new) {
+ return pipeline.get();
}
+ pipeline = CreateComputePipeline(key, shader);
+ return pipeline.get();
+}
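
The lookup above relies on std::unordered_map::try_emplace: one hash probe both finds an existing pipeline and reserves a default-constructed (null) slot for a new one, which is then filled exactly once. A standalone sketch of the pattern with placeholder types:

    #include <cstddef>
    #include <memory>
    #include <unordered_map>

    struct Key {
        std::size_t value;
        bool operator==(const Key&) const = default;
    };
    struct KeyHash {
        std::size_t operator()(const Key& k) const noexcept { return k.value; }
    };
    struct Pipeline {};

    std::unordered_map<Key, std::unique_ptr<Pipeline>, KeyHash> cache;

    Pipeline* GetOrCreate(const Key& key) {
        const auto [it, is_new] = cache.try_emplace(key);
        if (is_new) {
            // The slot was just created holding a null pointer; build once.
            it->second = std::make_unique<Pipeline>();
        }
        return it->second.get();
    }
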
- if (gl_cache_failed) {
-        // Invalidate the precompiled cache if a dumped shader was rejected
- disk_cache.InvalidatePrecompiled();
- precompiled_cache_altered = true;
- return;
- }
- if (stop_loading.stop_requested()) {
- return;
- }
+std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() {
+ GraphicsEnvironments environments;
+ GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
- if (device.UseAssemblyShaders() || device.UseDriverCache()) {
- // Don't store precompiled binaries for assembly shaders or when using the driver cache
- return;
+ main_pools.ReleaseContents();
+ auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(),
+ use_asynchronous_shaders)};
+ if (!pipeline || shader_cache_filename.empty()) {
+ return pipeline;
}
-
- // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
- // before precompiling them
-
- for (std::size_t i = 0; i < transferable->size(); ++i) {
- const u64 id = (*transferable)[i].unique_identifier;
- const auto it = find_precompiled(id);
- if (it == gl_cache.end()) {
- const GLuint program = runtime_cache.at(id).program->source_program.handle;
- disk_cache.SavePrecompiled(id, program);
- precompiled_cache_altered = true;
+ boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> env_ptrs;
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (graphics_key.unique_hashes[index] != 0) {
+ env_ptrs.push_back(&environments.envs[index]);
}
}
-
- if (precompiled_cache_altered) {
- disk_cache.SaveVirtualPrecompiledFile();
- }
-}
-
-ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
- const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
- const std::unordered_set<GLenum>& supported_formats) {
- if (!supported_formats.contains(precompiled_entry.binary_format)) {
- LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
- return {};
- }
-
- auto program = std::make_shared<ProgramHandle>();
- GLuint& handle = program->source_program.handle;
- handle = glCreateProgram();
- glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
- glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
- static_cast<GLsizei>(precompiled_entry.binary.size()));
-
- GLint link_status;
- glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
- if (link_status == GL_FALSE) {
- LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
- return {};
- }
-
- return program;
+ SerializePipeline(graphics_key, env_ptrs, shader_cache_filename, CACHE_VERSION);
+ return pipeline;
}
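
Serialization above only records environments for active stages: a stage participates only when its unique hash is non-zero. A standalone sketch of that sparse gather (std::vector stands in for the boost static_vector used above):

    #include <array>
    #include <cstddef>
    #include <vector>

    int main() {
        constexpr std::size_t MaxShaderProgram = 6;
        const std::array<unsigned long long, MaxShaderProgram> unique_hashes{0, 7, 0, 9, 0, 0};
        std::array<int, MaxShaderProgram> envs{};

        std::vector<const int*> env_ptrs;
        for (std::size_t index = 0; index < MaxShaderProgram; ++index) {
            if (unique_hashes[index] != 0) {
                env_ptrs.push_back(&envs[index]); // only active stages are serialized
            }
        }
        return static_cast<int>(env_ptrs.size()); // 2 active stages here
    }
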
-Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
- VideoCommon::Shader::AsyncShaders& async_shaders) {
- if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
- auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
- if (last_shader->IsBuilt()) {
- return last_shader;
+std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
+ ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
+ std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
+ LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
+ size_t env_index{};
+ u32 total_storage_buffers{};
+ std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
+ const bool uses_vertex_a{key.unique_hashes[0] != 0};
+ const bool uses_vertex_b{key.unique_hashes[1] != 0};
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] == 0) {
+ continue;
}
- }
+ Shader::Environment& env{*envs[env_index]};
+ ++env_index;
- const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
+ const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
+ Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+ if (!uses_vertex_a || index != 1) {
+ // Normal path
+ programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
- if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
- auto completed_work = async_shaders.GetCompletedWork();
- for (auto& work : completed_work) {
- Shader* shader = TryGet(work.cpu_address);
- gpu.ShaderNotify().MarkShaderComplete();
- if (shader == nullptr) {
- continue;
+ for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
+ total_storage_buffers += desc.count;
}
- using namespace VideoCommon::Shader;
- if (work.backend == AsyncShaders::Backend::OpenGL) {
- shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
- } else if (work.backend == AsyncShaders::Backend::GLASM) {
- shader->AsyncGLASMBuilt(std::move(work.program.glasm));
+ } else {
+ // VertexB path when VertexA is present.
+ auto& program_va{programs[0]};
+ auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+ for (const auto& desc : program_vb.info.storage_buffers_descriptors) {
+ total_storage_buffers += desc.count;
}
-
- auto& registry = shader->GetRegistry();
-
- ShaderDiskCacheEntry entry;
- entry.type = work.shader_type;
- entry.code = std::move(work.code);
- entry.code_b = std::move(work.code_b);
- entry.unique_identifier = work.uid;
- entry.bound_buffer = registry.GetBoundBuffer();
- entry.graphics_info = registry.GetGraphicsInfo();
- entry.keys = registry.GetKeys();
- entry.bound_samplers = registry.GetBoundSamplers();
- entry.bindless_samplers = registry.GetBindlessSamplers();
- disk_cache.SaveEntry(std::move(entry));
+ programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
}
}
-
- // Look up shader in the cache based on address
- const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
- if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
- return last_shaders[static_cast<std::size_t>(program)] = shader;
- }
-
- const u8* const host_ptr{gpu_memory.GetPointer(address)};
-
- // No shader found - create a new one
- ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
- ProgramCode code_b;
- if (program == Maxwell::ShaderProgram::VertexA) {
- const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
- const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
- code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
- }
- const std::size_t code_size = code.size() * sizeof(u64);
-
- const u64 unique_identifier = GetUniqueIdentifier(
- GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
-
- const ShaderParameters params{gpu, maxwell3d, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
-
- std::unique_ptr<Shader> shader;
- const auto found = runtime_cache.find(unique_identifier);
- if (found == runtime_cache.end()) {
- shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
- async_shaders, cpu_addr.value_or(0));
- } else {
- shader = Shader::CreateFromCache(params, found->second);
- }
-
- Shader* const result = shader.get();
- if (cpu_addr) {
- Register(std::move(shader), *cpu_addr, code_size);
- } else {
- null_shader = std::move(shader);
+ const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()};
+ const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit};
+
+ std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
+
+ OGLProgram source_program;
+ std::array<std::string, 5> sources;
+ std::array<std::vector<u32>, 5> sources_spirv;
+ Shader::Backend::Bindings binding;
+ Shader::IR::Program* previous_program{};
+ const bool use_glasm{device.UseAssemblyShaders()};
+ const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0;
+ for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] == 0) {
+ continue;
+ }
+ UNIMPLEMENTED_IF(index == 0);
+
+ Shader::IR::Program& program{programs[index]};
+ const size_t stage_index{index - 1};
+ infos[stage_index] = &program.info;
+
+ const auto runtime_info{
+ MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)};
+ switch (device.GetShaderBackend()) {
+ case Settings::ShaderBackend::GLSL:
+ sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding);
+ break;
+ case Settings::ShaderBackend::GLASM:
+ sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding);
+ break;
+ case Settings::ShaderBackend::SPIRV:
+ sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding);
+ break;
+ }
+ previous_program = &program;
}
+ auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
+ return std::make_unique<GraphicsPipeline>(
+ device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
+ thread_worker, &shader_notify, sources, sources_spirv, infos, key);
- return last_shaders[static_cast<std::size_t>(program)] = result;
+} catch (Shader::Exception& exception) {
+ LOG_ERROR(Render_OpenGL, "{}", exception.what());
+ return nullptr;
}
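
CreateGraphicsPipeline is written as a function-try-block: the try begins right after the parameter list, so any Shader::Exception thrown during translation or emission is caught at the bottom and turned into a null pipeline instead of unwinding into the renderer. A standalone sketch of the construct:

    #include <cstdio>
    #include <memory>
    #include <stdexcept>

    std::unique_ptr<int> Build(bool fail) try {
        if (fail) {
            throw std::runtime_error("translation failed");
        }
        return std::make_unique<int>(42);
    } catch (const std::exception& e) {
        std::fprintf(stderr, "%s\n", e.what());
        return nullptr; // callers treat null as "could not build this pipeline"
    }
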
-Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
- const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
-
- if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
- return kernel;
- }
-
- // No kernel found, create a new one
- const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
- ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
- const std::size_t code_size{code.size() * sizeof(u64)};
- const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
-
- const ShaderParameters params{gpu, kepler_compute, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
+std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
+ const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) {
+ const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+ const auto& qmd{kepler_compute.launch_description};
+ ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ env.SetCachedSize(shader->size_bytes);
+
+ main_pools.ReleaseContents();
+ auto pipeline{CreateComputePipeline(main_pools, key, env)};
+ if (!pipeline || shader_cache_filename.empty()) {
+ return pipeline;
+ }
+ SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, shader_cache_filename,
+ CACHE_VERSION);
+ return pipeline;
+}
- std::unique_ptr<Shader> kernel;
- const auto found = runtime_cache.find(unique_identifier);
- if (found == runtime_cache.end()) {
- kernel = Shader::CreateKernelFromMemory(params, std::move(code));
- } else {
- kernel = Shader::CreateFromCache(params, found->second);
- }
+std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
+ ShaderContext::ShaderPools& pools, const ComputePipelineKey& key,
+ Shader::Environment& env) try {
+ LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
+
+ Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
+ auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+
+ u32 num_storage_buffers{};
+ for (const auto& desc : program.info.storage_buffers_descriptors) {
+ num_storage_buffers += desc.count;
+ }
+ Shader::RuntimeInfo info;
+ info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
+
+ std::string code{};
+ std::vector<u32> code_spirv;
+ switch (device.GetShaderBackend()) {
+ case Settings::ShaderBackend::GLSL:
+ code = EmitGLSL(profile, program);
+ break;
+ case Settings::ShaderBackend::GLASM:
+ code = EmitGLASM(profile, info, program);
+ break;
+ case Settings::ShaderBackend::SPIRV:
+ code_spirv = EmitSPIRV(profile, program);
+ break;
+ }
+
+ return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
+ kepler_compute, program_manager, program.info, code,
+ code_spirv);
+} catch (Shader::Exception& exception) {
+ LOG_ERROR(Render_OpenGL, "{}", exception.what());
+ return nullptr;
+}
- Shader* const result = kernel.get();
- if (cpu_addr) {
- Register(std::move(kernel), *cpu_addr, code_size);
- } else {
- null_kernel = std::move(kernel);
- }
- return result;
+std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const {
+ return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1,
+ "yuzu:ShaderBuilder",
+ [this] { return Context{emu_window}; });
}
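
The worker count above, std::max(std::thread::hardware_concurrency(), 2U) - 1, leaves one core for the render thread while guaranteeing at least one builder thread even when hardware_concurrency() reports 0 or 1:

    #include <algorithm>
    #include <thread>

    unsigned BuilderThreads() {
        // hardware_concurrency() == 0 or 1  ->  1 builder thread
        // hardware_concurrency() == 8      ->  7 builder threads
        return std::max(std::thread::hardware_concurrency(), 2U) - 1;
    }
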
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b30308b6f..a34110b37 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,157 +5,93 @@
#pragma once
#include <array>
-#include <atomic>
-#include <bitset>
-#include <memory>
-#include <string>
-#include <tuple>
+#include <filesystem>
+#include <stop_token>
#include <unordered_map>
-#include <unordered_set>
-#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
+#include "common/thread_worker.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/object_pool.h"
+#include "shader_recompiler/profile.h"
+#include "video_core/renderer_opengl/gl_compute_pipeline.h"
+#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
+#include "video_core/renderer_opengl/gl_shader_context.h"
#include "video_core/shader_cache.h"
namespace Tegra {
class MemoryManager;
}
-namespace Core::Frontend {
-class EmuWindow;
-}
-
-namespace VideoCommon::Shader {
-class AsyncShaders;
-}
-
namespace OpenGL {
class Device;
+class ProgramManager;
class RasterizerOpenGL;
+using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-struct ProgramHandle {
- OGLProgram source_program;
- OGLAssemblyProgram assembly_program;
-};
-using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
-
-struct PrecompiledShader {
- ProgramSharedPtr program;
- std::shared_ptr<VideoCommon::Shader::Registry> registry;
- ShaderEntries entries;
-};
-
-struct ShaderParameters {
- Tegra::GPU& gpu;
- Tegra::Engines::ConstBufferEngineInterface& engine;
- ShaderDiskCacheOpenGL& disk_cache;
- const Device& device;
- VAddr cpu_addr;
- const u8* host_ptr;
- u64 unique_identifier;
-};
-
-ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
- u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- bool hint_retrievable = false);
-
-class Shader final {
+class ShaderCache : public VideoCommon::ShaderCache {
public:
- ~Shader();
-
- /// Gets the GL program handle for the shader
- GLuint GetHandle() const;
-
- bool IsBuilt() const;
-
- /// Gets the shader entries for the shader
- const ShaderEntries& GetEntries() const {
- return entries;
- }
-
- const VideoCommon::Shader::Registry& GetRegistry() const {
- return *registry;
- }
-
-    /// Mark an OpenGL shader as built
- void AsyncOpenGLBuilt(OGLProgram new_program);
+ explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_,
+ TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_,
+ VideoCore::ShaderNotify& shader_notify_);
+ ~ShaderCache();
- /// Mark a GLASM shader as built
- void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
+ void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback);
- static std::unique_ptr<Shader> CreateStageFromMemory(
- const ShaderParameters& params, Maxwell::ShaderProgram program_type,
- ProgramCode program_code, ProgramCode program_code_b,
- VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
+ [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
- static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
- ProgramCode code);
-
- static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader);
+ [[nodiscard]] ComputePipeline* CurrentComputePipeline();
private:
- explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
- ProgramSharedPtr program, bool is_built_ = true);
-
- std::shared_ptr<VideoCommon::Shader::Registry> registry;
- ShaderEntries entries;
- ProgramSharedPtr program;
- GLuint handle = 0;
- bool is_built{};
-};
+ GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
-class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
-public:
- explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
- Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_);
- ~ShaderCacheOpenGL() override;
+ [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
-    /// Loads the disk cache for the current game
- void LoadDiskCache(u64 title_id, std::stop_token stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback);
+ std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
- /// Gets the current specified shader stage program
- Shader* GetStageProgram(Maxwell::ShaderProgram program,
- VideoCommon::Shader::AsyncShaders& async_shaders);
+ std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
+ ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
+ std::span<Shader::Environment* const> envs, bool build_in_parallel);
-    /// Gets the compute kernel at the passed address
- Shader* GetComputeKernel(GPUVAddr code_addr);
+ std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
+ const VideoCommon::ShaderInfo* shader);
-private:
- ProgramSharedPtr GeneratePrecompiledProgram(
- const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
- const std::unordered_set<GLenum>& supported_formats);
+ std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
+ const ComputePipelineKey& key,
+ Shader::Environment& env);
+
+ std::unique_ptr<ShaderWorker> CreateWorkers() const;
Core::Frontend::EmuWindow& emu_window;
- Tegra::GPU& gpu;
- Tegra::MemoryManager& gpu_memory;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
const Device& device;
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
+ ProgramManager& program_manager;
+ StateTracker& state_tracker;
+ VideoCore::ShaderNotify& shader_notify;
+ const bool use_asynchronous_shaders;
+
+ GraphicsPipelineKey graphics_key{};
+ GraphicsPipeline* current_pipeline{};
- ShaderDiskCacheOpenGL disk_cache;
- std::unordered_map<u64, PrecompiledShader> runtime_cache;
+ ShaderContext::ShaderPools main_pools;
+ std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
+ std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache;
- std::unique_ptr<Shader> null_shader;
- std::unique_ptr<Shader> null_kernel;
+ Shader::Profile profile;
+ Shader::HostTranslateInfo host_info;
- std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
+ std::filesystem::path shader_cache_filename;
+ std::unique_ptr<ShaderWorker> workers;
};
} // namespace OpenGL
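
graphics_cache and compute_cache above key std::unordered_map directly on the pipeline key types, which requires a std::hash specialization; the .cpp logs key.Hash(), so the keys expose their own hash. A sketch of the minimal shape such a key needs (the field layout here is assumed, not taken from this diff):

    #include <cstddef>
    #include <functional>
    #include <unordered_map>

    struct ExampleKey {
        unsigned long long unique_hashes[6];

        std::size_t Hash() const noexcept {
            std::size_t seed = 0;
            for (const unsigned long long v : unique_hashes) {
                seed ^= static_cast<std::size_t>(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            }
            return seed;
        }
        bool operator==(const ExampleKey&) const = default;
    };

    template <>
    struct std::hash<ExampleKey> {
        std::size_t operator()(const ExampleKey& k) const noexcept {
            return k.Hash();
        }
    };

    std::unordered_map<ExampleKey, int> example_cache; // now a valid key type
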
diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h
new file mode 100644
index 000000000..6ff34e5d6
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_context.h
@@ -0,0 +1,33 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/frontend/emu_window.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+
+namespace OpenGL::ShaderContext {
+struct ShaderPools {
+ void ReleaseContents() {
+ flow_block.ReleaseContents();
+ block.ReleaseContents();
+ inst.ReleaseContents();
+ }
+
+ Shader::ObjectPool<Shader::IR::Inst> inst;
+ Shader::ObjectPool<Shader::IR::Block> block;
+ Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
+};
+
+struct Context {
+ explicit Context(Core::Frontend::EmuWindow& emu_window)
+ : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {}
+
+ std::unique_ptr<Core::Frontend::GraphicsContext> gl_context;
+ Core::Frontend::GraphicsContext::Scoped scoped;
+ ShaderPools pools;
+};
+
+} // namespace OpenGL::ShaderContext
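
Context bundles everything a shader-builder thread needs: its own shared GL context (made current through the Scoped member) and its own object pools, so workers never contend over allocator state. A simplified sketch of the per-thread-state idea behind Common::StatefulThreadWorker (the real class is queue-based; this only shows where the state lives):

    #include <functional>
    #include <thread>
    #include <vector>

    template <typename State>
    void RunWorkers(unsigned count, std::function<State()> make_state,
                    std::function<void(State&)> work) {
        std::vector<std::thread> threads;
        threads.reserve(count);
        for (unsigned i = 0; i < count; ++i) {
            threads.emplace_back([&make_state, &work] {
                State state{make_state()}; // one State per thread, built on the thread
                work(state);
            });
        }
        for (std::thread& thread : threads) {
            thread.join();
        }
    }
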
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
deleted file mode 100644
index 9c28498e8..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ /dev/null
@@ -1,2986 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <variant>
-#include <vector>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/div_ceil.h"
-#include "common/logging/log.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/shader_ir.h"
-#include "video_core/shader/transform_feedback.h"
-
-namespace OpenGL {
-
-namespace {
-
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::Header;
-using Tegra::Shader::IpaInterpMode;
-using Tegra::Shader::IpaMode;
-using Tegra::Shader::IpaSampleMode;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using Tegra::Shader::TextureType;
-
-using namespace VideoCommon::Shader;
-using namespace std::string_literals;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using Operation = const OperationNode&;
-
-class ASTDecompiler;
-class ExprDecompiler;
-
-enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
-
-constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
-
-constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
-constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
-
-struct TextureOffset {};
-struct TextureDerivates {};
-using TextureArgument = std::pair<Type, Node>;
-using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
-
-constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
-constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
-
-constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
-#define ftou floatBitsToUint
-#define itof intBitsToFloat
-#define utof uintBitsToFloat
-
-bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
- bvec2 is_nan1 = isnan(pair1);
- bvec2 is_nan2 = isnan(pair2);
- return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
-}}
-
-const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
-const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
-)";
-
-class ShaderWriter final {
-public:
- void AddExpression(std::string_view text) {
- DEBUG_ASSERT(scope >= 0);
- if (!text.empty()) {
- AppendIndentation();
- }
- shader_source += text;
- }
-
- // Forwards all arguments directly to libfmt.
- // Note that all formatting requirements for fmt must be
-    // obeyed when using this function. (e.g. {{ must be used when
-    // printing the character '{' is desired. Ditto for }} and '}',
-    // etc).
- template <typename... Args>
- void AddLine(std::string_view text, Args&&... args) {
- AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...));
- AddNewLine();
- }
-
- void AddNewLine() {
- DEBUG_ASSERT(scope >= 0);
- shader_source += '\n';
- }
-
- std::string GenerateTemporary() {
- return fmt::format("tmp{}", temporary_index++);
- }
-
- std::string GetResult() {
- return std::move(shader_source);
- }
-
- s32 scope = 0;
-
-private:
- void AppendIndentation() {
- shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
- }
-
- std::string shader_source;
- u32 temporary_index = 1;
-};
-
-class Expression final {
-public:
- Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} {
- ASSERT(type != Type::Void);
- }
- Expression() : type{Type::Void} {}
-
- Type GetType() const {
- return type;
- }
-
- std::string GetCode() const {
- return code;
- }
-
- void CheckVoid() const {
- ASSERT(type == Type::Void);
- }
-
- std::string As(Type type_) const {
- switch (type_) {
- case Type::Bool:
- return AsBool();
- case Type::Bool2:
- return AsBool2();
- case Type::Float:
- return AsFloat();
- case Type::Int:
- return AsInt();
- case Type::Uint:
- return AsUint();
- case Type::HalfFloat:
- return AsHalfFloat();
- default:
- UNREACHABLE_MSG("Invalid type");
- return code;
- }
- }
-
- std::string AsBool() const {
- switch (type) {
- case Type::Bool:
- return code;
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsBool2() const {
- switch (type) {
- case Type::Bool2:
- return code;
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsFloat() const {
- switch (type) {
- case Type::Float:
- return code;
- case Type::Uint:
- return fmt::format("utof({})", code);
- case Type::Int:
- return fmt::format("itof({})", code);
- case Type::HalfFloat:
- return fmt::format("utof(packHalf2x16({}))", code);
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsInt() const {
- switch (type) {
- case Type::Float:
- return fmt::format("ftoi({})", code);
- case Type::Uint:
- return fmt::format("int({})", code);
- case Type::Int:
- return code;
- case Type::HalfFloat:
- return fmt::format("int(packHalf2x16({}))", code);
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsUint() const {
- switch (type) {
- case Type::Float:
- return fmt::format("ftou({})", code);
- case Type::Uint:
- return code;
- case Type::Int:
- return fmt::format("uint({})", code);
- case Type::HalfFloat:
- return fmt::format("packHalf2x16({})", code);
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsHalfFloat() const {
- switch (type) {
- case Type::Float:
- return fmt::format("unpackHalf2x16(ftou({}))", code);
- case Type::Uint:
- return fmt::format("unpackHalf2x16({})", code);
- case Type::Int:
- return fmt::format("unpackHalf2x16(int({}))", code);
- case Type::HalfFloat:
- return code;
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
-private:
- std::string code;
- Type type{};
-};
-
-const char* GetTypeString(Type type) {
- switch (type) {
- case Type::Bool:
- return "bool";
- case Type::Bool2:
- return "bvec2";
- case Type::Float:
- return "float";
- case Type::Int:
- return "int";
- case Type::Uint:
- return "uint";
- case Type::HalfFloat:
- return "vec2";
- default:
- UNREACHABLE_MSG("Invalid type");
- return "<invalid type>";
- }
-}
-
-const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
- switch (image_type) {
- case Tegra::Shader::ImageType::Texture1D:
- return "1D";
- case Tegra::Shader::ImageType::TextureBuffer:
- return "Buffer";
- case Tegra::Shader::ImageType::Texture1DArray:
- return "1DArray";
- case Tegra::Shader::ImageType::Texture2D:
- return "2D";
- case Tegra::Shader::ImageType::Texture2DArray:
- return "2DArray";
- case Tegra::Shader::ImageType::Texture3D:
- return "3D";
- default:
- UNREACHABLE();
- return "1D";
- }
-}
-
-/// Describes primitive behavior on geometry shaders
-std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
- switch (topology) {
- case Maxwell::PrimitiveTopology::Points:
- return {"points", 1};
- case Maxwell::PrimitiveTopology::Lines:
- case Maxwell::PrimitiveTopology::LineStrip:
- return {"lines", 2};
- case Maxwell::PrimitiveTopology::LinesAdjacency:
- case Maxwell::PrimitiveTopology::LineStripAdjacency:
- return {"lines_adjacency", 4};
- case Maxwell::PrimitiveTopology::Triangles:
- case Maxwell::PrimitiveTopology::TriangleStrip:
- case Maxwell::PrimitiveTopology::TriangleFan:
- return {"triangles", 3};
- case Maxwell::PrimitiveTopology::TrianglesAdjacency:
- case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
- return {"triangles_adjacency", 6};
- default:
- UNIMPLEMENTED_MSG("topology={}", topology);
- return {"points", 1};
- }
-}
-
-/// Generates code to use for a swizzle operation.
-constexpr const char* GetSwizzle(std::size_t element) {
- constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
- return swizzle.at(element);
-}
-
-constexpr const char* GetColorSwizzle(std::size_t element) {
- constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
- return swizzle.at(element);
-}
-
-/// Translate topology
-std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
- switch (topology) {
- case Tegra::Shader::OutputTopology::PointList:
- return "points";
- case Tegra::Shader::OutputTopology::LineStrip:
- return "line_strip";
- case Tegra::Shader::OutputTopology::TriangleStrip:
- return "triangle_strip";
- default:
- UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
- return "points";
- }
-}
-
-/// Returns true if an object has to be treated as precise
-bool IsPrecise(Operation operand) {
- const auto& meta{operand.GetMeta()};
- if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
- return arithmetic->precise;
- }
- return false;
-}
-
-bool IsPrecise(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- return IsPrecise(*operation);
- }
- return false;
-}
-
-constexpr bool IsGenericAttribute(Attribute::Index index) {
- return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
-}
-
-constexpr bool IsLegacyTexCoord(Attribute::Index index) {
- return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) &&
- static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7);
-}
-
-constexpr Attribute::Index ToGenericAttribute(u64 value) {
- return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
-}
-
-constexpr int GetLegacyTexCoordIndex(Attribute::Index index) {
- return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0);
-}
-
-u32 GetGenericAttributeIndex(Attribute::Index index) {
- ASSERT(IsGenericAttribute(index));
- return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
-}
-
-constexpr const char* GetFlowStackPrefix(MetaStackClass stack) {
- switch (stack) {
- case MetaStackClass::Ssy:
- return "ssy";
- case MetaStackClass::Pbk:
- return "pbk";
- }
- return {};
-}
-
-std::string FlowStackName(MetaStackClass stack) {
- return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack));
-}
-
-std::string FlowStackTopName(MetaStackClass stack) {
- return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
-}
-
-struct GenericVaryingDescription {
- std::string name;
- u8 first_element = 0;
- bool is_scalar = false;
-};
-
-class GLSLDecompiler final {
-public:
- explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
- ShaderType stage_, std::string_view identifier_,
- std::string_view suffix_)
- : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
- identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
- if (stage != ShaderType::Compute) {
- transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
- }
- }
-
- void Decompile() {
- DeclareHeader();
- DeclareVertex();
- DeclareGeometry();
- DeclareFragment();
- DeclareCompute();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareImages();
- DeclareSamplers();
- DeclareGlobalMemory();
- DeclareConstantBuffers();
- DeclareLocalMemory();
- DeclareRegisters();
- DeclarePredicates();
- DeclareInternalFlags();
- DeclareCustomVariables();
- DeclarePhysicalAttributeReader();
-
- code.AddLine("void main() {{");
- ++code.scope;
-
- if (stage == ShaderType::Vertex) {
- code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
- }
-
- if (ir.IsDecompiled()) {
- DecompileAST();
- } else {
- DecompileBranchMode();
- }
-
- --code.scope;
- code.AddLine("}}");
- }
-
- std::string GetResult() {
- return code.GetResult();
- }
-
-private:
- friend class ASTDecompiler;
- friend class ExprDecompiler;
-
- void DecompileBranchMode() {
- // VM's program counter
- const auto first_address = ir.GetBasicBlocks().begin()->first;
- code.AddLine("uint jmp_to = {}U;", first_address);
-
-        // TODO(Subv): Figure out the actual depth of the flow stack; for now it seems
-        // unlikely that shaders will use 20 nested SSYs and PBKs.
- constexpr u32 FLOW_STACK_SIZE = 20;
- if (!ir.IsFlowStackDisabled()) {
- for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
- code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
- code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
- }
- }
-
- code.AddLine("while (true) {{");
- ++code.scope;
-
- code.AddLine("switch (jmp_to) {{");
-
- for (const auto& pair : ir.GetBasicBlocks()) {
- const auto& [address, bb] = pair;
- code.AddLine("case 0x{:X}U: {{", address);
- ++code.scope;
-
- VisitBlock(bb);
-
- --code.scope;
- code.AddLine("}}");
- }
-
- code.AddLine("default: return;");
- code.AddLine("}}");
-
- --code.scope;
- code.AddLine("}}");
- }
-
- void DecompileAST();
-
- void DeclareHeader() {
- if (!identifier.empty()) {
- code.AddLine("// {}", identifier);
- }
- const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
- code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
- code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
- if (device.HasShaderBallot()) {
- code.AddLine("#extension GL_ARB_shader_ballot : require");
- }
- if (device.HasVertexViewportLayer()) {
- code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
- }
- if (device.HasImageLoadFormatted()) {
- code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
- }
- if (device.HasTextureShadowLod()) {
- code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
- }
- if (device.HasWarpIntrinsics()) {
- code.AddLine("#extension GL_NV_gpu_shader5 : require");
- code.AddLine("#extension GL_NV_shader_thread_group : require");
- code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
- }
-        // This pragma stops Nvidia's driver from over-optimizing math (probably using fp16
-        // operations) in places where we don't want it to.
- // Thanks to Ryujinx for finding this workaround.
- code.AddLine("#pragma optionNV(fastmath off)");
-
- code.AddNewLine();
-
- code.AddLine(COMMON_DECLARATIONS);
- }
-
- void DeclareVertex() {
- if (stage != ShaderType::Vertex) {
- return;
- }
-
- DeclareVertexRedeclarations();
- }
-
- void DeclareGeometry() {
- if (stage != ShaderType::Geometry) {
- return;
- }
-
- const auto& info = registry.GetGraphicsInfo();
- const auto input_topology = info.primitive_topology;
- const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
- max_input_vertices = max_vertices;
- code.AddLine("layout ({}) in;", glsl_topology);
-
- const auto topology = GetTopologyName(header.common3.output_topology);
- const auto max_output_vertices = header.common4.max_output_vertices.Value();
- code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
- code.AddNewLine();
-
- code.AddLine("in gl_PerVertex {{");
- ++code.scope;
- code.AddLine("vec4 gl_Position;");
- --code.scope;
- code.AddLine("}} gl_in[];");
-
- DeclareVertexRedeclarations();
- }
-
- void DeclareFragment() {
- if (stage != ShaderType::Fragment) {
- return;
- }
- if (ir.UsesLegacyVaryings()) {
- code.AddLine("in gl_PerFragment {{");
- ++code.scope;
- code.AddLine("vec4 gl_TexCoord[8];");
- code.AddLine("vec4 gl_Color;");
- code.AddLine("vec4 gl_SecondaryColor;");
- --code.scope;
- code.AddLine("}};");
- }
-
- for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
- code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
- }
- }
-
- void DeclareCompute() {
- if (stage != ShaderType::Compute) {
- return;
- }
- const auto& info = registry.GetComputeInfo();
- if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
- const u32 limit = device.GetMaxComputeSharedMemorySize();
- if (size > limit) {
- LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
- size, limit);
- size = limit;
- }
-
- code.AddLine("shared uint smem[{}];", size / 4);
- code.AddNewLine();
- }
- code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
- info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
- code.AddNewLine();
- }
-
- void DeclareVertexRedeclarations() {
- code.AddLine("out gl_PerVertex {{");
- ++code.scope;
-
- auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
- if (!pos_xfb.empty()) {
- pos_xfb = fmt::format("layout ({}) ", pos_xfb);
- }
- const char* pos_type =
- FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
- code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
-
- for (const auto attribute : ir.GetOutputAttributes()) {
- if (attribute == Attribute::Index::ClipDistances0123 ||
- attribute == Attribute::Index::ClipDistances4567) {
- code.AddLine("float gl_ClipDistance[];");
- break;
- }
- }
-
- if (stage != ShaderType::Geometry &&
- (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
- if (ir.UsesLayer()) {
- code.AddLine("int gl_Layer;");
- }
- if (ir.UsesViewportIndex()) {
- code.AddLine("int gl_ViewportIndex;");
- }
- } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
- !device.HasVertexViewportLayer()) {
- LOG_ERROR(
- Render_OpenGL,
-                "GL_ARB_shader_viewport_layer_array is not available and it is required by a shader");
- }
-
- if (ir.UsesPointSize()) {
- code.AddLine("float gl_PointSize;");
- }
-
- if (ir.UsesLegacyVaryings()) {
- code.AddLine("vec4 gl_TexCoord[8];");
- code.AddLine("vec4 gl_FrontColor;");
- code.AddLine("vec4 gl_FrontSecondaryColor;");
- code.AddLine("vec4 gl_BackColor;");
- code.AddLine("vec4 gl_BackSecondaryColor;");
- }
-
- --code.scope;
- code.AddLine("}};");
- code.AddNewLine();
-
- if (stage == ShaderType::Geometry) {
- if (ir.UsesLayer()) {
- code.AddLine("out int gl_Layer;");
- }
- if (ir.UsesViewportIndex()) {
- code.AddLine("out int gl_ViewportIndex;");
- }
- }
- code.AddNewLine();
- }
-
- void DeclareRegisters() {
- const auto& registers = ir.GetRegisters();
- for (const u32 gpr : registers) {
- code.AddLine("float {} = 0.0f;", GetRegister(gpr));
- }
- if (!registers.empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclareCustomVariables() {
- const u32 num_custom_variables = ir.GetNumCustomVariables();
- for (u32 i = 0; i < num_custom_variables; ++i) {
- code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
- }
- if (num_custom_variables > 0) {
- code.AddNewLine();
- }
- }
-
- void DeclarePredicates() {
- const auto& predicates = ir.GetPredicates();
- for (const auto pred : predicates) {
- code.AddLine("bool {} = false;", GetPredicate(pred));
- }
- if (!predicates.empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclareLocalMemory() {
- u64 local_memory_size = 0;
- if (stage == ShaderType::Compute) {
- local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
- } else {
- local_memory_size = header.GetLocalMemorySize();
- }
- if (local_memory_size == 0) {
- return;
- }
- const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
- code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
- code.AddNewLine();
- }
-
- void DeclareInternalFlags() {
- for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
- const auto flag_code = static_cast<InternalFlag>(flag);
- code.AddLine("bool {} = false;", GetInternalFlag(flag_code));
- }
- code.AddNewLine();
- }
-
- const char* GetInputFlags(PixelImap attribute) {
- switch (attribute) {
- case PixelImap::Perspective:
- return "smooth";
- case PixelImap::Constant:
- return "flat";
- case PixelImap::ScreenLinear:
- return "noperspective";
- case PixelImap::Unused:
- break;
- }
- UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
- return {};
- }
-
- void DeclareInputAttributes() {
- if (ir.HasPhysicalAttributes()) {
- const u32 num_inputs{GetNumPhysicalInputAttributes()};
- for (u32 i = 0; i < num_inputs; ++i) {
- DeclareInputAttribute(ToGenericAttribute(i), true);
- }
- code.AddNewLine();
- return;
- }
-
- const auto& attributes = ir.GetInputAttributes();
- for (const auto index : attributes) {
- if (IsGenericAttribute(index)) {
- DeclareInputAttribute(index, false);
- }
- }
- if (!attributes.empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
- const u32 location{GetGenericAttributeIndex(index)};
-
- std::string name{GetGenericInputAttribute(index)};
- if (stage == ShaderType::Geometry) {
- name = "gs_" + name + "[]";
- }
-
- std::string suffix_;
- if (stage == ShaderType::Fragment) {
- const auto input_mode{header.ps.GetPixelImap(location)};
- if (input_mode == PixelImap::Unused) {
- return;
- }
- suffix_ = GetInputFlags(input_mode);
- }
-
- code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name);
- }
-
- void DeclareOutputAttributes() {
- if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) {
- for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
- DeclareOutputAttribute(ToGenericAttribute(i));
- }
- code.AddNewLine();
- return;
- }
-
- const auto& attributes = ir.GetOutputAttributes();
- for (const auto index : attributes) {
- if (IsGenericAttribute(index)) {
- DeclareOutputAttribute(index);
- }
- }
- if (!attributes.empty()) {
- code.AddNewLine();
- }
- }
-
- std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
- const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
- const auto it = transform_feedback.find(location);
- if (it == transform_feedback.end()) {
- return std::nullopt;
- }
- return it->second.components;
- }
-
- std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
- const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
- const auto it = transform_feedback.find(location);
- if (it == transform_feedback.end()) {
- return {};
- }
-
- const VaryingTFB& tfb = it->second;
- return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
- tfb.offset, tfb.stride);
- }
-
- void DeclareOutputAttribute(Attribute::Index index) {
- static constexpr std::string_view swizzle = "xyzw";
- u8 element = 0;
- while (element < 4) {
- auto xfb = GetTransformFeedbackDecoration(index, element);
- if (!xfb.empty()) {
- xfb = fmt::format(", {}", xfb);
- }
- const std::size_t remainder = 4 - element;
- const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
- const char* const type = FLOAT_TYPES.at(num_components - 1);
-
- const u32 location = GetGenericAttributeIndex(index);
-
- GenericVaryingDescription description;
- description.first_element = static_cast<u8>(element);
- description.is_scalar = num_components == 1;
- description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
- if (element != 0 || num_components != 4) {
- const std::string_view name_swizzle = swizzle.substr(element, num_components);
- description.name = fmt::format("{}_{}", description.name, name_swizzle);
- }
- for (std::size_t i = 0; i < num_components; ++i) {
- const u8 offset = static_cast<u8>(location * 4 + element + i);
- varying_description.insert({offset, description});
- }
-
- code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
- xfb, type, description.name);
-
- element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
- }
- }
-
- void DeclareConstantBuffers() {
- u32 binding = device.GetBaseBindings(stage).uniform_buffer;
- for (const auto& [index, info] : ir.GetConstantBuffers()) {
- const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
- const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
- code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
- GetConstBufferBlock(index));
- code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
- code.AddLine("}};");
- code.AddNewLine();
- }
- }
-
- void DeclareGlobalMemory() {
- u32 binding = device.GetBaseBindings(stage).shader_storage_buffer;
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
-            // Since we don't know how the shader will use the memory, hint the driver to disable
-            // as many optimizations as possible
- std::string qualifier = "coherent volatile";
- if (usage.is_read && !usage.is_written) {
- qualifier += " readonly";
- } else if (usage.is_written && !usage.is_read) {
- qualifier += " writeonly";
- }
-
- code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier,
- GetGlobalMemoryBlock(base));
- code.AddLine(" uint {}[];", GetGlobalMemory(base));
- code.AddLine("}};");
- code.AddNewLine();
- }
- }
-
- void DeclareSamplers() {
- u32 binding = device.GetBaseBindings(stage).sampler;
- for (const auto& sampler : ir.GetSamplers()) {
- const std::string name = GetSampler(sampler);
- const std::string description = fmt::format("layout (binding = {}) uniform", binding);
- binding += sampler.is_indexed ? sampler.size : 1;
-
- std::string sampler_type = [&]() {
- if (sampler.is_buffer) {
- return "samplerBuffer";
- }
- switch (sampler.type) {
- case TextureType::Texture1D:
- return "sampler1D";
- case TextureType::Texture2D:
- return "sampler2D";
- case TextureType::Texture3D:
- return "sampler3D";
- case TextureType::TextureCube:
- return "samplerCube";
- default:
- UNREACHABLE();
- return "sampler2D";
- }
- }();
- if (sampler.is_array) {
- sampler_type += "Array";
- }
- if (sampler.is_shadow) {
- sampler_type += "Shadow";
- }
-
- if (!sampler.is_indexed) {
- code.AddLine("{} {} {};", description, sampler_type, name);
- } else {
- code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size);
- }
- }
- if (!ir.GetSamplers().empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclarePhysicalAttributeReader() {
- if (!ir.HasPhysicalAttributes()) {
- return;
- }
- code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
- ++code.scope;
- code.AddLine("switch (physical_address) {{");
-
- // Just declare generic attributes for now.
- const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
- for (u32 index = 0; index < num_attributes; ++index) {
- const auto attribute{ToGenericAttribute(index)};
- for (u32 element = 0; element < 4; ++element) {
- constexpr u32 generic_base = 0x80;
- constexpr u32 generic_stride = 16;
- constexpr u32 element_stride = 4;
- const u32 address{generic_base + index * generic_stride + element * element_stride};
-
- const bool declared = stage != ShaderType::Fragment ||
- header.ps.GetPixelImap(index) != PixelImap::Unused;
- const std::string value =
- declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
- code.AddLine("case 0x{:X}U: return {};", address, value);
- }
- }
-
- code.AddLine("default: return 0;");
-
- code.AddLine("}}");
- --code.scope;
- code.AddLine("}}");
- code.AddNewLine();
- }
-
- void DeclareImages() {
- u32 binding = device.GetBaseBindings(stage).image;
- for (const auto& image : ir.GetImages()) {
- std::string qualifier = "coherent volatile";
- if (image.is_read && !image.is_written) {
- qualifier += " readonly";
- } else if (image.is_written && !image.is_read) {
- qualifier += " writeonly";
- }
-
- const char* format = image.is_atomic ? "r32ui, " : "";
- const char* type_declaration = GetImageTypeDeclaration(image.type);
- code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++,
- qualifier, type_declaration, GetImage(image));
- }
- if (!ir.GetImages().empty()) {
- code.AddNewLine();
- }
- }
-
- void VisitBlock(const NodeBlock& bb) {
- for (const auto& node : bb) {
- Visit(node).CheckVoid();
- }
- }
-
- Expression Visit(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- if (const auto amend_index = operation->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
- }
- const auto operation_index = static_cast<std::size_t>(operation->GetCode());
- if (operation_index >= operation_decompilers.size()) {
- UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
- return {};
- }
- const auto decompiler = operation_decompilers[operation_index];
- if (decompiler == nullptr) {
- UNREACHABLE_MSG("Undefined operation: {}", operation_index);
- return {};
- }
- return (this->*decompiler)(*operation);
- }
-
- if (const auto gpr = std::get_if<GprNode>(&*node)) {
- const u32 index = gpr->GetIndex();
- if (index == Register::ZeroIndex) {
- return {"0U", Type::Uint};
- }
- return {GetRegister(index), Type::Float};
- }
-
- if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
- const u32 index = cv->GetIndex();
- return {GetCustomVariable(index), Type::Float};
- }
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
- const u32 value = immediate->GetValue();
- if (value < 10) {
-                // For eye candy, avoid using hex numbers for single digits
- return {fmt::format("{}U", immediate->GetValue()), Type::Uint};
- }
- return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint};
- }
-
- if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
- const auto value = [&]() -> std::string {
- switch (const auto index = predicate->GetIndex(); index) {
- case Tegra::Shader::Pred::UnusedIndex:
- return "true";
- case Tegra::Shader::Pred::NeverExecute:
- return "false";
- default:
- return GetPredicate(index);
- }
- }();
- if (predicate->IsNegated()) {
- return {fmt::format("!({})", value), Type::Bool};
- }
- return {value, Type::Bool};
- }
-
- if (const auto abuf = std::get_if<AbufNode>(&*node)) {
- UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry,
- "Physical attributes in geometry shaders are not implemented");
- if (abuf->IsPhysicalBuffer()) {
- return {fmt::format("ReadPhysicalAttribute({})",
- Visit(abuf->GetPhysicalAddress()).AsUint()),
- Type::Float};
- }
- return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
- }
-
- if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
- const Node offset = cbuf->GetOffset();
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- // Direct access
- const u32 offset_imm = immediate->GetValue();
- ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
- return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
- offset_imm / (4 * 4), (offset_imm / 4) % 4),
- Type::Uint};
- }
-
- // Indirect access
- const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
-
- if (!device.HasComponentIndexingBug()) {
- return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
- final_offset, final_offset),
- Type::Uint};
- }
-
-            // AMD's proprietary GLSL compiler emits broken code for variable component access.
-            // To bypass this driver bug, generate 4 ifs, one per component.
- const std::string pack = code.GenerateTemporary();
- code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
- final_offset);
-
- const std::string result = code.GenerateTemporary();
- code.AddLine("uint {};", result);
- for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
- code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
- GetSwizzle(swizzle));
- }
- return {result, Type::Uint};
- }
-
- if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- const std::string real = Visit(gmem->GetRealAddress()).AsUint();
- const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
- const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
- return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
- Type::Uint};
- }
-
- if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- return {
- fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
- Type::Uint};
- }
-
- if (const auto smem = std::get_if<SmemNode>(&*node)) {
- return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
- }
-
- if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
- }
-
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- if (const auto amend_index = conditional->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
- }
-            // It's invalid to call conditional on nested nodes; use an operation instead
- code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
- ++code.scope;
-
- VisitBlock(conditional->GetCode());
-
- --code.scope;
- code.AddLine("}}");
- return {};
- }
-
- if (const auto comment = std::get_if<CommentNode>(&*node)) {
- code.AddLine("// " + comment->GetText());
- return {};
- }
-
- UNREACHABLE();
- return {};
- }
-
- Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
- const auto GeometryPass = [&](std::string_view name) {
- if (stage == ShaderType::Geometry && buffer) {
-            // TODO(Rodrigo): Guard geometry inputs against out-of-bounds reads. Some games
-            // set a 0x80000000 index for those and the shader fails to build. Find out why
-            // this happens and what its intent is.
- return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
- max_input_vertices.value());
- }
- return std::string(name);
- };
-
- switch (attribute) {
- case Attribute::Index::Position:
- switch (stage) {
- case ShaderType::Geometry:
- return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
- GetSwizzle(element)),
- Type::Float};
- case ShaderType::Fragment:
- return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
- default:
- UNREACHABLE();
- return {"0", Type::Int};
- }
- case Attribute::Index::FrontColor:
- return {"gl_Color"s + GetSwizzle(element), Type::Float};
- case Attribute::Index::FrontSecondaryColor:
- return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float};
- case Attribute::Index::PointCoord:
- switch (element) {
- case 0:
- return {"gl_PointCoord.x", Type::Float};
- case 1:
- return {"gl_PointCoord.y", Type::Float};
- case 2:
- case 3:
- return {"0.0f", Type::Float};
- }
- UNREACHABLE();
- return {"0", Type::Int};
- case Attribute::Index::TessCoordInstanceIDVertexID:
- // TODO(Subv): Find out what the values are for the first two elements when inside a
- // vertex shader, and what the value of the fourth element is when inside a Tess Eval
- // shader.
- ASSERT(stage == ShaderType::Vertex);
- switch (element) {
- case 2:
- // Config pack's first value is instance_id.
- return {"gl_InstanceID", Type::Int};
- case 3:
- return {"gl_VertexID", Type::Int};
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- return {"0", Type::Int};
- case Attribute::Index::FrontFacing:
- // TODO(Subv): Find out what the values are for the other elements.
- ASSERT(stage == ShaderType::Fragment);
- switch (element) {
- case 3:
- return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
- }
- UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
- return {"0", Type::Int};
- default:
- if (IsGenericAttribute(attribute)) {
- return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
- Type::Float};
- }
- if (IsLegacyTexCoord(attribute)) {
- UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
- return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
- GetSwizzle(element)),
- Type::Float};
- }
- break;
- }
- UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
- return {"0", Type::Int};
- }
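As an illustration, a generic input fetched from a geometry shader might read roughly as follows; the attribute, register, and vertex-count names are hypothetical:

    gs_in_attr0[ftou(gpr4) % 3].x   // generic input, index wrapped to max_input_vertices
    gl_in[ftou(gpr4)].gl_Position.y // Position reads go through gl_in[]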
-
- Expression ApplyPrecise(Operation operation, std::string value, Type type) {
- if (!IsPrecise(operation)) {
- return {std::move(value), type};
- }
- // Old Nvidia drivers have a bug with precise and texture sampling. Texture sampling
- // is more common in fragment shaders, so precise is disabled there. There are vertex
- // shaders that also fail to build, but nobody seems to care about those.
- // Note: Only bugged drivers will skip precise.
- const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment;
-
- std::string temporary = code.GenerateTemporary();
- code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
- temporary, value);
- return {std::move(temporary), type};
- }
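A minimal sketch of the two shapes ApplyPrecise can emit, assuming an fma operation and illustrative names:

    precise float tmp1 = fma(gpr0, gpr1, gpr2); // normal path
    float tmp2 = fma(gpr0, gpr1, gpr2);         // fragment path on drivers with the precise bug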
-
- Expression VisitOperand(Operation operation, std::size_t operand_index) {
- const auto& operand = operation[operand_index];
- const bool parent_precise = IsPrecise(operation);
- const bool child_precise = IsPrecise(operand);
- const bool child_trivial = !std::holds_alternative<OperationNode>(*operand);
- if (!parent_precise || child_precise || child_trivial) {
- return Visit(operand);
- }
-
- Expression value = Visit(operand);
- std::string temporary = code.GenerateTemporary();
- code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode());
- return {std::move(temporary), value.GetType()};
- }
-
- std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
- const u32 element = abuf->GetElement();
- switch (const auto attribute = abuf->GetIndex()) {
- case Attribute::Index::Position:
- return {{"gl_Position"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::LayerViewportPointSize:
- switch (element) {
- case 0:
- UNIMPLEMENTED();
- return std::nullopt;
- case 1:
- if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return std::nullopt;
- }
- return {{"gl_Layer", Type::Int}};
- case 2:
- if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return std::nullopt;
- }
- return {{"gl_ViewportIndex", Type::Int}};
- case 3:
- return {{"gl_PointSize", Type::Float}};
- }
- return std::nullopt;
- case Attribute::Index::FrontColor:
- return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::FrontSecondaryColor:
- return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::BackColor:
- return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::BackSecondaryColor:
- return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::ClipDistances0123:
- return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}};
- case Attribute::Index::ClipDistances4567:
- return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}};
- default:
- if (IsGenericAttribute(attribute)) {
- return {{GetGenericOutputAttribute(attribute, element), Type::Float}};
- }
- if (IsLegacyTexCoord(attribute)) {
- return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
- GetSwizzle(element)),
- Type::Float}};
- }
- UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute);
- return std::nullopt;
- }
- }
-
- Expression GenerateUnary(Operation operation, std::string_view func, Type result_type,
- Type type_a) {
- std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a));
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateTernary(Operation operation, std::string_view func, Type result_type,
- Type type_a, Type type_b, Type type_c) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- const std::string op_c = VisitOperand(operation, 2).As(type_c);
- std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b, Type type_c, Type type_d) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- const std::string op_c = VisitOperand(operation, 2).As(type_c);
- const std::string op_d = VisitOperand(operation, 3).As(type_d);
- std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- std::string GenerateTexture(Operation operation, const std::string& function_suffix,
- const std::vector<TextureIR>& extras, bool separate_dc = false) {
- constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
-
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- const std::size_t count = operation.GetOperandsCount();
- const bool has_array = meta->sampler.is_array;
- const bool has_shadow = meta->sampler.is_shadow;
- const bool workaround_lod_array_shadow_as_grad =
- !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
- ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
- meta->sampler.type == TextureType::TextureCube);
-
- std::string expr = "texture";
-
- if (workaround_lod_array_shadow_as_grad) {
- expr += "Grad";
- } else {
- expr += function_suffix;
- }
-
- if (!meta->aoffi.empty()) {
- expr += "Offset";
- } else if (!meta->ptp.empty()) {
- expr += "Offsets";
- }
- if (!meta->sampler.is_indexed) {
- expr += '(' + GetSampler(meta->sampler) + ", ";
- } else {
- expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
- }
- expr += coord_constructors.at(count + (has_array ? 1 : 0) +
- (has_shadow && !separate_dc ? 1 : 0) - 1);
- expr += '(';
- for (std::size_t i = 0; i < count; ++i) {
- expr += Visit(operation[i]).AsFloat();
-
- if (i + 1 < count) {
- expr += ", ";
- }
- }
- if (has_array) {
- expr += ", float(" + Visit(meta->array).AsInt() + ')';
- }
- if (has_shadow) {
- if (separate_dc) {
- expr += "), " + Visit(meta->depth_compare).AsFloat();
- } else {
- expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
- }
- } else {
- expr += ')';
- }
-
- if (workaround_lod_array_shadow_as_grad) {
- switch (meta->sampler.type) {
- case TextureType::Texture2D:
- return expr + ", vec2(0.0), vec2(0.0))";
- case TextureType::TextureCube:
- return expr + ", vec3(0.0), vec3(0.0))";
- default:
- UNREACHABLE();
- break;
- }
- }
-
- for (const auto& variant : extras) {
- if (const auto argument = std::get_if<TextureArgument>(&variant)) {
- expr += GenerateTextureArgument(*argument);
- } else if (std::holds_alternative<TextureOffset>(variant)) {
- if (!meta->aoffi.empty()) {
- expr += GenerateTextureAoffi(meta->aoffi);
- } else if (!meta->ptp.empty()) {
- expr += GenerateTexturePtp(meta->ptp);
- }
- } else if (std::holds_alternative<TextureDerivates>(variant)) {
- expr += GenerateTextureDerivates(meta->derivates);
- } else {
- UNREACHABLE();
- }
- }
-
- return expr + ')';
- }
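For example, a 2D array shadow lookup with explicit LOD on a device lacking GL_EXT_texture_shadow_lod might be rewritten along these lines (sketch; the sampler and operand names are illustrative):

    // Desired but unsupported here: textureLod(sampler0, vec4(s, t, layer, ref), lod)
    // Emitted workaround, forcing the base level through zero derivatives:
    textureGrad(sampler0, vec4(s, t, layer, ref), vec2(0.0), vec2(0.0))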
-
- std::string GenerateTextureArgument(const TextureArgument& argument) {
- const auto& [type, operand] = argument;
- if (operand == nullptr) {
- return {};
- }
-
- std::string expr = ", ";
- switch (type) {
- case Type::Int:
- if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
- // Inline the value as an immediate integer in GLSL (some extra arguments are
- // required to be constant)
- expr += std::to_string(static_cast<s32>(immediate->GetValue()));
- } else {
- expr += Visit(operand).AsInt();
- }
- break;
- case Type::Float:
- expr += Visit(operand).AsFloat();
- break;
- default: {
- const auto type_int = static_cast<u32>(type);
- UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
- expr += '0';
- break;
- }
- }
- return expr;
- }
-
- std::string ReadTextureOffset(const Node& value) {
- if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
- // Inline the value as an immediate integer in GLSL (AOFFI arguments are required
- // to be constant by the standard).
- return std::to_string(static_cast<s32>(immediate->GetValue()));
- } else if (device.HasVariableAoffi()) {
- // Avoid using variable AOFFI on unsupported devices.
- return Visit(value).AsInt();
- } else {
- // Insert 0 on devices not supporting variable AOFFI.
- return "0";
- }
- }
-
- std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
- if (aoffi.empty()) {
- return {};
- }
- constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"};
- std::string expr = ", ";
- expr += coord_constructors.at(aoffi.size() - 1);
- expr += '(';
-
- for (std::size_t index = 0; index < aoffi.size(); ++index) {
- expr += ReadTextureOffset(aoffi.at(index));
- if (index + 1 < aoffi.size()) {
- expr += ", ";
- }
- }
- expr += ')';
-
- return expr;
- }
-
- std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
- static constexpr std::size_t num_vectors = 4;
- ASSERT(ptp.size() == num_vectors * 2);
-
- std::string expr = ", ivec2[](";
- for (std::size_t vector = 0; vector < num_vectors; ++vector) {
- const bool has_next = vector + 1 < num_vectors;
- expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
- ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
- }
- expr += ')';
- return expr;
- }
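With the PTP path, a four-offset gather might come out like this (sketch; the sampler, coordinates, and offsets shown are examples):

    textureGatherOffsets(sampler0, vec2(s, t),
                         ivec2[](ivec2(0, 0), ivec2(1, 0), ivec2(0, 1), ivec2(1, 1)))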
-
- std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
- if (derivates.empty()) {
- return {};
- }
- constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
- std::string expr = ", ";
- const std::size_t components = derivates.size() / 2;
- std::string dx = coord_constructors.at(components - 1);
- std::string dy = coord_constructors.at(components - 1);
- dx += '(';
- dy += '(';
-
- for (std::size_t index = 0; index < components; ++index) {
- const auto& operand_x{derivates.at(index * 2)};
- const auto& operand_y{derivates.at(index * 2 + 1)};
- dx += Visit(operand_x).AsFloat();
- dy += Visit(operand_y).AsFloat();
-
- if (index + 1 < components) {
- dx += ", ";
- dy += ", ";
- }
- }
- dx += ')';
- dy += ')';
- expr += dx + ", " + dy;
-
- return expr;
- }
-
- std::string BuildIntegerCoordinates(Operation operation) {
- constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
- const std::size_t coords_count{operation.GetOperandsCount()};
- std::string expr = constructors.at(coords_count - 1);
- for (std::size_t i = 0; i < coords_count; ++i) {
- expr += VisitOperand(operation, i).AsInt();
- if (i + 1 < coords_count) {
- expr += ", ";
- }
- }
- expr += ')';
- return expr;
- }
-
- std::string BuildImageValues(Operation operation) {
- constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"};
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
-
- const std::size_t values_count{meta.values.size()};
- std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
- for (std::size_t i = 0; i < values_count; ++i) {
- expr += Visit(meta.values.at(i)).AsUint();
- if (i + 1 < values_count) {
- expr += ", ";
- }
- }
- expr += ')';
- return expr;
- }
-
- Expression Assign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- Expression target;
- if (const auto gpr = std::get_if<GprNode>(&*dest)) {
- if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no-op, but we still have to visit the source
- // as it might have side effects.
- code.AddLine("{};", Visit(src).GetCode());
- return {};
- }
- target = {GetRegister(gpr->GetIndex()), Type::Float};
- } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
- UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
- auto output = GetOutputAttribute(abuf);
- if (!output) {
- return {};
- }
- target = std::move(*output);
- } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- target = {
- fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
- Type::Uint};
- } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
- ASSERT(stage == ShaderType::Compute);
- target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
- } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- const std::string real = Visit(gmem->GetRealAddress()).AsUint();
- const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
- const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
- target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
- Type::Uint};
- } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
- target = {GetCustomVariable(cv->GetIndex()), Type::Float};
- } else {
- UNREACHABLE_MSG("Assign called without a proper target");
- }
-
- code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
- return {};
- }
-
- template <Type type>
- Expression Add(Operation operation) {
- return GenerateBinaryInfix(operation, "+", type, type, type);
- }
-
- template <Type type>
- Expression Mul(Operation operation) {
- return GenerateBinaryInfix(operation, "*", type, type, type);
- }
-
- template <Type type>
- Expression Div(Operation operation) {
- return GenerateBinaryInfix(operation, "/", type, type, type);
- }
-
- template <Type type>
- Expression Fma(Operation operation) {
- return GenerateTernary(operation, "fma", type, type, type, type);
- }
-
- template <Type type>
- Expression Negate(Operation operation) {
- return GenerateUnary(operation, "-", type, type);
- }
-
- template <Type type>
- Expression Absolute(Operation operation) {
- return GenerateUnary(operation, "abs", type, type);
- }
-
- Expression FClamp(Operation operation) {
- return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
- Type::Float);
- }
-
- Expression FCastHalf0(Operation operation) {
- return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
- }
-
- Expression FCastHalf1(Operation operation) {
- return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
- }
-
- template <Type type>
- Expression Min(Operation operation) {
- return GenerateBinaryCall(operation, "min", type, type, type);
- }
-
- template <Type type>
- Expression Max(Operation operation) {
- return GenerateBinaryCall(operation, "max", type, type, type);
- }
-
- Expression Select(Operation operation) {
- const std::string condition = Visit(operation[0]).AsBool();
- const std::string true_case = Visit(operation[1]).AsUint();
- const std::string false_case = Visit(operation[2]).AsUint();
- std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
-
- return ApplyPrecise(operation, std::move(op_str), Type::Uint);
- }
-
- Expression FCos(Operation operation) {
- return GenerateUnary(operation, "cos", Type::Float, Type::Float);
- }
-
- Expression FSin(Operation operation) {
- return GenerateUnary(operation, "sin", Type::Float, Type::Float);
- }
-
- Expression FExp2(Operation operation) {
- return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
- }
-
- Expression FLog2(Operation operation) {
- return GenerateUnary(operation, "log2", Type::Float, Type::Float);
- }
-
- Expression FInverseSqrt(Operation operation) {
- return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
- }
-
- Expression FSqrt(Operation operation) {
- return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
- }
-
- Expression FRoundEven(Operation operation) {
- return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
- }
-
- Expression FFloor(Operation operation) {
- return GenerateUnary(operation, "floor", Type::Float, Type::Float);
- }
-
- Expression FCeil(Operation operation) {
- return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
- }
-
- Expression FTrunc(Operation operation) {
- return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
- }
-
- template <Type type>
- Expression FCastInteger(Operation operation) {
- return GenerateUnary(operation, "float", Type::Float, type);
- }
-
- Expression FSwizzleAdd(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0).AsFloat();
- const std::string op_b = VisitOperand(operation, 1).AsFloat();
-
- if (!device.HasShaderBallot()) {
- LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
- return {fmt::format("{} + {}", op_a, op_b), Type::Float};
- }
-
- const std::string instr_mask = VisitOperand(operation, 2).AsUint();
- const std::string mask = code.GenerateTemporary();
- code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
- instr_mask);
-
- const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
- const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
- return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
- Type::Float};
- }
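A sketch of the ballot-path FSWZADD output, with illustrative operand and temporary names:

    uint tmp1 = (mask >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;
    float r = ((a * fswzadd_modifiers_a[tmp1]) + (b * fswzadd_modifiers_b[tmp1]));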
-
- Expression ICastFloat(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Float);
- }
-
- Expression ICastUnsigned(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Uint);
- }
-
- template <Type type>
- Expression LogicalShiftLeft(Operation operation) {
- return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
- }
-
- Expression ILogicalShiftRight(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0).AsUint();
- const std::string op_b = VisitOperand(operation, 1).AsUint();
- std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
-
- return ApplyPrecise(operation, std::move(op_str), Type::Int);
- }
-
- Expression IArithmeticShiftRight(Operation operation) {
- return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
- }
-
- template <Type type>
- Expression BitwiseAnd(Operation operation) {
- return GenerateBinaryInfix(operation, "&", type, type, type);
- }
-
- template <Type type>
- Expression BitwiseOr(Operation operation) {
- return GenerateBinaryInfix(operation, "|", type, type, type);
- }
-
- template <Type type>
- Expression BitwiseXor(Operation operation) {
- return GenerateBinaryInfix(operation, "^", type, type, type);
- }
-
- template <Type type>
- Expression BitwiseNot(Operation operation) {
- return GenerateUnary(operation, "~", type, type);
- }
-
- Expression UCastFloat(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
- }
-
- Expression UCastSigned(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
- }
-
- Expression UShiftRight(Operation operation) {
- return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
- }
-
- template <Type type>
- Expression BitfieldInsert(Operation operation) {
- return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
- Type::Int);
- }
-
- template <Type type>
- Expression BitfieldExtract(Operation operation) {
- return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
- }
-
- template <Type type>
- Expression BitCount(Operation operation) {
- return GenerateUnary(operation, "bitCount", type, type);
- }
-
- template <Type type>
- Expression BitMSB(Operation operation) {
- return GenerateUnary(operation, "findMSB", type, type);
- }
-
- Expression HNegate(Operation operation) {
- const auto GetNegate = [&](std::size_t index) {
- return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
- };
- return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
- GetNegate(1), GetNegate(2)),
- Type::HalfFloat};
- }
-
- Expression HClamp(Operation operation) {
- const std::string value = VisitOperand(operation, 0).AsHalfFloat();
- const std::string min = VisitOperand(operation, 1).AsFloat();
- const std::string max = VisitOperand(operation, 2).AsFloat();
- std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
-
- return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
- }
-
- Expression HCastFloat(Operation operation) {
- return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()),
- Type::HalfFloat};
- }
-
- Expression HUnpack(Operation operation) {
- Expression operand = VisitOperand(operation, 0);
- switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
- case Tegra::Shader::HalfType::H0_H1:
- return operand;
- case Tegra::Shader::HalfType::F32:
- return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
- case Tegra::Shader::HalfType::H0_H0:
- return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
- case Tegra::Shader::HalfType::H1_H1:
- return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
- }
- UNREACHABLE();
- return {"0", Type::Int};
- }
-
- Expression HMergeF32(Operation operation) {
- return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
- }
-
- Expression HMergeH0(Operation operation) {
- const std::string dest = VisitOperand(operation, 0).AsUint();
- const std::string src = VisitOperand(operation, 1).AsUint();
- return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
- Type::HalfFloat};
- }
-
- Expression HMergeH1(Operation operation) {
- const std::string dest = VisitOperand(operation, 0).AsUint();
- const std::string src = VisitOperand(operation, 1).AsUint();
- return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
- Type::HalfFloat};
- }
-
- Expression HPack2(Operation operation) {
- return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::HalfFloat};
- }
-
- template <const std::string_view& op, Type type, bool unordered = false>
- Expression Comparison(Operation operation) {
- static_assert(!unordered || type == Type::Float);
-
- Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
-
- if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
- // GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both
- // AMD's and Nvidia's proprietary stacks. Manually force an ordered comparison.
- return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(),
- VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
- if constexpr (!unordered) {
- return expr;
- }
- // Unordered comparisons are always true for NaN operands.
- return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(),
- VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
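For instance, the two NaN-aware shapes for a float != might be emitted as (sketch, illustrative operands):

    ((a != b) && !isnan(a) && !isnan(b)) // ordered: NaN operands compare false
    ((a != b) || isnan(a) || isnan(b))   // unordered: NaN operands compare true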
-
- Expression FOrdered(Operation operation) {
- return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
-
- Expression FUnordered(Operation operation) {
- return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
-
- Expression LogicalAddCarry(Operation operation) {
- const std::string carry = code.GenerateTemporary();
- code.AddLine("uint {};", carry);
- code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(),
- VisitOperand(operation, 1).AsUint(), carry);
- return {fmt::format("({} != 0)", carry), Type::Bool};
- }
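The emitted carry check might look like this (sketch; tmp1 is an illustrative temporary, and the decompiler actually inlines the final comparison as an expression):

    uint tmp1;
    uaddCarry(a, b, tmp1);     // tmp1 receives the carry-out
    bool carry = (tmp1 != 0);  // conceptually, the value handed back to the caller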
-
- Expression LogicalAssign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- std::string target;
-
- if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
- ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
-
- const auto index = pred->GetIndex();
- switch (index) {
- case Tegra::Shader::Pred::NeverExecute:
- case Tegra::Shader::Pred::UnusedIndex:
- // Writing to these predicates is a no-op
- return {};
- }
- target = GetPredicate(index);
- } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
- target = GetInternalFlag(flag->GetFlag());
- }
-
- code.AddLine("{} = {};", target, Visit(src).AsBool());
- return {};
- }
-
- Expression LogicalAnd(Operation operation) {
- return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
- }
-
- Expression LogicalOr(Operation operation) {
- return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
- }
-
- Expression LogicalXor(Operation operation) {
- return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
- }
-
- Expression LogicalNegate(Operation operation) {
- return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
- }
-
- Expression LogicalPick2(Operation operation) {
- return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
- VisitOperand(operation, 1).AsUint()),
- Type::Bool};
- }
-
- Expression LogicalAnd2(Operation operation) {
- return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
- }
-
- template <bool with_nan>
- Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
- Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
- Type::HalfFloat, Type::HalfFloat);
- if constexpr (!with_nan) {
- return comparison;
- }
- return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
- VisitOperand(operation, 0).AsHalfFloat(),
- VisitOperand(operation, 1).AsHalfFloat()),
- Type::Bool2};
- }
-
- template <bool with_nan>
- Expression Logical2HLessThan(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "lessThan");
- }
-
- template <bool with_nan>
- Expression Logical2HEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "equal");
- }
-
- template <bool with_nan>
- Expression Logical2HLessEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
- }
-
- template <bool with_nan>
- Expression Logical2HGreaterThan(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "greaterThan");
- }
-
- template <bool with_nan>
- Expression Logical2HNotEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "notEqual");
- }
-
- template <bool with_nan>
- Expression Logical2HGreaterEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
- }
-
- Expression Texture(Operation operation) {
- const auto meta = std::get<MetaTexture>(operation.GetMeta());
- const bool separate_dc = meta.sampler.type == TextureType::TextureCube &&
- meta.sampler.is_array && meta.sampler.is_shadow;
- // TODO: Replace this with an array and make GenerateTexture use C++20 std::span
- const std::vector<TextureIR> extras{
- TextureOffset{},
- TextureArgument{Type::Float, meta.bias},
- };
- std::string expr = GenerateTexture(operation, "", extras, separate_dc);
- if (meta.sampler.is_shadow) {
- expr = fmt::format("vec4({})", expr);
- }
- return {expr + GetSwizzle(meta.element), Type::Float};
- }
-
- Expression TextureLod(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- std::string expr{};
-
- if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
- ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
- meta->sampler.type == TextureType::TextureCube)) {
- LOG_ERROR(Render_OpenGL,
- "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
- expr = GenerateTexture(operation, "Lod", {});
- } else {
- expr = GenerateTexture(operation, "Lod",
- {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
- }
-
- if (meta->sampler.is_shadow) {
- expr = "vec4(" + expr + ')';
- }
- return {expr + GetSwizzle(meta->element), Type::Float};
- }
-
- Expression TextureGather(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
-
- const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int;
- const bool separate_dc = meta.sampler.is_shadow;
-
- std::vector<TextureIR> ir_;
- if (meta.sampler.is_shadow) {
- ir_ = {TextureOffset{}};
- } else {
- ir_ = {TextureOffset{}, TextureArgument{type, meta.component}};
- }
- return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element),
- Type::Float};
- }
-
- Expression TextureQueryDimensions(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- const std::string sampler = GetSampler(meta->sampler);
- const std::string lod = VisitOperand(operation, 0).AsInt();
-
- switch (meta->element) {
- case 0:
- case 1:
- return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
- Type::Int};
- case 3:
- return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
- }
- UNREACHABLE();
- return {"0", Type::Int};
- }
-
- Expression TextureQueryLod(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- if (meta->element < 2) {
- return {fmt::format("int(({} * vec2(256)){})",
- GenerateTexture(operation, "QueryLod", {}),
- GetSwizzle(meta->element)),
- Type::Int};
- }
- return {"0", Type::Int};
- }
-
- Expression TexelFetch(Operation operation) {
- constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
- UNIMPLEMENTED_IF(meta->sampler.is_array);
- const std::size_t count = operation.GetOperandsCount();
-
- std::string expr = "texelFetch(";
- expr += GetSampler(meta->sampler);
- expr += ", ";
-
- expr += constructors.at(count + (meta->array ? 1 : 0) - 1);
- expr += '(';
- for (std::size_t i = 0; i < count; ++i) {
- if (i > 0) {
- expr += ", ";
- }
- expr += VisitOperand(operation, i).AsInt();
- }
- if (meta->array) {
- expr += ", ";
- expr += Visit(meta->array).AsInt();
- }
- expr += ')';
-
- if (meta->lod && !meta->sampler.is_buffer) {
- expr += ", ";
- expr += Visit(meta->lod).AsInt();
- }
- expr += ')';
- expr += GetSwizzle(meta->element);
-
- return {std::move(expr), Type::Float};
- }
-
- Expression TextureGradient(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- std::string expr =
- GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
- return {std::move(expr) + GetSwizzle(meta.element), Type::Float};
- }
-
- Expression ImageLoad(Operation operation) {
- if (!device.HasImageLoadFormatted()) {
- LOG_ERROR(Render_OpenGL,
- "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load");
- return {"0", Type::Int};
- }
-
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
- BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
- Type::Uint};
- }
-
- Expression ImageStore(Operation operation) {
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
- BuildIntegerCoordinates(operation), BuildImageValues(operation));
- return {};
- }
-
- template <const std::string_view& opname>
- Expression AtomicImage(Operation operation) {
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- ASSERT(meta.values.size() == 1);
-
- return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
- BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()),
- Type::Uint};
- }
-
- template <const std::string_view& opname, Type type>
- Expression Atomic(Operation operation) {
- if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
- UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
- return {};
- }
- return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
- Visit(operation[1]).AsUint()),
- Type::Uint};
- }
-
- template <const std::string_view& opname, Type type>
- Expression Reduce(Operation operation) {
- code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
- return {};
- }
-
- Expression Branch(Operation operation) {
- const auto target = std::get_if<ImmediateNode>(&*operation[0]);
- UNIMPLEMENTED_IF(!target);
-
- code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
- code.AddLine("break;");
- return {};
- }
-
- Expression BranchIndirect(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0).AsUint();
-
- code.AddLine("jmp_to = {};", op_a);
- code.AddLine("break;");
- return {};
- }
-
- Expression PushFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- const auto target = std::get_if<ImmediateNode>(&*operation[0]);
- UNIMPLEMENTED_IF(!target);
-
- code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
- target->GetValue());
- return {};
- }
-
- Expression PopFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
- code.AddLine("break;");
- return {};
- }
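Together these emit stack-machine scaffolding of roughly this shape, assuming the jmp_to dispatch loop the decompiler builds around decoded code (the stack names and target are illustrative):

    flow_stack[flow_stack_top++] = 0xA0U;   // PushFlowStack: record a return target
    jmp_to = flow_stack[--flow_stack_top];  // PopFlowStack: resume at the saved target
    break;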
-
- void PreExit() {
- if (stage != ShaderType::Fragment) {
- return;
- }
- const auto& used_registers = ir.GetRegisters();
- const auto SafeGetRegister = [&](u32 reg) -> Expression {
- // TODO(Rodrigo): Replace with contains once C++20 releases
- if (used_registers.find(reg) != used_registers.end()) {
- return {GetRegister(reg), Type::Float};
- }
- return {"0.0f", Type::Float};
- };
-
- UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
-
- // Write the color outputs using the data in the shader registers; disabled
- // render targets/components are skipped in the register assignment.
- u32 current_reg = 0;
- for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
- // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
- for (u32 component = 0; component < 4; ++component) {
- if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
- code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
- SafeGetRegister(current_reg).AsFloat());
- ++current_reg;
- }
- }
- }
- if (header.ps.omap.depth) {
- // The depth output is always 2 registers after the last color output, and current_reg
- // already contains one past the last color register.
- code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
- }
- }
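A sketch of the fragment epilogue this might generate for one fully enabled render target plus a depth write (register and output names are illustrative):

    frag_color0.r = gpr0;
    frag_color0.g = gpr1;
    frag_color0.b = gpr2;
    frag_color0.a = gpr3;
    gl_FragDepth = gpr5; // two registers past the last color output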
-
- Expression Exit(Operation operation) {
- PreExit();
- code.AddLine("return;");
- return {};
- }
-
- Expression Discard(Operation operation) {
- // Enclose "discard" in a conditional, so that GLSL compilation does not complain
- // about unexecuted instructions that may follow this.
- code.AddLine("if (true) {{");
- ++code.scope;
- code.AddLine("discard;");
- --code.scope;
- code.AddLine("}}");
- return {};
- }
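The emitted guard is literally:

    if (true) {
        discard;
    }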
-
- Expression EmitVertex(Operation operation) {
- ASSERT_MSG(stage == ShaderType::Geometry,
- "EmitVertex is expected to be used in a geometry shader.");
- code.AddLine("EmitVertex();");
- return {};
- }
-
- Expression EndPrimitive(Operation operation) {
- ASSERT_MSG(stage == ShaderType::Geometry,
- "EndPrimitive is expected to be used in a geometry shader.");
- code.AddLine("EndPrimitive();");
- return {};
- }
-
- Expression InvocationId(Operation operation) {
- return {"gl_InvocationID", Type::Int};
- }
-
- Expression YNegate(Operation operation) {
- // Y_NEGATE is mapped to this uniform value
- return {"gl_FrontMaterial.ambient.a", Type::Float};
- }
-
- template <u32 element>
- Expression LocalInvocationId(Operation) {
- return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
- }
-
- template <u32 element>
- Expression WorkGroupId(Operation) {
- return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
- }
-
- Expression BallotThread(Operation operation) {
- const std::string value = VisitOperand(operation, 0).AsBool();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
- // Stub on non-Nvidia devices by simulating all threads voting the same as the active
- // one.
- return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
- }
- return {fmt::format("ballotThreadNV({})", value), Type::Uint};
- }
-
- Expression Vote(Operation operation, const char* func) {
- const std::string value = VisitOperand(operation, 0).AsBool();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
- // Stub with a warp size of one.
- return {value, Type::Bool};
- }
- return {fmt::format("{}({})", func, value), Type::Bool};
- }
-
- Expression VoteAll(Operation operation) {
- return Vote(operation, "allThreadsNV");
- }
-
- Expression VoteAny(Operation operation) {
- return Vote(operation, "anyThreadNV");
- }
-
- Expression VoteEqual(Operation operation) {
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
- // We must return true here, since we stub these votes with a theoretical warp
- // size of 1; a lone thread always votes the same as itself.
- return {"true", Type::Bool};
- }
- return Vote(operation, "allThreadsEqualNV");
- }
-
- Expression ThreadId(Operation operation) {
- if (!device.HasShaderBallot()) {
- LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
- return {"0U", Type::Uint};
- }
- return {"gl_SubGroupInvocationARB", Type::Uint};
- }
-
- template <const std::string_view& comparison>
- Expression ThreadMask(Operation) {
- if (device.HasWarpIntrinsics()) {
- return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
- }
- if (device.HasShaderBallot()) {
- return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
- }
- LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
- return {"0U", Type::Uint};
- }
-
- Expression ShuffleIndexed(Operation operation) {
- std::string value = VisitOperand(operation, 0).AsFloat();
-
- if (!device.HasShaderBallot()) {
- LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
- return {std::move(value), Type::Float};
- }
-
- const std::string index = VisitOperand(operation, 1).AsUint();
- return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
- }
-
- Expression Barrier(Operation) {
- if (!ir.IsDecompiled()) {
- LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
- return {};
- }
- code.AddLine("barrier();");
- return {};
- }
-
- Expression MemoryBarrierGroup(Operation) {
- code.AddLine("groupMemoryBarrier();");
- return {};
- }
-
- Expression MemoryBarrierGlobal(Operation) {
- code.AddLine("memoryBarrier();");
- return {};
- }
-
- struct Func final {
- Func() = delete;
- ~Func() = delete;
-
- static constexpr std::string_view LessThan = "<";
- static constexpr std::string_view Equal = "==";
- static constexpr std::string_view LessEqual = "<=";
- static constexpr std::string_view GreaterThan = ">";
- static constexpr std::string_view NotEqual = "!=";
- static constexpr std::string_view GreaterEqual = ">=";
-
- static constexpr std::string_view Eq = "Eq";
- static constexpr std::string_view Ge = "Ge";
- static constexpr std::string_view Gt = "Gt";
- static constexpr std::string_view Le = "Le";
- static constexpr std::string_view Lt = "Lt";
-
- static constexpr std::string_view Add = "Add";
- static constexpr std::string_view Min = "Min";
- static constexpr std::string_view Max = "Max";
- static constexpr std::string_view And = "And";
- static constexpr std::string_view Or = "Or";
- static constexpr std::string_view Xor = "Xor";
- static constexpr std::string_view Exchange = "Exchange";
- };
-
- static constexpr std::array operation_decompilers = {
- &GLSLDecompiler::Assign,
-
- &GLSLDecompiler::Select,
-
- &GLSLDecompiler::Add<Type::Float>,
- &GLSLDecompiler::Mul<Type::Float>,
- &GLSLDecompiler::Div<Type::Float>,
- &GLSLDecompiler::Fma<Type::Float>,
- &GLSLDecompiler::Negate<Type::Float>,
- &GLSLDecompiler::Absolute<Type::Float>,
- &GLSLDecompiler::FClamp,
- &GLSLDecompiler::FCastHalf0,
- &GLSLDecompiler::FCastHalf1,
- &GLSLDecompiler::Min<Type::Float>,
- &GLSLDecompiler::Max<Type::Float>,
- &GLSLDecompiler::FCos,
- &GLSLDecompiler::FSin,
- &GLSLDecompiler::FExp2,
- &GLSLDecompiler::FLog2,
- &GLSLDecompiler::FInverseSqrt,
- &GLSLDecompiler::FSqrt,
- &GLSLDecompiler::FRoundEven,
- &GLSLDecompiler::FFloor,
- &GLSLDecompiler::FCeil,
- &GLSLDecompiler::FTrunc,
- &GLSLDecompiler::FCastInteger<Type::Int>,
- &GLSLDecompiler::FCastInteger<Type::Uint>,
- &GLSLDecompiler::FSwizzleAdd,
-
- &GLSLDecompiler::Add<Type::Int>,
- &GLSLDecompiler::Mul<Type::Int>,
- &GLSLDecompiler::Div<Type::Int>,
- &GLSLDecompiler::Negate<Type::Int>,
- &GLSLDecompiler::Absolute<Type::Int>,
- &GLSLDecompiler::Min<Type::Int>,
- &GLSLDecompiler::Max<Type::Int>,
-
- &GLSLDecompiler::ICastFloat,
- &GLSLDecompiler::ICastUnsigned,
- &GLSLDecompiler::LogicalShiftLeft<Type::Int>,
- &GLSLDecompiler::ILogicalShiftRight,
- &GLSLDecompiler::IArithmeticShiftRight,
- &GLSLDecompiler::BitwiseAnd<Type::Int>,
- &GLSLDecompiler::BitwiseOr<Type::Int>,
- &GLSLDecompiler::BitwiseXor<Type::Int>,
- &GLSLDecompiler::BitwiseNot<Type::Int>,
- &GLSLDecompiler::BitfieldInsert<Type::Int>,
- &GLSLDecompiler::BitfieldExtract<Type::Int>,
- &GLSLDecompiler::BitCount<Type::Int>,
- &GLSLDecompiler::BitMSB<Type::Int>,
-
- &GLSLDecompiler::Add<Type::Uint>,
- &GLSLDecompiler::Mul<Type::Uint>,
- &GLSLDecompiler::Div<Type::Uint>,
- &GLSLDecompiler::Min<Type::Uint>,
- &GLSLDecompiler::Max<Type::Uint>,
- &GLSLDecompiler::UCastFloat,
- &GLSLDecompiler::UCastSigned,
- &GLSLDecompiler::LogicalShiftLeft<Type::Uint>,
- &GLSLDecompiler::UShiftRight,
- &GLSLDecompiler::UShiftRight,
- &GLSLDecompiler::BitwiseAnd<Type::Uint>,
- &GLSLDecompiler::BitwiseOr<Type::Uint>,
- &GLSLDecompiler::BitwiseXor<Type::Uint>,
- &GLSLDecompiler::BitwiseNot<Type::Uint>,
- &GLSLDecompiler::BitfieldInsert<Type::Uint>,
- &GLSLDecompiler::BitfieldExtract<Type::Uint>,
- &GLSLDecompiler::BitCount<Type::Uint>,
- &GLSLDecompiler::BitMSB<Type::Uint>,
-
- &GLSLDecompiler::Add<Type::HalfFloat>,
- &GLSLDecompiler::Mul<Type::HalfFloat>,
- &GLSLDecompiler::Fma<Type::HalfFloat>,
- &GLSLDecompiler::Absolute<Type::HalfFloat>,
- &GLSLDecompiler::HNegate,
- &GLSLDecompiler::HClamp,
- &GLSLDecompiler::HCastFloat,
- &GLSLDecompiler::HUnpack,
- &GLSLDecompiler::HMergeF32,
- &GLSLDecompiler::HMergeH0,
- &GLSLDecompiler::HMergeH1,
- &GLSLDecompiler::HPack2,
-
- &GLSLDecompiler::LogicalAssign,
- &GLSLDecompiler::LogicalAnd,
- &GLSLDecompiler::LogicalOr,
- &GLSLDecompiler::LogicalXor,
- &GLSLDecompiler::LogicalNegate,
- &GLSLDecompiler::LogicalPick2,
- &GLSLDecompiler::LogicalAnd2,
-
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>,
- &GLSLDecompiler::FOrdered,
- &GLSLDecompiler::FUnordered,
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>,
-
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Int>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>,
-
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>,
-
- &GLSLDecompiler::LogicalAddCarry,
-
- &GLSLDecompiler::Logical2HLessThan<false>,
- &GLSLDecompiler::Logical2HEqual<false>,
- &GLSLDecompiler::Logical2HLessEqual<false>,
- &GLSLDecompiler::Logical2HGreaterThan<false>,
- &GLSLDecompiler::Logical2HNotEqual<false>,
- &GLSLDecompiler::Logical2HGreaterEqual<false>,
- &GLSLDecompiler::Logical2HLessThan<true>,
- &GLSLDecompiler::Logical2HEqual<true>,
- &GLSLDecompiler::Logical2HLessEqual<true>,
- &GLSLDecompiler::Logical2HGreaterThan<true>,
- &GLSLDecompiler::Logical2HNotEqual<true>,
- &GLSLDecompiler::Logical2HGreaterEqual<true>,
-
- &GLSLDecompiler::Texture,
- &GLSLDecompiler::TextureLod,
- &GLSLDecompiler::TextureGather,
- &GLSLDecompiler::TextureQueryDimensions,
- &GLSLDecompiler::TextureQueryLod,
- &GLSLDecompiler::TexelFetch,
- &GLSLDecompiler::TextureGradient,
-
- &GLSLDecompiler::ImageLoad,
- &GLSLDecompiler::ImageStore,
-
- &GLSLDecompiler::AtomicImage<Func::Add>,
- &GLSLDecompiler::AtomicImage<Func::And>,
- &GLSLDecompiler::AtomicImage<Func::Or>,
- &GLSLDecompiler::AtomicImage<Func::Xor>,
- &GLSLDecompiler::AtomicImage<Func::Exchange>,
-
- &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Min, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Max, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::And, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Or, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>,
-
- &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Add, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Min, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Max, Type::Int>,
- &GLSLDecompiler::Atomic<Func::And, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
-
- &GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::And, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
-
- &GLSLDecompiler::Reduce<Func::Add, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Min, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Max, Type::Int>,
- &GLSLDecompiler::Reduce<Func::And, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Or, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
-
- &GLSLDecompiler::Branch,
- &GLSLDecompiler::BranchIndirect,
- &GLSLDecompiler::PushFlowStack,
- &GLSLDecompiler::PopFlowStack,
- &GLSLDecompiler::Exit,
- &GLSLDecompiler::Discard,
-
- &GLSLDecompiler::EmitVertex,
- &GLSLDecompiler::EndPrimitive,
-
- &GLSLDecompiler::InvocationId,
- &GLSLDecompiler::YNegate,
- &GLSLDecompiler::LocalInvocationId<0>,
- &GLSLDecompiler::LocalInvocationId<1>,
- &GLSLDecompiler::LocalInvocationId<2>,
- &GLSLDecompiler::WorkGroupId<0>,
- &GLSLDecompiler::WorkGroupId<1>,
- &GLSLDecompiler::WorkGroupId<2>,
-
- &GLSLDecompiler::BallotThread,
- &GLSLDecompiler::VoteAll,
- &GLSLDecompiler::VoteAny,
- &GLSLDecompiler::VoteEqual,
-
- &GLSLDecompiler::ThreadId,
- &GLSLDecompiler::ThreadMask<Func::Eq>,
- &GLSLDecompiler::ThreadMask<Func::Ge>,
- &GLSLDecompiler::ThreadMask<Func::Gt>,
- &GLSLDecompiler::ThreadMask<Func::Le>,
- &GLSLDecompiler::ThreadMask<Func::Lt>,
- &GLSLDecompiler::ShuffleIndexed,
-
- &GLSLDecompiler::Barrier,
- &GLSLDecompiler::MemoryBarrierGroup,
- &GLSLDecompiler::MemoryBarrierGlobal,
- };
- static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
-
- std::string GetRegister(u32 index) const {
- return AppendSuffix(index, "gpr");
- }
-
- std::string GetCustomVariable(u32 index) const {
- return AppendSuffix(index, "custom_var");
- }
-
- std::string GetPredicate(Tegra::Shader::Pred pred) const {
- return AppendSuffix(static_cast<u32>(pred), "pred");
- }
-
- std::string GetGenericInputAttribute(Attribute::Index attribute) const {
- return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
- }
-
- std::unordered_map<u8, GenericVaryingDescription> varying_description;
-
- std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
- const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
- const auto& description = varying_description.at(offset);
- if (description.is_scalar) {
- return description.name;
- }
- return fmt::format("{}[{}]", description.name, element - description.first_element);
- }
-
- std::string GetConstBuffer(u32 index) const {
- return AppendSuffix(index, "cbuf");
- }
-
- std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
- return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
- }
-
- std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
- return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
- suffix);
- }
-
- std::string GetConstBufferBlock(u32 index) const {
- return AppendSuffix(index, "cbuf_block");
- }
-
- std::string GetLocalMemory() const {
- if (suffix.empty()) {
- return "lmem";
- } else {
- return "lmem_" + std::string{suffix};
- }
- }
-
- std::string GetInternalFlag(InternalFlag flag) const {
- constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
- "overflow_flag"};
- const auto index = static_cast<u32>(flag);
- ASSERT(index < static_cast<u32>(InternalFlag::Amount));
-
- if (suffix.empty()) {
- return InternalFlagNames[index];
- } else {
- return fmt::format("{}_{}", InternalFlagNames[index], suffix);
- }
- }
-
- std::string GetSampler(const SamplerEntry& sampler) const {
- return AppendSuffix(sampler.index, "sampler");
- }
-
- std::string GetImage(const ImageEntry& image) const {
- return AppendSuffix(image.index, "image");
- }
-
- std::string AppendSuffix(u32 index, std::string_view name) const {
- if (suffix.empty()) {
- return fmt::format("{}{}", name, index);
- } else {
- return fmt::format("{}{}_{}", name, index, suffix);
- }
- }
-
- u32 GetNumPhysicalInputAttributes() const {
- return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
- }
-
- u32 GetNumPhysicalAttributes() const {
- return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes);
- }
-
- u32 GetNumPhysicalVaryings() const {
- return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
- }
-
- const Device& device;
- const ShaderIR& ir;
- const Registry& registry;
- const ShaderType stage;
- const std::string_view identifier;
- const std::string_view suffix;
- const Header header;
- std::unordered_map<u8, VaryingTFB> transform_feedback;
-
- ShaderWriter code;
-
- std::optional<u32> max_input_vertices;
-};
-
-std::string GetFlowVariable(u32 index) {
- return fmt::format("flow_var{}", index);
-}
-
-class ExprDecompiler {
-public:
- explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
-
- void operator()(const ExprAnd& expr) {
- inner += '(';
- std::visit(*this, *expr.operand1);
- inner += " && ";
- std::visit(*this, *expr.operand2);
- inner += ')';
- }
-
- void operator()(const ExprOr& expr) {
- inner += '(';
- std::visit(*this, *expr.operand1);
- inner += " || ";
- std::visit(*this, *expr.operand2);
- inner += ')';
- }
-
- void operator()(const ExprNot& expr) {
- inner += '!';
- std::visit(*this, *expr.operand1);
- }
-
- void operator()(const ExprPredicate& expr) {
- const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
- inner += decomp.GetPredicate(pred);
- }
-
- void operator()(const ExprCondCode& expr) {
- inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
- }
-
- void operator()(const ExprVar& expr) {
- inner += GetFlowVariable(expr.var_index);
- }
-
- void operator()(const ExprBoolean& expr) {
- inner += expr.value ? "true" : "false";
- }
-
- void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
- inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
- }
-
- const std::string& GetResult() const {
- return inner;
- }
-
-private:
- GLSLDecompiler& decomp;
- std::string inner;
-};
-
-class ASTDecompiler {
-public:
- explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
-
- void operator()(const ASTProgram& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(const ASTIfThen& ast) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
- decomp.code.scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
-
- void operator()(const ASTIfElse& ast) {
- decomp.code.AddLine("else {{");
- decomp.code.scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
-
- void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
- UNREACHABLE();
- }
-
- void operator()(const ASTBlockDecoded& ast) {
- decomp.VisitBlock(ast.nodes);
- }
-
- void operator()(const ASTVarSet& ast) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());
- }
-
- void operator()(const ASTLabel& ast) {
- decomp.code.AddLine("// Label_{}:", ast.index);
- }
-
- void operator()([[maybe_unused]] const ASTGoto& ast) {
- UNREACHABLE();
- }
-
- void operator()(const ASTDoWhile& ast) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("do {{");
- decomp.code.scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.code.scope--;
- decomp.code.AddLine("}} while({});", expr_parser.GetResult());
- }
-
- void operator()(const ASTReturn& ast) {
- const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
- if (!is_true) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
- decomp.code.scope++;
- }
- if (ast.kills) {
- decomp.code.AddLine("discard;");
- } else {
- decomp.PreExit();
- decomp.code.AddLine("return;");
- }
- if (!is_true) {
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
- }
-
- void operator()(const ASTBreak& ast) {
- const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
- if (!is_true) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
- decomp.code.scope++;
- }
- decomp.code.AddLine("break;");
- if (!is_true) {
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
- }
-
- void Visit(const ASTNode& node) {
- std::visit(*this, *node->GetInnerData());
- }
-
-private:
- GLSLDecompiler& decomp;
-};
-
-void GLSLDecompiler::DecompileAST() {
- const u32 num_flow_variables = ir.GetASTNumVariables();
- for (u32 i = 0; i < num_flow_variables; i++) {
- code.AddLine("bool {} = false;", GetFlowVariable(i));
- }
-
- ASTDecompiler decompiler{*this};
- decompiler.Visit(ir.GetASTProgram());
-}
-
-} // Anonymous namespace
-
-ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
- ShaderEntries entries;
- for (const auto& cbuf : ir.GetConstantBuffers()) {
- entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
- cbuf.first);
- }
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
- usage.is_written);
- }
- for (const auto& sampler : ir.GetSamplers()) {
- entries.samplers.emplace_back(sampler);
- }
- for (const auto& image : ir.GetImages()) {
- entries.images.emplace_back(image);
- }
- const auto clip_distances = ir.GetClipDistances();
- for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
- entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i;
- }
- for (const auto& buffer : entries.const_buffers) {
- entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
- }
- entries.shader_length = ir.GetLength();
- return entries;
-}
-
-std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
- ShaderType stage, std::string_view identifier,
- std::string_view suffix) {
- GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
- decompiler.Decompile();
- return decompiler.GetResult();
-}
-
-} // namespace OpenGL
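
The deleted ExprDecompiler renders a condition expression tree into a GLSL boolean string by recursing with std::visit. A minimal standalone sketch of the same pattern, using simplified stand-in node types rather than yuzu's real Expr definitions:

#include <memory>
#include <string>
#include <variant>

struct ExprAnd;
struct ExprNot;
struct ExprBoolean;
using Expr = std::variant<ExprAnd, ExprNot, ExprBoolean>;
using ExprPtr = std::shared_ptr<Expr>;

struct ExprAnd { ExprPtr lhs, rhs; };
struct ExprNot { ExprPtr operand; };
struct ExprBoolean { bool value; };

struct Printer {
    std::string inner;
    void operator()(const ExprAnd& e) {
        inner += '(';
        std::visit(*this, *e.lhs);
        inner += " && ";
        std::visit(*this, *e.rhs);
        inner += ')';
    }
    void operator()(const ExprNot& e) {
        inner += '!';
        std::visit(*this, *e.operand);
    }
    void operator()(const ExprBoolean& e) {
        inner += e.value ? "true" : "false";
    }
};

int main() {
    const auto f = std::make_shared<Expr>(ExprBoolean{false});
    const auto n = std::make_shared<Expr>(ExprNot{f});
    const auto t = std::make_shared<Expr>(ExprBoolean{true});
    Expr root = ExprAnd{n, t};
    Printer printer;
    std::visit(printer, root);
    // printer.inner is now "(!false && true)"
}
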
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
deleted file mode 100644
index 0397a000c..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <vector>
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace OpenGL {
-
-class Device;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using SamplerEntry = VideoCommon::Shader::SamplerEntry;
-using ImageEntry = VideoCommon::Shader::ImageEntry;
-
-class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
-public:
- explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
- : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
-
- u32 GetIndex() const {
- return index;
- }
-
-private:
- u32 index = 0;
-};
-
-struct GlobalMemoryEntry {
- constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
- bool is_written_)
- : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
- is_written_} {}
-
- u32 cbuf_index = 0;
- u32 cbuf_offset = 0;
- bool is_read = false;
- bool is_written = false;
-};
-
-struct ShaderEntries {
- std::vector<ConstBufferEntry> const_buffers;
- std::vector<GlobalMemoryEntry> global_memory_entries;
- std::vector<SamplerEntry> samplers;
- std::vector<ImageEntry> images;
- std::size_t shader_length{};
- u32 clip_distances{};
- u32 enabled_uniform_buffers{};
-};
-
-ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- Tegra::Engines::ShaderType stage);
-
-std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- Tegra::Engines::ShaderType stage, std::string_view identifier,
- std::string_view suffix = {});
-
-} // namespace OpenGL
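
ShaderEntries packs per-index flags into u32 bitmasks (clip_distances, enabled_uniform_buffers), least significant bit first. A minimal sketch of packing and testing such a mask, independent of yuzu's types:

#include <array>
#include <cstdint>

// Pack an array of flags into a bitmask; bit i mirrors flags[i].
template <std::size_t N>
constexpr std::uint32_t PackMask(const std::array<bool, N>& flags) {
    std::uint32_t mask = 0;
    for (std::size_t i = 0; i < N; ++i) {
        mask |= (flags[i] ? 1U : 0U) << i;
    }
    return mask;
}

constexpr bool IsSet(std::uint32_t mask, std::uint32_t index) {
    return ((mask >> index) & 1U) != 0;
}

static_assert(PackMask<4>({true, false, true, false}) == 0b0101);
static_assert(IsSet(0b0101, 2) && !IsSet(0b0101, 1));
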
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
deleted file mode 100644
index 0deb86517..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ /dev/null
@@ -1,482 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/fs/file.h"
-#include "common/fs/fs.h"
-#include "common/fs/path_util.h"
-#include "common/logging/log.h"
-#include "common/scm_rev.h"
-#include "common/settings.h"
-#include "common/zstd_compression.h"
-#include "core/core.h"
-#include "core/hle/kernel/k_process.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-
-namespace OpenGL {
-
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::BindlessSamplerMap;
-using VideoCommon::Shader::BoundSamplerMap;
-using VideoCommon::Shader::KeyMap;
-using VideoCommon::Shader::SeparateSamplerKey;
-using ShaderCacheVersionHash = std::array<u8, 64>;
-
-struct ConstBufferKey {
- u32 cbuf = 0;
- u32 offset = 0;
- u32 value = 0;
-};
-
-struct BoundSamplerEntry {
- u32 offset = 0;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-struct SeparateSamplerEntry {
- u32 cbuf1 = 0;
- u32 cbuf2 = 0;
- u32 offset1 = 0;
- u32 offset2 = 0;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-struct BindlessSamplerEntry {
- u32 cbuf = 0;
- u32 offset = 0;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-namespace {
-
-constexpr u32 NativeVersion = 21;
-
-ShaderCacheVersionHash GetShaderCacheVersionHash() {
- ShaderCacheVersionHash hash{};
- const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
- std::memcpy(hash.data(), Common::g_shader_cache_version, length);
- return hash;
-}
-
-} // Anonymous namespace
-
-ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
-
-ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
-
-bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
- if (!file.ReadObject(type)) {
- return false;
- }
- u32 code_size;
- u32 code_size_b;
- if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) {
- return false;
- }
- code.resize(code_size);
- code_b.resize(code_size_b);
- if (file.Read(code) != code_size) {
- return false;
- }
- if (HasProgramA() && file.Read(code_b) != code_size_b) {
- return false;
- }
-
- u8 is_texture_handler_size_known;
- u32 texture_handler_size_value;
- u32 num_keys;
- u32 num_bound_samplers;
- u32 num_separate_samplers;
- u32 num_bindless_samplers;
- if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) ||
- !file.ReadObject(is_texture_handler_size_known) ||
- !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) ||
- !file.ReadObject(compute_info) || !file.ReadObject(num_keys) ||
- !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) ||
- !file.ReadObject(num_bindless_samplers)) {
- return false;
- }
- if (is_texture_handler_size_known) {
- texture_handler_size = texture_handler_size_value;
- }
-
- std::vector<ConstBufferKey> flat_keys(num_keys);
- std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
- std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
- std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
- if (file.Read(flat_keys) != flat_keys.size() ||
- file.Read(flat_bound_samplers) != flat_bound_samplers.size() ||
- file.Read(flat_separate_samplers) != flat_separate_samplers.size() ||
- file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) {
- return false;
- }
- for (const auto& entry : flat_keys) {
- keys.insert({{entry.cbuf, entry.offset}, entry.value});
- }
- for (const auto& entry : flat_bound_samplers) {
- bound_samplers.emplace(entry.offset, entry.sampler);
- }
- for (const auto& entry : flat_separate_samplers) {
- SeparateSamplerKey key;
- key.buffers = {entry.cbuf1, entry.cbuf2};
- key.offsets = {entry.offset1, entry.offset2};
- separate_samplers.emplace(key, entry.sampler);
- }
- for (const auto& entry : flat_bindless_samplers) {
- bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
- }
-
- return true;
-}
-
-bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
- if (!file.WriteObject(static_cast<u32>(type)) ||
- !file.WriteObject(static_cast<u32>(code.size())) ||
- !file.WriteObject(static_cast<u32>(code_b.size()))) {
- return false;
- }
- if (file.Write(code) != code.size()) {
- return false;
- }
- if (HasProgramA() && file.Write(code_b) != code_b.size()) {
- return false;
- }
-
- if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) ||
- !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) ||
- !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) ||
- !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) ||
- !file.WriteObject(static_cast<u32>(bound_samplers.size())) ||
- !file.WriteObject(static_cast<u32>(separate_samplers.size())) ||
- !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) {
- return false;
- }
-
- std::vector<ConstBufferKey> flat_keys;
- flat_keys.reserve(keys.size());
- for (const auto& [address, value] : keys) {
- flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
- }
-
- std::vector<BoundSamplerEntry> flat_bound_samplers;
- flat_bound_samplers.reserve(bound_samplers.size());
- for (const auto& [address, sampler] : bound_samplers) {
- flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
- }
-
- std::vector<SeparateSamplerEntry> flat_separate_samplers;
- flat_separate_samplers.reserve(separate_samplers.size());
- for (const auto& [key, sampler] : separate_samplers) {
- SeparateSamplerEntry entry;
- std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
- std::tie(entry.offset1, entry.offset2) = key.offsets;
- entry.sampler = sampler;
- flat_separate_samplers.push_back(entry);
- }
-
- std::vector<BindlessSamplerEntry> flat_bindless_samplers;
- flat_bindless_samplers.reserve(bindless_samplers.size());
- for (const auto& [address, sampler] : bindless_samplers) {
- flat_bindless_samplers.push_back(
- BindlessSamplerEntry{address.first, address.second, sampler});
- }
-
- return file.Write(flat_keys) == flat_keys.size() &&
- file.Write(flat_bound_samplers) == flat_bound_samplers.size() &&
- file.Write(flat_separate_samplers) == flat_separate_samplers.size() &&
- file.Write(flat_bindless_samplers) == flat_bindless_samplers.size();
-}
-
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
-
-ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
-
-void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
- title_id = title_id_;
-}
-
-std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
- // Skip games without title id
- const bool has_title_id = title_id != 0;
- if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
- return std::nullopt;
- }
-
- Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read,
- Common::FS::FileType::BinaryFile};
- if (!file.IsOpen()) {
- LOG_INFO(Render_OpenGL, "No transferable shader cache found");
- is_usable = true;
- return std::nullopt;
- }
-
- u32 version{};
- if (!file.ReadObject(version)) {
- LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
- return std::nullopt;
- }
-
- if (version < NativeVersion) {
- LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
- file.Close();
- InvalidateTransferable();
- is_usable = true;
- return std::nullopt;
- }
- if (version > NativeVersion) {
- LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
- "of the emulator, skipping");
- return std::nullopt;
- }
-
- // Version is valid, load the shaders
- std::vector<ShaderDiskCacheEntry> entries;
- while (static_cast<u64>(file.Tell()) < file.GetSize()) {
- ShaderDiskCacheEntry& entry = entries.emplace_back();
- if (!entry.Load(file)) {
- LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
- return std::nullopt;
- }
- }
-
- is_usable = true;
- return {std::move(entries)};
-}
-
-std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
- if (!is_usable) {
- return {};
- }
-
- Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read,
- Common::FS::FileType::BinaryFile};
- if (!file.IsOpen()) {
- LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
- return {};
- }
-
- if (const auto result = LoadPrecompiledFile(file)) {
- return *result;
- }
-
- LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
- file.Close();
- InvalidatePrecompiled();
- return {};
-}
-
-std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
- Common::FS::IOFile& file) {
- // Read compressed file from disk and decompress to virtual precompiled cache file
- std::vector<u8> compressed(file.GetSize());
- if (file.Read(compressed) != file.GetSize()) {
- return std::nullopt;
- }
- const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
- SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
- precompiled_cache_virtual_file_offset = 0;
-
- ShaderCacheVersionHash file_hash{};
- if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
- precompiled_cache_virtual_file_offset = 0;
- return std::nullopt;
- }
- if (GetShaderCacheVersionHash() != file_hash) {
- LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
- precompiled_cache_virtual_file_offset = 0;
- return std::nullopt;
- }
-
- std::vector<ShaderDiskCachePrecompiled> entries;
- while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
- u32 binary_size;
- auto& entry = entries.emplace_back();
- if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
- !LoadObjectFromPrecompiled(entry.binary_format) ||
- !LoadObjectFromPrecompiled(binary_size)) {
- return std::nullopt;
- }
-
- entry.binary.resize(binary_size);
- if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
- return std::nullopt;
- }
- }
- return entries;
-}
-
-void ShaderDiskCacheOpenGL::InvalidateTransferable() {
- if (!Common::FS::RemoveFile(GetTransferablePath())) {
- LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
- Common::FS::PathToUTF8String(GetTransferablePath()));
- }
- InvalidatePrecompiled();
-}
-
-void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
- // Clear virtual precompiled cache file
- precompiled_cache_virtual_file.Resize(0);
-
- if (!Common::FS::RemoveFile(GetPrecompiledPath())) {
- LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}",
- Common::FS::PathToUTF8String(GetPrecompiledPath()));
- }
-}
-
-void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
- if (!is_usable) {
- return;
- }
-
- const u64 id = entry.unique_identifier;
- if (stored_transferable.contains(id)) {
- // The shader already exists
- return;
- }
-
- Common::FS::IOFile file = AppendTransferableFile();
- if (!file.IsOpen()) {
- return;
- }
- if (!entry.Save(file)) {
- LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
- file.Close();
- InvalidateTransferable();
- return;
- }
-
- stored_transferable.insert(id);
-}
-
-void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
- if (!is_usable) {
- return;
- }
-
- // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
- // when writing the dump. This should be done the moment I get access to write to the virtual
- // file.
- if (precompiled_cache_virtual_file.GetSize() == 0) {
- SavePrecompiledHeaderToVirtualPrecompiledCache();
- }
-
- GLint binary_length;
- glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
-
- GLenum binary_format;
- std::vector<u8> binary(binary_length);
- glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
-
- if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
- !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
- !SaveArrayToPrecompiled(binary.data(), binary.size())) {
- LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
- unique_identifier);
- InvalidatePrecompiled();
- }
-}
-
-Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
- if (!EnsureDirectories()) {
- return {};
- }
-
- const auto transferable_path{GetTransferablePath()};
- const bool existed = Common::FS::Exists(transferable_path);
-
- Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append,
- Common::FS::FileType::BinaryFile};
- if (!file.IsOpen()) {
- LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}",
- Common::FS::PathToUTF8String(transferable_path));
- return {};
- }
- if (!existed || file.GetSize() == 0) {
- // If the file didn't exist or is empty, write the version header
- if (!file.WriteObject(NativeVersion)) {
- LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
- Common::FS::PathToUTF8String(transferable_path));
- return {};
- }
- }
- return file;
-}
-
-void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
- const auto hash{GetShaderCacheVersionHash()};
- if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
- LOG_ERROR(
- Render_OpenGL,
- "Failed to write precompiled cache version hash to virtual precompiled cache file");
- }
-}
-
-void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
- precompiled_cache_virtual_file_offset = 0;
- const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
- const std::vector<u8> compressed =
- Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
-
- const auto precompiled_path = GetPrecompiledPath();
- Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write,
- Common::FS::FileType::BinaryFile};
-
- if (!file.IsOpen()) {
- LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}",
- Common::FS::PathToUTF8String(precompiled_path));
- return;
- }
- if (file.Write(compressed) != compressed.size()) {
- LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
- Common::FS::PathToUTF8String(precompiled_path));
- }
-}
-
-bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
- const auto CreateDir = [](const std::filesystem::path& dir) {
- if (!Common::FS::CreateDir(dir)) {
- LOG_ERROR(Render_OpenGL, "Failed to create directory={}",
- Common::FS::PathToUTF8String(dir));
- return false;
- }
- return true;
- };
-
- return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) &&
- CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
- CreateDir(GetPrecompiledDir());
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const {
- return GetTransferableDir() / fmt::format("{}.bin", GetTitleID());
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
- return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID());
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const {
- return GetBaseDir() / "transferable";
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
- return GetBaseDir() / "precompiled";
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const {
- return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl";
-}
-
-std::string ShaderDiskCacheOpenGL::GetTitleID() const {
- return fmt::format("{:016X}", title_id);
-}
-
-} // namespace OpenGL
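
ShaderDiskCacheEntry::Load and Save stream fixed-size headers followed by length-prefixed payloads through IOFile. The same record pattern in isolation, using plain iostreams in place of yuzu's IOFile wrapper; a hedged sketch, not the cache's exact on-disk layout:

#include <cstdint>
#include <fstream>
#include <vector>

// Write one length-prefixed record: a u32 byte count, then the payload.
bool SaveRecord(std::ofstream& out, const std::vector<std::uint8_t>& data) {
    const auto size = static_cast<std::uint32_t>(data.size());
    out.write(reinterpret_cast<const char*>(&size), sizeof(size));
    out.write(reinterpret_cast<const char*>(data.data()), data.size());
    return out.good();
}

// Read one record back; fails cleanly on a short or corrupt file.
bool LoadRecord(std::ifstream& in, std::vector<std::uint8_t>& data) {
    std::uint32_t size{};
    if (!in.read(reinterpret_cast<char*>(&size), sizeof(size))) {
        return false;
    }
    data.resize(size);
    return size == 0 ||
           static_cast<bool>(in.read(reinterpret_cast<char*>(data.data()), size));
}
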
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
deleted file mode 100644
index f8bc23868..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ /dev/null
@@ -1,176 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <filesystem>
-#include <optional>
-#include <string>
-#include <tuple>
-#include <type_traits>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include <glad/glad.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "core/file_sys/vfs_vector.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/registry.h"
-
-namespace Common::FS {
-class IOFile;
-}
-
-namespace OpenGL {
-
-using ProgramCode = std::vector<u64>;
-
-/// Describes a shader and how it's used by the guest GPU
-struct ShaderDiskCacheEntry {
- ShaderDiskCacheEntry();
- ~ShaderDiskCacheEntry();
-
- bool Load(Common::FS::IOFile& file);
-
- bool Save(Common::FS::IOFile& file) const;
-
- bool HasProgramA() const {
- return !code.empty() && !code_b.empty();
- }
-
- Tegra::Engines::ShaderType type{};
- ProgramCode code;
- ProgramCode code_b;
-
- u64 unique_identifier = 0;
- std::optional<u32> texture_handler_size;
- u32 bound_buffer = 0;
- VideoCommon::Shader::GraphicsInfo graphics_info;
- VideoCommon::Shader::ComputeInfo compute_info;
- VideoCommon::Shader::KeyMap keys;
- VideoCommon::Shader::BoundSamplerMap bound_samplers;
- VideoCommon::Shader::SeparateSamplerMap separate_samplers;
- VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
-};
-
-/// Contains an OpenGL dumped binary program
-struct ShaderDiskCachePrecompiled {
- u64 unique_identifier = 0;
- GLenum binary_format = 0;
- std::vector<u8> binary;
-};
-
-class ShaderDiskCacheOpenGL {
-public:
- explicit ShaderDiskCacheOpenGL();
- ~ShaderDiskCacheOpenGL();
-
- /// Binds a title ID for all future operations.
- void BindTitleID(u64 title_id);
-
- /// Loads the transferable cache. If the file has an old version or fails to load, it deletes
- /// the file.
- std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
-
- /// Loads current game's precompiled cache. Invalidates on failure.
- std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
-
- /// Removes the transferable (and precompiled) cache file.
- void InvalidateTransferable();
-
- /// Removes the precompiled cache file and clears virtual precompiled cache file.
- void InvalidatePrecompiled();
-
- /// Saves a raw dump to the transferable file. Checks for collisions.
- void SaveEntry(const ShaderDiskCacheEntry& entry);
-
- /// Saves a dump entry to the precompiled file. Does not check for collisions.
- void SavePrecompiled(u64 unique_identifier, GLuint program);
-
- /// Serializes virtual precompiled shader cache file to real file
- void SaveVirtualPrecompiledFile();
-
-private:
- /// Loads the precompiled cache file. Returns empty on failure.
- std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
- Common::FS::IOFile& file);
-
- /// Opens the current game's transferable file and writes its header if the file is new
- Common::FS::IOFile AppendTransferableFile() const;
-
- /// Saves the precompiled header to the virtual precompiled cache file
- void SavePrecompiledHeaderToVirtualPrecompiledCache();
-
- /// Create shader disk cache directories. Returns true on success.
- bool EnsureDirectories() const;
-
- /// Gets current game's transferable file path
- std::filesystem::path GetTransferablePath() const;
-
- /// Gets current game's precompiled file path
- std::filesystem::path GetPrecompiledPath() const;
-
- /// Get user's transferable directory path
- std::filesystem::path GetTransferableDir() const;
-
- /// Get user's precompiled directory path
- std::filesystem::path GetPrecompiledDir() const;
-
- /// Get user's shader directory path
- std::filesystem::path GetBaseDir() const;
-
- /// Get current game's title id
- std::string GetTitleID() const;
-
- template <typename T>
- bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
- const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
- data, length, precompiled_cache_virtual_file_offset);
- precompiled_cache_virtual_file_offset += write_length;
- return write_length == sizeof(T) * length;
- }
-
- template <typename T>
- bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
- const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
- data, length, precompiled_cache_virtual_file_offset);
- precompiled_cache_virtual_file_offset += read_length;
- return read_length == sizeof(T) * length;
- }
-
- template <typename T>
- bool SaveObjectToPrecompiled(const T& object) {
- return SaveArrayToPrecompiled(&object, 1);
- }
-
- bool SaveObjectToPrecompiled(bool object) {
- const auto value = static_cast<u8>(object);
- return SaveArrayToPrecompiled(&value, 1);
- }
-
- template <typename T>
- bool LoadObjectFromPrecompiled(T& object) {
- return LoadArrayFromPrecompiled(&object, 1);
- }
-
- // Stores the whole precompiled cache, which is read from or saved to the precompiled cache
- // file
- FileSys::VectorVfsFile precompiled_cache_virtual_file;
- // Stores the current offset of the precompiled cache file for IO purposes
- std::size_t precompiled_cache_virtual_file_offset = 0;
-
- // Stored transferable shaders
- std::unordered_set<u64> stored_transferable;
-
- /// Title ID to operate on
- u64 title_id = 0;
-
- // Whether the transferable cache was loaded successfully at boot
- bool is_usable = false;
-};
-
-} // namespace OpenGL
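
The SaveArrayToPrecompiled/LoadArrayFromPrecompiled templates stream trivially copyable objects through a byte-backed virtual file while advancing a shared offset. A minimal sketch of that pattern over a plain std::vector backing store, with VectorVfsFile replaced by a simple struct:

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <vector>

// Byte-backed stand-in for the virtual precompiled cache file.
struct VirtualFile {
    std::vector<std::uint8_t> bytes;
    std::size_t offset = 0;

    template <typename T>
    bool Write(const T* data, std::size_t count) {
        static_assert(std::is_trivially_copyable_v<T>);
        const std::size_t size = sizeof(T) * count;
        bytes.resize(std::max(bytes.size(), offset + size));
        std::memcpy(bytes.data() + offset, data, size);
        offset += size;
        return true;
    }

    template <typename T>
    bool Read(T* data, std::size_t count) {
        static_assert(std::is_trivially_copyable_v<T>);
        const std::size_t size = sizeof(T) * count;
        if (offset + size > bytes.size()) {
            return false; // Truncated cache; the caller invalidates the file.
        }
        std::memcpy(data, bytes.data() + offset, size);
        offset += size;
        return true;
    }
};
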
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 553e6e8d6..399959afb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -1,149 +1,3 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
-
-namespace OpenGL {
-
-namespace {
-
-void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
- if (current == old) {
- return;
- }
- if (current == 0) {
- if (enabled) {
- enabled = false;
- glDisable(stage);
- }
- return;
- }
- if (!enabled) {
- enabled = true;
- glEnable(stage);
- }
- glBindProgramARB(stage, current);
-}
-
-} // Anonymous namespace
-
-ProgramManager::ProgramManager(const Device& device)
- : use_assembly_programs{device.UseAssemblyShaders()} {
- if (use_assembly_programs) {
- glEnable(GL_COMPUTE_PROGRAM_NV);
- } else {
- graphics_pipeline.Create();
- glBindProgramPipeline(graphics_pipeline.handle);
- }
-}
-
-ProgramManager::~ProgramManager() = default;
-
-void ProgramManager::BindCompute(GLuint program) {
- if (use_assembly_programs) {
- glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
- } else {
- is_graphics_bound = false;
- glUseProgram(program);
- }
-}
-
-void ProgramManager::BindGraphicsPipeline() {
- if (!use_assembly_programs) {
- UpdateSourcePrograms();
- }
-}
-
-void ProgramManager::BindHostPipeline(GLuint pipeline) {
- if (use_assembly_programs) {
- if (geometry_enabled) {
- geometry_enabled = false;
- old_state.geometry = 0;
- glDisable(GL_GEOMETRY_PROGRAM_NV);
- }
- } else {
- if (!is_graphics_bound) {
- glUseProgram(0);
- }
- }
- glBindProgramPipeline(pipeline);
-}
-
-void ProgramManager::RestoreGuestPipeline() {
- if (use_assembly_programs) {
- glBindProgramPipeline(0);
- } else {
- glBindProgramPipeline(graphics_pipeline.handle);
- }
-}
-
-void ProgramManager::BindHostCompute(GLuint program) {
- if (use_assembly_programs) {
- glDisable(GL_COMPUTE_PROGRAM_NV);
- }
- glUseProgram(program);
- is_graphics_bound = false;
-}
-
-void ProgramManager::RestoreGuestCompute() {
- if (use_assembly_programs) {
- glEnable(GL_COMPUTE_PROGRAM_NV);
- glUseProgram(0);
- }
-}
-
-void ProgramManager::UseVertexShader(GLuint program) {
- if (use_assembly_programs) {
- BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
- }
- current_state.vertex = program;
-}
-
-void ProgramManager::UseGeometryShader(GLuint program) {
- if (use_assembly_programs) {
- BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.geometry, geometry_enabled);
- }
- current_state.geometry = program;
-}
-
-void ProgramManager::UseFragmentShader(GLuint program) {
- if (use_assembly_programs) {
- BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.fragment, fragment_enabled);
- }
- current_state.fragment = program;
-}
-
-void ProgramManager::UpdateSourcePrograms() {
- if (!is_graphics_bound) {
- is_graphics_bound = true;
- glUseProgram(0);
- }
-
- const GLuint handle = graphics_pipeline.handle;
- const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
- if (current == old) {
- return;
- }
- glUseProgramStages(handle, stage, current);
- };
- update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
- update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
- update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
-
- old_state = current_state;
-}
-
-void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
- const auto& regs = maxwell.regs;
-
- // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
- y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
-}
-
-} // namespace OpenGL
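
Both the removed ProgramManager and its replacement elide redundant driver calls by comparing against the last bound handle before touching GL. The pattern in isolation, with a hypothetical DriverBind standing in for glBindProgramARB and friends:

#include <cstdint>
#include <cstdio>

using Handle = std::uint32_t;

// Hypothetical driver entry point; a placeholder, not a real GL call.
void DriverBind(Handle handle) {
    std::printf("bind %u\n", static_cast<unsigned>(handle));
}

// Skips the driver call when the requested handle is already bound.
class BindCache {
public:
    void Bind(Handle handle) {
        if (handle == current) {
            return; // Redundant state change elided.
        }
        current = handle;
        DriverBind(handle);
    }

private:
    Handle current = 0;
};
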
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index ad42cce74..d7ef0775d 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -4,79 +4,142 @@
#pragma once
-#include <cstddef>
+#include <array>
+#include <span>
#include <glad/glad.h>
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
-class Device;
-
-/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
-/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
-/// Not following that rule will cause problems on some AMD drivers.
-struct alignas(16) MaxwellUniformData {
- void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
-
- GLfloat y_direction;
-};
-static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
-static_assert(sizeof(MaxwellUniformData) < 16384,
- "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
-
class ProgramManager {
-public:
- explicit ProgramManager(const Device& device);
- ~ProgramManager();
-
- /// Binds a compute program
- void BindCompute(GLuint program);
-
- /// Updates bound programs.
- void BindGraphicsPipeline();
-
- /// Binds an OpenGL pipeline object unsynchronized with the guest state.
- void BindHostPipeline(GLuint pipeline);
-
- /// Rewinds BindHostPipeline state changes.
- void RestoreGuestPipeline();
-
- /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
- void BindHostCompute(GLuint program);
+ static constexpr size_t NUM_STAGES = 5;
- /// Rewinds BindHostCompute state changes.
- void RestoreGuestCompute();
-
- void UseVertexShader(GLuint program);
- void UseGeometryShader(GLuint program);
- void UseFragmentShader(GLuint program);
-
-private:
- struct PipelineState {
- GLuint vertex = 0;
- GLuint geometry = 0;
- GLuint fragment = 0;
+ static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
+ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
- /// Update GLSL programs.
- void UpdateSourcePrograms();
-
- OGLPipeline graphics_pipeline;
-
- PipelineState current_state;
- PipelineState old_state;
-
- bool use_assembly_programs = false;
-
- bool is_graphics_bound = true;
+public:
+ explicit ProgramManager(const Device& device) {
+ glCreateProgramPipelines(1, &pipeline.handle);
+ if (device.UseAssemblyShaders()) {
+ glEnable(GL_COMPUTE_PROGRAM_NV);
+ }
+ }
+
+ void BindComputeProgram(GLuint program) {
+ glUseProgram(program);
+ is_compute_bound = true;
+ }
+
+ void BindComputeAssemblyProgram(GLuint program) {
+ if (current_assembly_compute_program != program) {
+ current_assembly_compute_program = program;
+ glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
+ }
+ UnbindPipeline();
+ }
+
+ void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
+ static constexpr std::array<GLenum, 5> stage_enums{
+ GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
+ GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
+ };
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (current_programs[stage] != programs[stage].handle) {
+ current_programs[stage] = programs[stage].handle;
+ glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
+ }
+ }
+ BindPipeline();
+ }
+
+ void BindPresentPrograms(GLuint vertex, GLuint fragment) {
+ if (current_programs[0] != vertex) {
+ current_programs[0] = vertex;
+ glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
+ }
+ if (current_programs[4] != fragment) {
+ current_programs[4] = fragment;
+ glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
+ }
+ glUseProgramStages(
+ pipeline.handle,
+ GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
+ current_programs[1] = 0;
+ current_programs[2] = 0;
+ current_programs[3] = 0;
+
+ if (current_stage_mask != 0) {
+ current_stage_mask = 0;
+ for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
+ glDisable(program_type);
+ }
+ }
+ BindPipeline();
+ }
+
+ void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
+ u32 stage_mask) {
+ const u32 changed_mask = current_stage_mask ^ stage_mask;
+ current_stage_mask = stage_mask;
+
+ if (changed_mask != 0) {
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (((changed_mask >> stage) & 1) != 0) {
+ if (((stage_mask >> stage) & 1) != 0) {
+ glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+ } else {
+ glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+ }
+ }
+ }
+ }
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (current_programs[stage] != programs[stage].handle) {
+ current_programs[stage] = programs[stage].handle;
+ glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
+ }
+ }
+ UnbindPipeline();
+ }
+
+ void RestoreGuestCompute() {}
- bool vertex_enabled = false;
- bool geometry_enabled = false;
- bool fragment_enabled = false;
+private:
+ void BindPipeline() {
+ if (!is_pipeline_bound) {
+ is_pipeline_bound = true;
+ glBindProgramPipeline(pipeline.handle);
+ }
+ UnbindCompute();
+ }
+
+ void UnbindPipeline() {
+ if (is_pipeline_bound) {
+ is_pipeline_bound = false;
+ glBindProgramPipeline(0);
+ }
+ UnbindCompute();
+ }
+
+ void UnbindCompute() {
+ if (is_compute_bound) {
+ is_compute_bound = false;
+ glUseProgram(0);
+ }
+ }
+
+ OGLPipeline pipeline;
+ bool is_pipeline_bound{};
+ bool is_compute_bound{};
+
+ u32 current_stage_mask = 0;
+ std::array<GLuint, NUM_STAGES> current_programs{};
+ GLuint current_assembly_compute_program = 0;
};
} // namespace OpenGL
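
Callers now hand the manager a complete set of per-stage handles on every draw and let it skip unchanged stages internally. A hedged usage sketch against the interface above; assumes the surrounding headers are included, and a handle of 0 leaves a stage empty:

#include <array>

// Sketch: bind the five graphics stage programs (vertex at index 0
// through fragment at index 4) for the next draw.
void BindStages(OpenGL::ProgramManager& manager,
                const std::array<OpenGL::OGLProgram, 5>& programs) {
    manager.BindSourcePrograms(programs);
}
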
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 4bf0d6090..d432072ad 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -5,57 +5,108 @@
#include <string_view>
#include <vector>
#include <glad/glad.h>
+
#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/settings.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
-namespace OpenGL::GLShader {
+namespace OpenGL {
-namespace {
+static OGLProgram LinkSeparableProgram(GLuint shader) {
+ OGLProgram program;
+ program.handle = glCreateProgram();
+ glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
+ glAttachShader(program.handle, shader);
+ glLinkProgram(program.handle);
+ if (!Settings::values.renderer_debug) {
+ return program;
+ }
+ GLint link_status{};
+ glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status);
-std::string_view StageDebugName(GLenum type) {
- switch (type) {
- case GL_VERTEX_SHADER:
- return "vertex";
- case GL_GEOMETRY_SHADER:
- return "geometry";
- case GL_FRAGMENT_SHADER:
- return "fragment";
- case GL_COMPUTE_SHADER:
- return "compute";
+ GLint log_length{};
+ glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length);
+ if (log_length == 0) {
+ return program;
+ }
+ std::string log(log_length, 0);
+ glGetProgramInfoLog(program.handle, log_length, nullptr, log.data());
+ if (link_status == GL_FALSE) {
+ LOG_ERROR(Render_OpenGL, "{}", log);
+ } else {
+ LOG_WARNING(Render_OpenGL, "{}", log);
}
- UNIMPLEMENTED();
- return "unknown";
+ return program;
}
-} // Anonymous namespace
+static void LogShader(GLuint shader, std::string_view code = {}) {
+ GLint shader_status{};
+ glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status);
+ if (shader_status == GL_FALSE) {
+ LOG_ERROR(Render_OpenGL, "Failed to build shader");
+ }
+ GLint log_length{};
+ glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
+ if (log_length == 0) {
+ return;
+ }
+ std::string log(log_length, 0);
+ glGetShaderInfoLog(shader, log_length, nullptr, log.data());
+ if (shader_status == GL_FALSE) {
+ LOG_ERROR(Render_OpenGL, "{}", log);
+ if (!code.empty()) {
+ LOG_INFO(Render_OpenGL, "\n{}", code);
+ }
+ } else {
+ LOG_WARNING(Render_OpenGL, "{}", log);
+ }
+}
-GLuint LoadShader(std::string_view source, GLenum type) {
- const std::string_view debug_type = StageDebugName(type);
- const GLuint shader_id = glCreateShader(type);
+OGLProgram CreateProgram(std::string_view code, GLenum stage) {
+ OGLShader shader;
+ shader.handle = glCreateShader(stage);
- const GLchar* source_string = source.data();
- const GLint source_length = static_cast<GLint>(source.size());
+ const GLint length = static_cast<GLint>(code.size());
+ const GLchar* const code_ptr = code.data();
+ glShaderSource(shader.handle, 1, &code_ptr, &length);
+ glCompileShader(shader.handle);
+ if (Settings::values.renderer_debug) {
+ LogShader(shader.handle, code);
+ }
+ return LinkSeparableProgram(shader.handle);
+}
- glShaderSource(shader_id, 1, &source_string, &source_length);
- LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
- glCompileShader(shader_id);
+OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) {
+ OGLShader shader;
+ shader.handle = glCreateShader(stage);
- GLint result = GL_FALSE;
- GLint info_log_length;
- glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result);
- glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
+ glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
+ static_cast<GLsizei>(code.size_bytes()));
+ glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
+ if (Settings::values.renderer_debug) {
+ LogShader(shader.handle);
+ }
+ return LinkSeparableProgram(shader.handle);
+}
- if (info_log_length > 1) {
- std::string shader_error(info_log_length, ' ');
- glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]);
- if (result == GL_TRUE) {
- LOG_DEBUG(Render_OpenGL, "{}", shader_error);
- } else {
- LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
+OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) {
+ OGLAssemblyProgram program;
+ glGenProgramsARB(1, &program.handle);
+ glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB,
+ static_cast<GLsizei>(code.size()), code.data());
+ if (Settings::values.renderer_debug) {
+ const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
+ if (err && *err) {
+ if (std::strstr(err, "error")) {
+ LOG_CRITICAL(Render_OpenGL, "\n{}", err);
+ LOG_INFO(Render_OpenGL, "\n{}", code);
+ } else {
+ LOG_WARNING(Render_OpenGL, "\n{}", err);
+ }
}
}
- return shader_id;
+ return program;
}
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
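
The new entry points cover the three backends: GLSL source, SPIR-V binaries specialized via glSpecializeShader, and NV assembly programs. A short usage sketch for the GLSL path; assumes a current GL context with glad loaded, and the shader source is a placeholder:

#include <string_view>

// Sketch: build a separable fragment program that writes solid white.
OpenGL::OGLProgram MakeFillProgram() {
    constexpr std::string_view code = R"(#version 460 core
layout(location = 0) out vec4 color;
void main() { color = vec4(1.0); }
)";
    return OpenGL::CreateProgram(code, GL_FRAGMENT_SHADER);
}
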
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 1b770532e..4e1a2a8e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -4,92 +4,23 @@
#pragma once
+#include <span>
#include <string>
+#include <string_view>
#include <vector>
+
#include <glad/glad.h>
+
#include "common/assert.h"
#include "common/logging/log.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
-namespace OpenGL::GLShader {
-
-/**
- * Utility function to log the source code of a list of shaders.
- * @param shaders The OpenGL shaders whose source we will print.
- */
-template <typename... T>
-void LogShaderSource(T... shaders) {
- auto shader_list = {shaders...};
-
- for (const auto& shader : shader_list) {
- if (shader == 0)
- continue;
-
- GLint source_length;
- glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length);
-
- std::string source(source_length, ' ');
- glGetShaderSource(shader, source_length, nullptr, &source[0]);
- LOG_INFO(Render_OpenGL, "Shader source {}", source);
- }
-}
-
-/**
- * Utility function to create and compile an OpenGL GLSL shader
- * @param source String of the GLSL shader program
- * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
- */
-GLuint LoadShader(std::string_view source, GLenum type);
-
-/**
- * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
- * @param separable_program whether to create a separable program
- * @param shaders ID of shaders to attach to the program
- * @returns Handle of the newly created OpenGL program object
- */
-template <typename... T>
-GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
- // Link the program
- LOG_DEBUG(Render_OpenGL, "Linking program...");
-
- GLuint program_id = glCreateProgram();
-
- ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
-
- if (separable_program) {
- glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
- }
- if (hint_retrievable) {
- glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
- }
-
- glLinkProgram(program_id);
-
- // Check the program
- GLint result = GL_FALSE;
- GLint info_log_length;
- glGetProgramiv(program_id, GL_LINK_STATUS, &result);
- glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
-
- if (info_log_length > 1) {
- std::string program_error(info_log_length, ' ');
- glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
- if (result == GL_TRUE) {
- LOG_DEBUG(Render_OpenGL, "{}", program_error);
- } else {
- LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
- }
- }
-
- if (result == GL_FALSE) {
- // There was a problem linking the shader, print the source for debugging purposes.
- LogShaderSource(shaders...);
- }
+namespace OpenGL {
- ASSERT_MSG(result == GL_TRUE, "Shader not linked");
+OGLProgram CreateProgram(std::string_view code, GLenum stage);
- ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...);
+OGLProgram CreateProgram(std::span<const u32> code, GLenum stage);
- return program_id;
-}
+OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target);
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index dbdf5230f..586da84e3 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) {
FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
}
-void SetupDirtyShaders(Tables& tables) {
- FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
- Shaders);
-}
-
void SetupDirtyPolygonModes(Tables& tables) {
tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
@@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
SetupDirtyScissors(tables);
SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
- SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
SetupDirtyDepthTest(tables);
SetupDirtyStencilTest(tables);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 94c905116..5864c7c07 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -52,7 +52,6 @@ enum : u8 {
BlendState0,
BlendState7 = BlendState0 + 7,
- Shaders,
ClipDistances,
PolygonModes,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index ff0f03e99..c373c9cb4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -24,9 +24,7 @@
#include "video_core/textures/decoders.h"
namespace OpenGL {
-
namespace {
-
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureMipmapFilter;
using Tegra::Texture::TextureType;
@@ -59,107 +57,6 @@ struct CopyRegion {
GLsizei depth;
};
-struct FormatTuple {
- GLenum internal_format;
- GLenum format = GL_NONE;
- GLenum type = GL_NONE;
-};
-
-constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
- {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
- {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
- {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
- {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
- {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
- {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
- {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
- {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
- {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
- {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
- {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
- {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
- {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
- {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
- {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
- {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
- {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
- {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
- {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
- {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
- {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
- {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
- {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
- {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
- {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
- {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
- {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
- {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
- {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
- {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
- {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
- {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
- {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
- {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
- {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
- {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
- {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
- {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
- {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
- {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
- {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
- {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
- {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
- {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
- {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
- {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
- {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
- {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
- {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
- {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
- {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
- {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
- {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
- {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
- {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
- {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
- {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
- {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
- {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
- {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
- {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
- {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
- {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
- {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
- GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
-}};
-
constexpr std::array ACCELERATED_FORMATS{
GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
@@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{
GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
};
-const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
- ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
- return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
-}
-
GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
switch (info.type) {
case ImageType::e1D:
@@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
return GL_NONE;
}
-GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
+GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
const bool is_multisampled = num_samples > 1;
switch (type) {
- case ImageViewType::e1D:
+ case Shader::TextureType::Color1D:
return GL_TEXTURE_1D;
- case ImageViewType::e2D:
+ case Shader::TextureType::Color2D:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
- case ImageViewType::Cube:
+ case Shader::TextureType::ColorCube:
return GL_TEXTURE_CUBE_MAP;
- case ImageViewType::e3D:
+ case Shader::TextureType::Color3D:
return GL_TEXTURE_3D;
- case ImageViewType::e1DArray:
+ case Shader::TextureType::ColorArray1D:
return GL_TEXTURE_1D_ARRAY;
- case ImageViewType::e2DArray:
+ case Shader::TextureType::ColorArray2D:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
- case ImageViewType::CubeArray:
+ case Shader::TextureType::ColorArrayCube:
return GL_TEXTURE_CUBE_MAP_ARRAY;
- case ImageViewType::Rect:
- return GL_TEXTURE_RECTANGLE;
- case ImageViewType::Buffer:
+ case Shader::TextureType::Buffer:
return GL_TEXTURE_BUFFER;
}
UNREACHABLE_MSG("Invalid image view type={}", type);
@@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
default:
return false;
}
- const GLenum internal_format = GetFormatTuple(info.format).internal_format;
+ const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format;
const auto& format_info = runtime.FormatInfo(info.type, internal_format);
if (format_info.is_compressed) {
return false;
@@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
- const GLuint texture = image_view->DefaultHandle();
- glNamedFramebufferTexture(fbo, attachment, texture, 0);
+ glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0);
return;
}
- const GLuint texture = image_view->Handle(ImageViewType::e3D);
+ const GLuint texture = image_view->Handle(Shader::TextureType::Color3D);
if (image_view->range.extent.layers > 1) {
// TODO: OpenGL doesn't support rendering to a fixed number of slices
glNamedFramebufferTexture(fbo, attachment, texture, 0);
@@ -439,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
}
}
+[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) {
+ switch (format) {
+ case Shader::ImageFormat::Typeless:
+ break;
+ case Shader::ImageFormat::R8_SINT:
+ return GL_R8I;
+ case Shader::ImageFormat::R8_UINT:
+ return GL_R8UI;
+ case Shader::ImageFormat::R16_UINT:
+ return GL_R16UI;
+ case Shader::ImageFormat::R16_SINT:
+ return GL_R16I;
+ case Shader::ImageFormat::R32_UINT:
+ return GL_R32UI;
+ case Shader::ImageFormat::R32G32_UINT:
+ return GL_RG32UI;
+ case Shader::ImageFormat::R32G32B32A32_UINT:
+ return GL_RGBA32UI;
+ }
+ UNREACHABLE_MSG("Invalid image format={}", format);
+ return GL_R32UI;
+}
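
ShaderFormat follows the codebase's usual enum-mapping shape: an exhaustive switch over every enumerator, then an unreachable fallback that still returns a benign value. The same shape in isolation, with a hypothetical two-entry enum rather than Shader::ImageFormat:

#include <cstdint>

enum class Fmt { R8_UINT, R32_UINT };

// Map the hypothetical format enum to a GL internal format constant.
std::uint32_t ToGL(Fmt format) {
    switch (format) {
    case Fmt::R8_UINT:
        return 0x8232; // GL_R8UI
    case Fmt::R32_UINT:
        return 0x8236; // GL_R32UI
    }
    // Unreachable with valid input; fall back to a benign value.
    return 0x8236; // GL_R32UI
}
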
} // Anonymous namespace
ImageBufferMap::~ImageBufferMap() {
@@ -453,7 +364,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
- for (const FormatTuple& tuple : FORMAT_TABLE) {
+ for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) {
const GLenum format = tuple.internal_format;
GLint compat_class;
GLint compat_type;
@@ -475,11 +386,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
null_image_3d.Create(GL_TEXTURE_3D);
- null_image_rect.Create(GL_TEXTURE_RECTANGLE);
glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
- glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
std::array<GLuint, 4> new_handles;
glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
@@ -496,29 +405,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
GL_R8, 0, 1, 0, 6);
const std::array texture_handles{
- null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
- null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
- null_image_view_2d_array.handle, null_image_view_cube.handle,
+ null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
+ null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle,
+ null_image_view_cube.handle,
};
for (const GLuint handle : texture_handles) {
static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
}
- const auto set_view = [this](ImageViewType type, GLuint handle) {
+ const auto set_view = [this](Shader::TextureType type, GLuint handle) {
if (device.HasDebuggingToolAttached()) {
const std::string name = fmt::format("NullImage {}", type);
glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
}
null_image_views[static_cast<size_t>(type)] = handle;
};
- set_view(ImageViewType::e1D, null_image_view_1d.handle);
- set_view(ImageViewType::e2D, null_image_view_2d.handle);
- set_view(ImageViewType::Cube, null_image_view_cube.handle);
- set_view(ImageViewType::e3D, null_image_3d.handle);
- set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
- set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
- set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
- set_view(ImageViewType::Rect, null_image_rect.handle);
+ set_view(Shader::TextureType::Color1D, null_image_view_1d.handle);
+ set_view(Shader::TextureType::Color2D, null_image_view_2d.handle);
+ set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle);
+ set_view(Shader::TextureType::Color3D, null_image_3d.handle);
+ set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
+ set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
+ set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
}
TextureCacheRuntime::~TextureCacheRuntime() = default;
@@ -710,7 +618,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
gl_format = GL_RGBA;
gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
} else {
- const auto& tuple = GetFormatTuple(info.format);
+ const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
gl_internal_format = tuple.internal_format;
gl_format = tuple.format;
gl_type = tuple.type;
@@ -750,8 +658,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
break;
case GL_TEXTURE_BUFFER:
- buffer.Create();
- glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
+ UNREACHABLE();
break;
default:
UNREACHABLE_MSG("Invalid target=0x{:x}", target);
@@ -789,14 +696,6 @@ void Image::UploadMemory(const ImageBufferMap& map,
}
}
-void Image::UploadMemory(const ImageBufferMap& map,
- std::span<const VideoCommon::BufferCopy> copies) {
- for (const VideoCommon::BufferCopy& copy : copies) {
- glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
- copy.dst_offset, copy.size);
- }
-}
-
void Image::DownloadMemory(ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
@@ -958,23 +857,30 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
if (True(image.flags & ImageFlagBits::Converted)) {
internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
} else {
- internal_format = GetFormatTuple(format).internal_format;
+ internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
+ }
+ full_range = info.range;
+ flat_range = info.range;
+ set_object_label = device.HasDebuggingToolAttached();
+ is_render_target = info.IsRenderTarget();
+ original_texture = image.texture.handle;
+ num_samples = image.info.num_samples;
+ if (!is_render_target) {
+ swizzle[0] = info.x_source;
+ swizzle[1] = info.y_source;
+ swizzle[2] = info.z_source;
+ swizzle[3] = info.w_source;
}
- VideoCommon::SubresourceRange flatten_range = info.range;
- std::array<GLuint, 2> handles;
- stored_views.reserve(2);
-
switch (info.type) {
case ImageViewType::e1DArray:
- flatten_range.extent.layers = 1;
+ flat_range.extent.layers = 1;
[[fallthrough]];
case ImageViewType::e1D:
- glGenTextures(2, handles.data());
- SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
- SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
+ SetupView(Shader::TextureType::Color1D);
+ SetupView(Shader::TextureType::ColorArray1D);
break;
case ImageViewType::e2DArray:
- flatten_range.extent.layers = 1;
+ flat_range.extent.layers = 1;
[[fallthrough]];
case ImageViewType::e2D:
if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
@@ -984,63 +890,126 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
.base = {.level = info.range.base.level, .layer = 0},
.extent = {.levels = 1, .layers = 1},
};
- glGenTextures(1, handles.data());
- SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
- break;
+ full_range = slice_range;
+
+ SetupView(Shader::TextureType::Color3D);
+ } else {
+ SetupView(Shader::TextureType::Color2D);
+ SetupView(Shader::TextureType::ColorArray2D);
}
- glGenTextures(2, handles.data());
- SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
- SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
break;
case ImageViewType::e3D:
- glGenTextures(1, handles.data());
- SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
+ SetupView(Shader::TextureType::Color3D);
break;
case ImageViewType::CubeArray:
- flatten_range.extent.layers = 6;
+ flat_range.extent.layers = 6;
[[fallthrough]];
case ImageViewType::Cube:
- glGenTextures(2, handles.data());
- SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
- SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
+ SetupView(Shader::TextureType::ColorCube);
+ SetupView(Shader::TextureType::ColorArrayCube);
break;
case ImageViewType::Rect:
- glGenTextures(1, handles.data());
- SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
+ UNIMPLEMENTED();
break;
case ImageViewType::Buffer:
- glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
- SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
+ UNREACHABLE();
+ break;
+ }
+ switch (info.type) {
+ case ImageViewType::e1D:
+ default_handle = Handle(Shader::TextureType::Color1D);
+ break;
+ case ImageViewType::e1DArray:
+ default_handle = Handle(Shader::TextureType::ColorArray1D);
+ break;
+ case ImageViewType::e2D:
+ default_handle = Handle(Shader::TextureType::Color2D);
+ break;
+ case ImageViewType::e2DArray:
+ default_handle = Handle(Shader::TextureType::ColorArray2D);
+ break;
+ case ImageViewType::e3D:
+ default_handle = Handle(Shader::TextureType::Color3D);
+ break;
+ case ImageViewType::Cube:
+ default_handle = Handle(Shader::TextureType::ColorCube);
+ break;
+ case ImageViewType::CubeArray:
+ default_handle = Handle(Shader::TextureType::ColorArrayCube);
+ break;
+ default:
break;
}
- default_handle = Handle(info.type);
}
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
+ const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
+ : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
+ buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
+
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
+ const VideoCommon::ImageViewInfo& view_info)
+ : VideoCommon::ImageViewBase{info, view_info} {}
+
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
-void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
- GLuint handle, const VideoCommon::ImageViewInfo& info,
- VideoCommon::SubresourceRange view_range) {
- if (info.type == ImageViewType::Buffer) {
- // TODO: Take offset from buffer cache
- glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
- image.guest_size_bytes);
- } else {
- const GLuint parent = image.texture.handle;
- const GLenum target = ImageTarget(view_type, image.info.num_samples);
- glTextureView(handle, target, parent, internal_format, view_range.base.level,
- view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
- if (!info.IsRenderTarget()) {
- ApplySwizzle(handle, format, info.Swizzle());
- }
+GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
+ if (image_format == Shader::ImageFormat::Typeless) {
+ return Handle(texture_type);
+ }
+ const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
+ image_format == Shader::ImageFormat::R16_SINT};
+ if (!storage_views) {
+ storage_views = std::make_unique<StorageViews>();
+ }
+ auto& type_views{is_signed ? storage_views->signeds : storage_views->unsigneds};
+ GLuint& view{type_views[static_cast<size_t>(texture_type)]};
+ if (view == 0) {
+ view = MakeView(texture_type, ShaderFormat(image_format));
+ }
+ return view;
+}
+
+void ImageView::SetupView(Shader::TextureType view_type) {
+ views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format);
+}
+
+GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) {
+ VideoCommon::SubresourceRange view_range;
+ switch (view_type) {
+ case Shader::TextureType::Color1D:
+ case Shader::TextureType::Color2D:
+ case Shader::TextureType::ColorCube:
+ view_range = flat_range;
+ break;
+ case Shader::TextureType::ColorArray1D:
+ case Shader::TextureType::ColorArray2D:
+ case Shader::TextureType::Color3D:
+ case Shader::TextureType::ColorArrayCube:
+ view_range = full_range;
+ break;
+ default:
+ UNREACHABLE();
}
- if (device.HasDebuggingToolAttached()) {
- const std::string name = VideoCommon::Name(*this, view_type);
- glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
+ OGLTextureView& view = stored_views.emplace_back();
+ view.Create();
+
+ const GLenum target = ImageTarget(view_type, num_samples);
+ glTextureView(view.handle, target, original_texture, view_format, view_range.base.level,
+ view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
+ if (!is_render_target) {
+ std::array<SwizzleSource, 4> casted_swizzle;
+ std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) {
+ return static_cast<SwizzleSource>(component_swizzle);
+ });
+ ApplySwizzle(view.handle, format, casted_swizzle);
+ }
+ if (set_object_label) {
+ const std::string name = VideoCommon::Name(*this);
+ glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data());
}
- stored_views.emplace_back().handle = handle;
- views[static_cast<size_t>(view_type)] = handle;
+ return view.handle;
}
Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
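The StorageView cache above hands out typed reinterpret views on first use and memoizes them per texture type, split into signed and unsigned banks since the two need different GL internal formats. A minimal caller sketch (the wrapper function is hypothetical; StorageView and glBindImageTexture are from the code above):

void BindR32StorageImage(ImageView& image_view, GLuint unit) {
    // StorageView falls back to the default typed view for Typeless declarations
    const GLuint handle =
        image_view.StorageView(Shader::TextureType::Color2D, Shader::ImageFormat::R32_UINT);
    glBindImageTexture(unit, handle, 0, GL_TRUE, 0, GL_READ_WRITE, GL_R32UI);
}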
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 25fe61566..921072ebe 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -9,6 +9,7 @@
#include <glad/glad.h>
+#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
@@ -122,18 +123,17 @@ private:
bool has_broken_texture_view_formats = false;
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
- StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
+ StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
- OGLTexture null_image_rect;
OGLTextureView null_image_view_1d;
OGLTextureView null_image_view_2d;
OGLTextureView null_image_view_2d_array;
OGLTextureView null_image_view_cube;
- std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
};
class Image : public VideoCommon::ImageBase {
@@ -154,8 +154,6 @@ public:
void UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
- void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
-
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;
@@ -170,7 +168,6 @@ private:
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
OGLTexture texture;
- OGLBuffer buffer;
OGLTextureView store_view;
GLenum gl_internal_format = GL_NONE;
GLenum gl_format = GL_NONE;
@@ -182,10 +179,17 @@ class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
+ const VideoCommon::ImageViewInfo&, GPUVAddr);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
+ const VideoCommon::ImageViewInfo& view_info);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
- [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
- return views[static_cast<size_t>(query_type)];
+ [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
+ Shader::ImageFormat image_format);
+
+ [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
+ return views[static_cast<size_t>(handle_type)];
}
[[nodiscard]] GLuint DefaultHandle() const noexcept {
@@ -196,15 +200,38 @@ public:
return internal_format;
}
+ [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
+ return gpu_addr;
+ }
+
+ [[nodiscard]] u32 BufferSize() const noexcept {
+ return buffer_size;
+ }
+
private:
- void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
- const VideoCommon::ImageViewInfo& info,
- VideoCommon::SubresourceRange view_range);
+ struct StorageViews {
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> signeds{};
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> unsigneds{};
+ };
+
+ void SetupView(Shader::TextureType view_type);
+
+ GLuint MakeView(Shader::TextureType view_type, GLenum view_format);
- std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
std::vector<OGLTextureView> stored_views;
- GLuint default_handle = 0;
+ std::unique_ptr<StorageViews> storage_views;
GLenum internal_format = GL_NONE;
+ GLuint default_handle = 0;
+ GPUVAddr gpu_addr = 0;
+ u32 buffer_size = 0;
+ GLuint original_texture = 0;
+ int num_samples = 0;
+ VideoCommon::SubresourceRange flat_range;
+ VideoCommon::SubresourceRange full_range;
+ std::array<u8, 4> swizzle{};
+ bool set_object_label = false;
+ bool is_render_target = false;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index f7ad8f370..672f94bfc 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -5,12 +5,120 @@
#pragma once
#include <glad/glad.h>
+
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/surface.h"
namespace OpenGL::MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+struct FormatTuple {
+ GLenum internal_format;
+ GLenum format = GL_NONE;
+ GLenum type = GL_NONE;
+};
+
+constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
+ {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
+ {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
+ {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
+ {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
+ {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
+ {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
+ {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
+ {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
+ {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
+ {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
+ {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
+ {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
+ {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
+ {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
+ {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
+ {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
+ {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
+ {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
+ {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
+ {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
+ {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
+ {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
+ {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
+ {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
+ {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
+ {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
+ {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
+ {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
+ {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
+ {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
+ {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
+ {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
+ {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
+ {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
+ {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
+ {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
+ {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
+ {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
+ {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
+ {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
+ {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
+ {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
+ {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
+ {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
+ {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
+ {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
+ {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
+ {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
+ {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
+ {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
+ {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
+ GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
+}};
+
+inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) {
+ ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
+ return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
+}
+
inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
case Maxwell::VertexAttribute::Type::UnsignedNorm:
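FORMAT_TABLE is indexed directly by PixelFormat, so GetFormatTuple is an O(1) lookup; compressed entries only populate internal_format because their data goes through glCompressedTexSubImage*, which takes no format/type pair. A sketch of the intended use for an uncompressed format (the wrapper function is hypothetical):

void UploadR8G8(GLuint texture, GLsizei width, GLsizei height, const void* pixels) {
    const auto& tuple = MaxwellToGL::GetFormatTuple(VideoCore::Surface::PixelFormat::R8G8_UNORM);
    glTextureStorage2D(texture, 1, tuple.internal_format, width, height);  // GL_RG8
    glTextureSubImage2D(texture, 0, 0, 0, width, height, tuple.format, tuple.type, pixels);
}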
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a718bff7a..285e78384 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -25,6 +25,7 @@
#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/textures/decoders.h"
@@ -139,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
}
AddTelemetryFields();
InitOpenGLObjects();
+
+ // Initialize default attributes to match hardware's disabled attributes
+ GLint max_attribs{};
+ glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs);
+ for (GLint attrib = 0; attrib < max_attribs; ++attrib) {
+ glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f);
+ }
+    // Enable seamless cubemaps when per-texture parameters are not available
+ if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
+ glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
+ }
+ // Enable unified vertex attributes and query vertex buffer address when the driver supports it
+ if (device.HasVertexBufferUnifiedMemory()) {
+ glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
+ glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
+
+ glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
+ glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
+ &vertex_buffer_address);
+ }
}
RendererOpenGL::~RendererOpenGL() = default;
@@ -229,22 +250,9 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
}
void RendererOpenGL::InitOpenGLObjects() {
- glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
- Settings::values.bg_blue.GetValue(), 0.0f);
-
// Create shader programs
- OGLShader vertex_shader;
- vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
-
- OGLShader fragment_shader;
- fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
-
- vertex_program.Create(true, false, vertex_shader.handle);
- fragment_program.Create(true, false, fragment_shader.handle);
-
- pipeline.Create();
- glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
- glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
+ present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
+ present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
// Generate presentation sampler
present_sampler.Create();
@@ -266,21 +274,6 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
-
- // Enable seamless cubemaps when per texture parameters are not available
- if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
- glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
- }
-
- // Enable unified vertex attributes and query vertex buffer address when the driver supports it
- if (device.HasVertexBufferUnifiedMemory()) {
- glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
- glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
-
- glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
- glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
- &vertex_buffer_address);
- }
}
void RendererOpenGL::AddTelemetryFields() {
@@ -337,15 +330,17 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
if (renderer_settings.set_background_color) {
// Update background color before drawing
- glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
- Settings::values.bg_blue.GetValue(), 0.0f);
+ glClearColor(Settings::values.bg_red.GetValue() / 255.0f,
+ Settings::values.bg_green.GetValue() / 255.0f,
+ Settings::values.bg_blue.GetValue() / 255.0f, 1.0f);
}
// Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
- glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
- std::data(ortho_matrix));
+ program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
+ glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
+ ortho_matrix.data());
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;
@@ -406,8 +401,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
- program_manager.BindHostPipeline(pipeline.handle);
-
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
@@ -455,7 +448,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
- program_manager.RestoreGuestPipeline();
+    // TODO: Restore the guest pipeline here once ProgramManager exposes it again
+    // program_manager.RestoreGuestPipeline();
}
void RendererOpenGL::RenderScreenshot() {
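The glClearColor change in DrawScreen fixes a normalization bug: the background color settings hold 8-bit channel values while glClearColor expects floats in [0, 1], so the old call saturated any non-zero channel to full intensity. Dividing by 255 restores the configured color, and the alpha moving from 0 to 1 makes the cleared background opaque. For example, a configured background of (64, 128, 255) now clears as:

glClearColor(64 / 255.0f, 128 / 255.0f, 255 / 255.0f, 1.0f);  // ~(0.25, 0.50, 1.00, 1.0)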
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 0b66f8332..d455f572f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,7 +12,6 @@
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
@@ -111,9 +110,8 @@ private:
// OpenGL object IDs
OGLSampler present_sampler;
OGLBuffer vertex_buffer;
- OGLProgram vertex_program;
- OGLProgram fragment_program;
- OGLPipeline pipeline;
+ OGLProgram present_vertex;
+ OGLProgram present_fragment;
OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 8fb5be393..37a4d1d9d 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -16,8 +16,8 @@
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
@@ -41,21 +41,14 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
using VideoCore::Surface::BytesPerBlock;
namespace {
-
OGLProgram MakeProgram(std::string_view source) {
- OGLShader shader;
- shader.Create(source, GL_COMPUTE_SHADER);
-
- OGLProgram program;
- program.Create(true, false, shader.handle);
- return program;
+ return CreateProgram(source, GL_COMPUTE_SHADER);
}
size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
return static_cast<size_t>(copy.extent.width * copy.extent.height *
copy.src_subresource.num_layers);
}
-
} // Anonymous namespace
UtilShaders::UtilShaders(ProgramManager& program_manager_)
@@ -86,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
- program_manager.BindHostCompute(astc_decoder_program.handle);
+ program_manager.BindComputeProgram(astc_decoder_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
@@ -134,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
- program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
+ program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
@@ -173,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
- program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
+ program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@@ -222,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
"Non-power of two images are not implemented");
- program_manager.BindHostCompute(pitch_unswizzle_program.handle);
+ program_manager.BindComputeProgram(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
@@ -250,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
static constexpr GLuint LOC_SRC_OFFSET = 0;
static constexpr GLuint LOC_DST_OFFSET = 1;
- program_manager.BindHostCompute(copy_bc4_program.handle);
+ program_manager.BindComputeProgram(copy_bc4_program.handle);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0);
@@ -286,7 +279,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
break;
case 4: {
// BGRA8 copy
- program_manager.BindHostCompute(copy_bgra_program.handle);
+ program_manager.BindComputeProgram(copy_bgra_program.handle);
constexpr GLenum FORMAT = GL_RGBA8;
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_offset == zero_offset);
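Each util shader above follows the same dispatch shape: bind the compute program through ProgramManager, bind the staging buffer and output image, then dispatch. Condensed into one hypothetical wrapper (binding indices and image formats vary per shader):

void DispatchUtilShader(ProgramManager& program_manager, const OGLProgram& program,
                        GLuint input_ssbo, GLuint output_image, GLuint groups_x, GLuint groups_y) {
    program_manager.BindComputeProgram(program.handle);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, input_ssbo);
    glBindImageTexture(0, output_image, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8);
    glDispatchCompute(groups_x, groups_y, 1);
}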
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index b7f5b8bc2..6c1b2f063 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA
.bindingCount = 1,
.pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
};
+template <u32 num_textures>
+inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{
+ .uniform_buffers = 0,
+ .storage_buffers = 0,
+ .texture_buffers = 0,
+ .image_buffers = 0,
+ .textures = num_textures,
+ .images = 0,
+ .score = 2,
+};
constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@@ -323,18 +333,19 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
cmdbuf.SetScissor(0, scissor);
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
}
-
} // Anonymous namespace
BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
- StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool)
+ StateTracker& state_tracker_, DescriptorPool& descriptor_pool)
: device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
- one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout),
- two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout),
+ one_texture_descriptor_allocator{
+ descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)},
+ two_textures_descriptor_allocator{
+ descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)},
one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
@@ -362,14 +373,14 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV
.operation = operation,
};
const VkPipelineLayout layout = *one_texture_pipeline_layout;
- const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
+ const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
const VkPipeline pipeline = FindOrEmplacePipeline(key);
- const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
scheduler.RequestRenderpass(dst_framebuffer);
- scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set,
- &device = device](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
+ src_view](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
+ const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
@@ -391,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
const VkPipelineLayout layout = *two_textures_pipeline_layout;
const VkSampler sampler = *nearest_sampler;
const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
- const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
- src_stencil_view, descriptor_set,
- &device = device](vk::CommandBuffer cmdbuf) {
+ src_stencil_view, this](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
+ const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
src_stencil_view);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
@@ -416,7 +426,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
-
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
}
@@ -436,16 +445,14 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
- const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
+ const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
const VkSampler sampler = *nearest_sampler;
- const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
const VkExtent2D extent{
.width = src_image_view.size.width,
.height = src_image_view.size.height,
};
scheduler.RequestRenderpass(dst_framebuffer);
- scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent,
- &device = device](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
const VkOffset2D offset{
.x = 0,
.y = 0,
@@ -466,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
.tex_scale = {viewport.width, viewport.height},
.tex_offset = {0.0f, 0.0f},
};
+ const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
// TODO: Barriers
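Note that Commit() moves from blit-request time into the scheduler lambda: with the new DescriptorPool, descriptor sets appear to come from banks tied to the frame in flight, so the set has to be acquired when the command buffer is actually recorded, not when the work is queued. The pattern, reduced from BlitColor above:

scheduler.Record([this, pipeline, layout, sampler, src_view](vk::CommandBuffer cmdbuf) {
    // Allocated at record time so it comes from the in-flight frame's bank
    const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
    UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
    cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
    cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr);
});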
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 0d81a06ed..33ee095c1 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -31,7 +31,7 @@ struct BlitImagePipelineKey {
class BlitImageHelper {
public:
explicit BlitImageHelper(const Device& device, VKScheduler& scheduler,
- StateTracker& state_tracker, VKDescriptorPool& descriptor_pool);
+ StateTracker& state_tracker, DescriptorPool& descriptor_pool);
~BlitImageHelper();
void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 362278f01..d70153df3 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -15,9 +15,7 @@
#include "video_core/renderer_vulkan/vk_state_tracker.h"
namespace Vulkan {
-
namespace {
-
constexpr size_t POINT = 0;
constexpr size_t LINE = 1;
constexpr size_t POLYGON = 2;
@@ -39,10 +37,20 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
POLYGON, // Patches
};
+void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
+ std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) {
+ return VideoCommon::TransformFeedbackState::Layout{
+ .stream = layout.stream,
+ .varying_count = layout.varying_count,
+ .stride = layout.stride,
+ };
+ });
+ state.varyings = regs.tfb_varying_locs;
+}
} // Anonymous namespace
void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
- bool has_extended_dynamic_state) {
+ bool has_extended_dynamic_state, bool has_dynamic_vertex_input) {
const Maxwell& regs = maxwell3d.regs;
const std::array enabled_lut{
regs.polygon_offset_point_enable,
@@ -52,6 +60,9 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
raw1 = 0;
+ extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0);
+ dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0);
+ xfb_enabled.Assign(regs.tfb_enabled != 0);
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());
@@ -63,37 +74,66 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
logic_op.Assign(PackLogicOp(regs.logic_op.operation));
- rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
topology.Assign(regs.draw.topology);
msaa_mode.Assign(regs.multisample_mode);
raw2 = 0;
+ rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
const auto test_func =
regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
alpha_test_func.Assign(PackComparisonOp(test_func));
early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
-
+ depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0);
+ depth_format.Assign(static_cast<u32>(regs.zeta.format));
+ y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0);
+ provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0);
+ conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0);
+ smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0);
+
+ for (size_t i = 0; i < regs.rt.size(); ++i) {
+ color_formats[i] = static_cast<u8>(regs.rt[i].format);
+ }
alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
point_size = Common::BitCast<u32>(regs.point_size);
- if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) {
- maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false;
- for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
- const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
- binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
- }
- }
- if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) {
- maxwell3d.dirty.flags[Dirty::VertexAttributes] = false;
- for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
- const auto& input = regs.vertex_attrib_format[index];
- auto& attribute = attributes[index];
- attribute.raw = 0;
- attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
- attribute.buffer.Assign(input.buffer);
- attribute.offset.Assign(input.offset);
- attribute.type.Assign(static_cast<u32>(input.type.Value()));
- attribute.size.Assign(static_cast<u32>(input.size.Value()));
+ if (maxwell3d.dirty.flags[Dirty::VertexInput]) {
+ if (has_dynamic_vertex_input) {
+ // Dirty flag will be reset by the command buffer update
+ static constexpr std::array LUT{
+ 0u, // Invalid
+ 1u, // SignedNorm
+ 1u, // UnsignedNorm
+ 2u, // SignedInt
+ 3u, // UnsignedInt
+ 1u, // UnsignedScaled
+ 1u, // SignedScaled
+ 1u, // Float
+ };
+ const auto& attrs = regs.vertex_attrib_format;
+ attribute_types = 0;
+ for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
+ const u32 mask = attrs[i].constant != 0 ? 0 : 3;
+ const u32 type = LUT[static_cast<size_t>(attrs[i].type.Value())];
+ attribute_types |= static_cast<u64>(type & mask) << (i * 2);
+ }
+ } else {
+ maxwell3d.dirty.flags[Dirty::VertexInput] = false;
+ enabled_divisors = 0;
+ for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
+ binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
+ enabled_divisors |= (is_enabled ? u64{1} : 0) << index;
+ }
+ for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+ const auto& input = regs.vertex_attrib_format[index];
+ auto& attribute = attributes[index];
+ attribute.raw = 0;
+ attribute.enabled.Assign(input.constant ? 0 : 1);
+ attribute.buffer.Assign(input.buffer);
+ attribute.offset.Assign(input.offset);
+ attribute.type.Assign(static_cast<u32>(input.type.Value()));
+ attribute.size.Assign(static_cast<u32>(input.size.Value()));
+ }
}
}
if (maxwell3d.dirty.flags[Dirty::Blending]) {
@@ -109,10 +149,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
return static_cast<u16>(viewport.swizzle.raw);
});
}
- if (!has_extended_dynamic_state) {
- no_extended_dynamic_state.Assign(1);
+ if (!extended_dynamic_state) {
dynamic_state.Refresh(regs);
}
+ if (xfb_enabled) {
+ RefreshXfbState(xfb_state, regs);
+ }
}
void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
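With VK_EXT_vertex_input_dynamic_state the full attribute layout drops out of the pipeline key; only a 2-bit numeric class per attribute survives (0 = disabled or constant, 1 = float-like, 2 = signed integer, 3 = unsigned integer), packing all 32 attributes into one u64. A worked example of the loop above for attribute 5 declared as UnsignedInt:

u64 attribute_types = 0;
const u32 type = 3;                                           // LUT[UnsignedInt]
const u32 mask = 3;                                           // attrs[5].constant == 0
attribute_types |= static_cast<u64>(type & mask) << (5 * 2);  // bits [11:10] = 0b11
// DynamicAttributeType(5) recovers 3, the unsigned-integer class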
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index a0eb83a68..c9be37935 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -12,6 +12,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
+#include "video_core/transform_feedback.h"
namespace Vulkan {
@@ -60,7 +61,7 @@ struct FixedPipelineState {
void Refresh(const Maxwell& regs, size_t index);
- constexpr std::array<bool, 4> Mask() const noexcept {
+ std::array<bool, 4> Mask() const noexcept {
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
}
@@ -97,11 +98,11 @@ struct FixedPipelineState {
BitField<20, 3, u32> type;
BitField<23, 6, u32> size;
- constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
+ Maxwell::VertexAttribute::Type Type() const noexcept {
return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
}
- constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
+ Maxwell::VertexAttribute::Size Size() const noexcept {
return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
}
};
@@ -167,37 +168,53 @@ struct FixedPipelineState {
union {
u32 raw1;
- BitField<0, 1, u32> no_extended_dynamic_state;
- BitField<2, 1, u32> primitive_restart_enable;
- BitField<3, 1, u32> depth_bias_enable;
- BitField<4, 1, u32> depth_clamp_disabled;
- BitField<5, 1, u32> ndc_minus_one_to_one;
- BitField<6, 2, u32> polygon_mode;
- BitField<8, 5, u32> patch_control_points_minus_one;
- BitField<13, 2, u32> tessellation_primitive;
- BitField<15, 2, u32> tessellation_spacing;
- BitField<17, 1, u32> tessellation_clockwise;
- BitField<18, 1, u32> logic_op_enable;
- BitField<19, 4, u32> logic_op;
- BitField<23, 1, u32> rasterize_enable;
+ BitField<0, 1, u32> extended_dynamic_state;
+ BitField<1, 1, u32> dynamic_vertex_input;
+ BitField<2, 1, u32> xfb_enabled;
+ BitField<3, 1, u32> primitive_restart_enable;
+ BitField<4, 1, u32> depth_bias_enable;
+ BitField<5, 1, u32> depth_clamp_disabled;
+ BitField<6, 1, u32> ndc_minus_one_to_one;
+ BitField<7, 2, u32> polygon_mode;
+ BitField<9, 5, u32> patch_control_points_minus_one;
+ BitField<14, 2, u32> tessellation_primitive;
+ BitField<16, 2, u32> tessellation_spacing;
+ BitField<18, 1, u32> tessellation_clockwise;
+ BitField<19, 1, u32> logic_op_enable;
+ BitField<20, 4, u32> logic_op;
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
};
union {
u32 raw2;
- BitField<0, 3, u32> alpha_test_func;
- BitField<3, 1, u32> early_z;
+ BitField<0, 1, u32> rasterize_enable;
+ BitField<1, 3, u32> alpha_test_func;
+ BitField<4, 1, u32> early_z;
+ BitField<5, 1, u32> depth_enabled;
+ BitField<6, 5, u32> depth_format;
+ BitField<11, 1, u32> y_negate;
+ BitField<12, 1, u32> provoking_vertex_last;
+ BitField<13, 1, u32> conservative_raster_enable;
+ BitField<14, 1, u32> smooth_lines;
};
+ std::array<u8, Maxwell::NumRenderTargets> color_formats;
u32 alpha_test_ref;
u32 point_size;
- std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
- std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
+ union {
+ u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state
+ u64 enabled_divisors;
+ };
+ std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
+ std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
+
DynamicState dynamic_state;
+ VideoCommon::TransformFeedbackState xfb_state;
- void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state);
+ void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state,
+ bool has_dynamic_vertex_input);
size_t Hash() const noexcept;
@@ -208,8 +225,24 @@ struct FixedPipelineState {
}
size_t Size() const noexcept {
- const size_t total_size = sizeof *this;
- return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
+ if (xfb_enabled) {
+ // When transform feedback is enabled, use the whole struct
+ return sizeof(*this);
+ }
+ if (dynamic_vertex_input) {
+            // Exclude attributes, binding divisors, dynamic state, and xfb state
+ return offsetof(FixedPipelineState, attributes);
+ }
+ if (extended_dynamic_state) {
+            // Exclude dynamic state and xfb state
+ return offsetof(FixedPipelineState, dynamic_state);
+ }
+        // Default: only the trailing xfb state is excluded
+ return offsetof(FixedPipelineState, xfb_state);
+ }
+
+ u32 DynamicAttributeType(size_t index) const noexcept {
+ return (attribute_types >> (index * 2)) & 0b11;
}
};
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
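Size() relies on the member order declared above: the conditionally unused members sit at the tail (attributes and divisors, then dynamic_state, then xfb_state), so hashing and comparing only the first Size() bytes keeps the key small in the common case. The unique-object-representations assert is what makes byte-wise hashing sound. A sketch of the likely consumers (assuming yuzu's Common::CityHash64 helper and memcmp-based equality):

size_t FixedPipelineState::Hash() const noexcept {
    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
    return static_cast<size_t>(hash);
}

bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
    return std::memcmp(this, &rhs, Size()) == 0;
}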
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index f088447e9..68a23b602 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -157,7 +157,7 @@ struct FormatTuple {
{VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT
{VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT
{VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM
- {VK_FORMAT_UNDEFINED}, // R16_SNORM
+ {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM
{VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT
{VK_FORMAT_UNDEFINED}, // R16_SINT
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
@@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
}
-VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
+VkShaderStageFlagBits ShaderStage(Shader::Stage stage) {
switch (stage) {
- case Tegra::Engines::ShaderType::Vertex:
+ case Shader::Stage::VertexA:
+ case Shader::Stage::VertexB:
return VK_SHADER_STAGE_VERTEX_BIT;
- case Tegra::Engines::ShaderType::TesselationControl:
+ case Shader::Stage::TessellationControl:
return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
- case Tegra::Engines::ShaderType::TesselationEval:
+ case Shader::Stage::TessellationEval:
return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
- case Tegra::Engines::ShaderType::Geometry:
+ case Shader::Stage::Geometry:
return VK_SHADER_STAGE_GEOMETRY_BIT;
- case Tegra::Engines::ShaderType::Fragment:
+ case Shader::Stage::Fragment:
return VK_SHADER_STAGE_FRAGMENT_BIT;
- case Tegra::Engines::ShaderType::Compute:
+ case Shader::Stage::Compute:
return VK_SHADER_STAGE_COMPUTE_BIT;
}
UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage);
@@ -685,6 +686,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) {
return {};
}
+VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) {
+ switch (polygon_mode) {
+ case Maxwell::PolygonMode::Point:
+ return VK_POLYGON_MODE_POINT;
+ case Maxwell::PolygonMode::Line:
+ return VK_POLYGON_MODE_LINE;
+ case Maxwell::PolygonMode::Fill:
+ return VK_POLYGON_MODE_FILL;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode);
+ return {};
+}
+
VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
switch (swizzle) {
case Tegra::Texture::SwizzleSource::Zero:
@@ -741,4 +755,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti
return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
}
+VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
+ switch (msaa_mode) {
+ case Tegra::Texture::MsaaMode::Msaa1x1:
+ return VK_SAMPLE_COUNT_1_BIT;
+ case Tegra::Texture::MsaaMode::Msaa2x1:
+ case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
+ return VK_SAMPLE_COUNT_2_BIT;
+ case Tegra::Texture::MsaaMode::Msaa2x2:
+ case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
+ case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
+ return VK_SAMPLE_COUNT_4_BIT;
+ case Tegra::Texture::MsaaMode::Msaa4x2:
+ case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
+ case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
+ case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
+ return VK_SAMPLE_COUNT_8_BIT;
+ case Tegra::Texture::MsaaMode::Msaa4x4:
+ return VK_SAMPLE_COUNT_16_BIT;
+ default:
+ UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
+ return VK_SAMPLE_COUNT_1_BIT;
+ }
+}
+
} // namespace Vulkan::MaxwellToVK
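The MsaaMode names encode the sample grid, so the Vulkan sample count is the grid area: 2x1 maps to 2 samples, 2x2 and its variable-coverage variants to 4, 4x2 to 8, and 4x4 to 16. A usage sketch when filling multisample state for a pipeline (the surrounding pipeline code is assumed):

const VkPipelineMultisampleStateCreateInfo multisample_ci{
    .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
    .pNext = nullptr,
    .flags = 0,
    .rasterizationSamples = MaxwellToVK::MsaaMode(msaa_mode),
    .sampleShadingEnable = VK_FALSE,
    .minSampleShading = 0.0f,
    .pSampleMask = nullptr,
    .alphaToCoverageEnable = VK_FALSE,
    .alphaToOneEnable = VK_FALSE,
};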
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index e3e06ba38..8a9616039 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -5,6 +5,7 @@
#pragma once
#include "common/common_types.h"
+#include "shader_recompiler/stage.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
#include "video_core/textures/texture.h"
@@ -45,7 +46,7 @@ struct FormatInfo {
[[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb,
PixelFormat pixel_format);
-VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);
+VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
@@ -65,10 +66,14 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face);
VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face);
+VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode);
+
VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
+VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode);
+
} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
new file mode 100644
index 000000000..4847db6b6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -0,0 +1,154 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+
+#include <boost/container/small_vector.hpp>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/texture.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+
+namespace Vulkan {
+
+class DescriptorLayoutBuilder {
+public:
+ DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
+
+ bool CanUsePushDescriptor() const noexcept {
+ return device->IsKhrPushDescriptorSupported() &&
+ num_descriptors <= device->MaxPushDescriptors();
+ }
+
+ vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const {
+ if (bindings.empty()) {
+ return nullptr;
+ }
+ const VkDescriptorSetLayoutCreateFlags flags =
+ use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0;
+ return device->GetLogical().CreateDescriptorSetLayout({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = flags,
+ .bindingCount = static_cast<u32>(bindings.size()),
+ .pBindings = bindings.data(),
+ });
+ }
+
+ vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout,
+ VkPipelineLayout pipeline_layout,
+ bool use_push_descriptor) const {
+ if (entries.empty()) {
+ return nullptr;
+ }
+ const VkDescriptorUpdateTemplateType type =
+ use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR
+ : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
+ return device->GetLogical().CreateDescriptorUpdateTemplateKHR({
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
+ .pDescriptorUpdateEntries = entries.data(),
+ .templateType = type,
+ .descriptorSetLayout = descriptor_set_layout,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .pipelineLayout = pipeline_layout,
+ .set = 0,
+ });
+ }
+
+ vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
+ return device->GetLogical().CreatePipelineLayout({
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .setLayoutCount = descriptor_set_layout ? 1U : 0U,
+ .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = nullptr,
+ });
+ }
+
+ void Add(const Shader::Info& info, VkShaderStageFlags stage) {
+ Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
+ Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
+ Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
+ Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors);
+ Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors);
+ Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors);
+ }
+
+private:
+ template <typename Descriptors>
+ void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) {
+ const size_t num{descriptors.size()};
+ for (size_t i = 0; i < num; ++i) {
+ bindings.push_back({
+ .binding = binding,
+ .descriptorType = type,
+ .descriptorCount = descriptors[i].count,
+ .stageFlags = stage,
+ .pImmutableSamplers = nullptr,
+ });
+ entries.push_back({
+ .dstBinding = binding,
+ .dstArrayElement = 0,
+ .descriptorCount = descriptors[i].count,
+ .descriptorType = type,
+ .offset = offset,
+ .stride = sizeof(DescriptorUpdateEntry),
+ });
+ ++binding;
+ num_descriptors += descriptors[i].count;
+ offset += sizeof(DescriptorUpdateEntry);
+ }
+ }
+
+ const Device* device{};
+ boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
+ boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
+ u32 binding{};
+ u32 num_descriptors{};
+ size_t offset{};
+};
+
+inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers,
+ const ImageId*& image_view_ids, TextureCache& texture_cache,
+ VKUpdateDescriptorQueue& update_descriptor_queue) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ image_view_ids += desc.count;
+ }
+ for (const auto& desc : info.image_buffer_descriptors) {
+ image_view_ids += desc.count;
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const VkSampler sampler{*(samplers++)};
+ ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
+ const VkImageView vk_image_view{image_view.Handle(desc.type)};
+ update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
+ }
+ }
+ for (const auto& desc : info.image_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
+ if (desc.is_written) {
+ texture_cache.MarkModification(image_view.image_id);
+ }
+ const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
+ update_descriptor_queue.AddImage(vk_image_view);
+ }
+ }
+}
+
+} // namespace Vulkan
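
DescriptorLayoutBuilder assigns binding slots in the order the public Add() overload pushes them (constant buffers, storage buffers, texel buffers, textures, images) and derives the set layout, pipeline layout, and update template from that single ordering, so the three can never disagree. A usage sketch with the names defined above; whether the caller consults CanUsePushDescriptor() is an assumption about the graphics path, while the compute pipeline later in this patch passes false:

    // Sketch only; `device` and `info` (a Shader::Info) are assumed to exist.
    DescriptorLayoutBuilder builder{device};
    builder.Add(info, VK_SHADER_STAGE_FRAGMENT_BIT);

    const bool use_push{builder.CanUsePushDescriptor()};
    vk::DescriptorSetLayout set_layout{builder.CreateDescriptorSetLayout(use_push)};
    vk::PipelineLayout layout{builder.CreatePipelineLayout(*set_layout)};
    vk::DescriptorUpdateTemplateKHR tmpl{builder.CreateTemplate(*set_layout, *layout, use_push)};
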
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index bec3a81d9..a8d04dc61 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -130,35 +130,45 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (!framebuffer) {
return;
}
- const auto& layout = render_window.GetFramebufferLayout();
- if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
- const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
- const bool use_accelerated =
- rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
- const bool is_srgb = use_accelerated && screen_info.is_srgb;
- if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
- swapchain.Create(layout.width, layout.height, is_srgb);
- blit_screen.Recreate();
- }
-
- scheduler.WaitWorker();
-
- while (!swapchain.AcquireNextImage()) {
- swapchain.Create(layout.width, layout.height, is_srgb);
- blit_screen.Recreate();
+ SCOPE_EXIT({ render_window.OnFrameDisplayed(); });
+ if (!render_window.IsShown()) {
+ return;
+ }
+ const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
+ const bool use_accelerated =
+ rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
+ const bool is_srgb = use_accelerated && screen_info.is_srgb;
+
+ bool has_been_recreated = false;
+ const auto recreate_swapchain = [&] {
+ if (!has_been_recreated) {
+ has_been_recreated = true;
+ scheduler.WaitWorker();
}
- const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
-
- scheduler.Flush(render_semaphore);
-
- if (swapchain.Present(render_semaphore)) {
- blit_screen.Recreate();
+ const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
+ swapchain.Create(layout.width, layout.height, is_srgb);
+ };
+ if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) {
+ recreate_swapchain();
+ }
+ bool is_outdated;
+ do {
+ swapchain.AcquireNextImage();
+ is_outdated = swapchain.IsOutDated();
+ if (is_outdated) {
+ recreate_swapchain();
}
- gpu.RendererFrameEndNotify();
- rasterizer.TickFrame();
+ } while (is_outdated);
+ if (has_been_recreated) {
+ blit_screen.Recreate();
}
+ const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
+ scheduler.Flush(render_semaphore);
+ scheduler.WaitWorker();
+ swapchain.Present(render_semaphore);
- render_window.OnFrameDisplayed();
+ gpu.RendererFrameEndNotify();
+ rasterizer.TickFrame();
}
void RendererVulkan::Report() const {
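
Two things keep the rewritten SwapBuffers correct: SCOPE_EXIT fires render_window.OnFrameDisplayed() on every path, including the new early return for hidden windows, and the has_been_recreated flag in recreate_swapchain means scheduler.WaitWorker() stalls at most once even when the swapchain is both suboptimal and outdated in the same frame. A minimal sketch of the scope-guard idea; yuzu's real macro lives in common/scope_exit.h:

    #include <utility>

    // Minimal scope guard in the spirit of SCOPE_EXIT: the callback runs when
    // the guard leaves scope, no matter which return statement is taken.
    template <typename Fn>
    class ScopeExit {
    public:
        explicit ScopeExit(Fn&& fn) : fn_{std::forward<Fn>(fn)} {}
        ~ScopeExit() { fn_(); }
        ScopeExit(const ScopeExit&) = delete;
        ScopeExit& operator=(const ScopeExit&) = delete;

    private:
        Fn fn_;
    };

    // Usage: ScopeExit guard{[&] { render_window.OnFrameDisplayed(); }};
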
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index a1a32aabe..516f428e7 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -184,55 +184,54 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
.depth = 1,
},
};
- scheduler.Record(
- [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) {
- const VkImageMemoryBarrier base_barrier{
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
- .newLayout = VK_IMAGE_LAYOUT_GENERAL,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = image,
- .subresourceRange =
- {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = 0,
- .levelCount = 1,
- .baseArrayLayer = 0,
- .layerCount = 1,
- },
- };
- VkImageMemoryBarrier read_barrier = base_barrier;
- read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
- read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
-
- VkImageMemoryBarrier write_barrier = base_barrier;
- write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
-
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
- 0, read_barrier);
- cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
- });
+ scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) {
+ const VkImage image = *raw_images[image_index];
+ const VkImageMemoryBarrier base_barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ };
+ VkImageMemoryBarrier read_barrier = base_barrier;
+ read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+ read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+
+ VkImageMemoryBarrier write_barrier = base_barrier;
+ write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
+ read_barrier);
+ cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
+ });
}
- scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
- descriptor_set = descriptor_sets[image_index], buffer = *buffer,
- size = swapchain.GetSize(), pipeline = *pipeline,
- layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) {
+ const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
+ const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
+ const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
const VkClearValue clear_color{
- .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}},
+ .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
- .renderPass = renderpass,
- .framebuffer = framebuffer,
+ .renderPass = *renderpass,
+ .framebuffer = *framebuffers[image_index],
.renderArea =
{
.offset = {0, 0},
@@ -254,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
- cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices));
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {});
+ cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
+ descriptor_sets[image_index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
@@ -301,8 +301,7 @@ void VKBlitScreen::CreateShaders() {
void VKBlitScreen::CreateSemaphores() {
semaphores.resize(image_count);
- std::generate(semaphores.begin(), semaphores.end(),
- [this] { return device.GetLogical().CreateSemaphore(); });
+ std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); });
}
void VKBlitScreen::CreateDescriptorPool() {
@@ -630,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() {
}
void VKBlitScreen::ReleaseRawImages() {
- for (std::size_t i = 0; i < raw_images.size(); ++i) {
- scheduler.Wait(resource_ticks.at(i));
+ for (const u64 tick : resource_ticks) {
+ scheduler.Wait(tick);
}
raw_images.clear();
raw_buffer_commits.clear();
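
Besides capturing this instead of copying every handle into the lambdas, the second Record() above now clears to the user-configured background color each frame instead of transparent black. The settings store 8-bit channels while VkClearValue takes normalized floats; a small sketch of that conversion (helper names are illustrative, the channels come from Settings::values.bg_*):

    #include <vulkan/vulkan.h>

    constexpr float NormalizeChannel(int channel) {
        return static_cast<float>(channel) / 255.0f;
    }

    // Alpha is pinned to 1.0f, matching the patch above.
    constexpr VkClearValue MakeClearColor(int r, int g, int b) {
        return VkClearValue{.color = {.float32 = {NormalizeChannel(r), NormalizeChannel(g),
                                                  NormalizeChannel(b), 1.0f}}};
    }
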
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 0df4e1a1c..f4b3ee95c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -60,38 +60,74 @@ std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
}
return indices;
}
-} // Anonymous namespace
-
-Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
- : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
-Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
- VAddr cpu_addr_, u64 size_bytes_)
- : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
- buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+vk::Buffer CreateBuffer(const Device& device, u64 size) {
+ VkBufferUsageFlags flags =
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+ if (device.IsExtTransformFeedbackSupported()) {
+ flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
+ }
+ return device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .size = SizeBytes(),
- .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
- VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
- VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
- VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+ .size = size,
+ .usage = flags,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
+}
+} // Anonymous namespace
+
+Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+ VAddr cpu_addr_, u64 size_bytes_)
+ : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
+ device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
+ commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
}
- commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+}
+
+VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) {
+ if (!device) {
+ // Null buffer, return a null descriptor
+ return VK_NULL_HANDLE;
+ }
+ const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
+ return offset == view.offset && size == view.size && format == view.format;
+ })};
+ if (it != views.end()) {
+ return *it->handle;
+ }
+ views.push_back({
+ .offset = offset,
+ .size = size,
+ .format = format,
+ .handle = device->GetLogical().CreateBufferView({
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .buffer = *buffer,
+ .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format,
+ .offset = offset,
+ .range = size,
+ }),
+ });
+ return *views.back().handle;
}
BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
- VKDescriptorPool& descriptor_pool)
+ DescriptorPool& descriptor_pool)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
@@ -136,6 +172,30 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
});
}
+void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) {
+ static constexpr VkMemoryBarrier READ_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+ };
+ static constexpr VkMemoryBarrier WRITE_BARRIER{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+ };
+
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([dest_buffer, offset, size, value](vk::CommandBuffer cmdbuf) {
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, READ_BARRIER);
+ cmdbuf.FillBuffer(dest_buffer, offset, size, value);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, WRITE_BARRIER);
+ });
+}
+
void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
u32 base_vertex, u32 num_indices, VkBuffer buffer,
u32 offset, [[maybe_unused]] u32 size) {
@@ -152,8 +212,8 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
}
if (vk_buffer == VK_NULL_HANDLE) {
// Vulkan doesn't support null index buffers. Replace it with our own null buffer.
- ReserveNullIndexBuffer();
- vk_buffer = *null_index_buffer;
+ ReserveNullBuffer();
+ vk_buffer = *null_buffer;
}
scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
@@ -161,6 +221,13 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
}
void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
+ if (count == 0) {
+ ReserveNullBuffer();
+ scheduler.Record([this](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindIndexBuffer(*null_buffer, 0, VK_INDEX_TYPE_UINT32);
+ });
+ return;
+ }
ReserveQuadArrayLUT(first + count, true);
// The LUT has the indices 0, 1, 2, and 3 copied as an array
@@ -195,6 +262,14 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,
// Already logged in the rasterizer
return;
}
+ if (buffer == VK_NULL_HANDLE) {
+ // Vulkan doesn't support null transform feedback buffers.
+ // Replace it with our own null buffer.
+ ReserveNullBuffer();
+ buffer = *null_buffer;
+ offset = 0;
+ size = 0;
+ }
scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
const VkDeviceSize vk_offset = offset;
const VkDeviceSize vk_size = size;
@@ -279,11 +354,11 @@ void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle
});
}
-void BufferCacheRuntime::ReserveNullIndexBuffer() {
- if (null_index_buffer) {
+void BufferCacheRuntime::ReserveNullBuffer() {
+ if (null_buffer) {
return;
}
- null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+ null_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -294,12 +369,12 @@ void BufferCacheRuntime::ReserveNullIndexBuffer() {
.pQueueFamilyIndices = nullptr,
});
if (device.HasDebuggingToolAttached()) {
- null_index_buffer.SetObjectNameEXT("Null index buffer");
+ null_buffer.SetObjectNameEXT("Null buffer");
}
- null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal);
+ null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {
cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
});
}
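
ClearBuffer above wraps vkCmdFillBuffer in a full memory-barrier sandwich because the buffer cache cannot know what previously touched the range. The same recording sequence against the raw Vulkan C API, assuming a command buffer in the recording state and outside a render pass (vkCmdFillBuffer is disallowed inside one):

    #include <cstdint>
    #include <vulkan/vulkan.h>

    // offset and size must be 4-byte multiples (or size == VK_WHOLE_SIZE).
    void RecordClear(VkCommandBuffer cmdbuf, VkBuffer buffer, VkDeviceSize offset,
                     VkDeviceSize size, std::uint32_t value) {
        const VkMemoryBarrier read_barrier{
            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
        };
        const VkMemoryBarrier write_barrier{
            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
        };
        // Make all prior writes visible to the transfer stage.
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &read_barrier, 0, nullptr,
                             0, nullptr);
        vkCmdFillBuffer(cmdbuf, buffer, offset, size, value);
        // Make the fill visible to any later read or write.
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                             VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &write_barrier, 0,
                             nullptr, 0, nullptr);
    }
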
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 982e92191..c27402ff0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -9,13 +9,14 @@
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/surface.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
-class VKDescriptorPool;
+class DescriptorPool;
class VKScheduler;
class BufferCacheRuntime;
@@ -26,6 +27,8 @@ public:
explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_);
+ [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
+
[[nodiscard]] VkBuffer Handle() const noexcept {
return *buffer;
}
@@ -35,8 +38,17 @@ public:
}
private:
+ struct BufferView {
+ u32 offset;
+ u32 size;
+ VideoCore::Surface::PixelFormat format;
+ vk::BufferView handle;
+ };
+
+ const Device* device{};
vk::Buffer buffer;
MemoryCommit commit;
+ std::vector<BufferView> views;
};
class BufferCacheRuntime {
@@ -49,7 +61,7 @@ public:
explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
- VKDescriptorPool& descriptor_pool);
+ DescriptorPool& descriptor_pool);
void Finish();
@@ -60,6 +72,8 @@ public:
void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
std::span<const VideoCommon::BufferCopy> copies);
+ void ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value);
+
void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
@@ -85,6 +99,11 @@ public:
BindBuffer(buffer, offset, size);
}
+ void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
+ VideoCore::Surface::PixelFormat format) {
+ update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
+ }
+
private:
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
update_descriptor_queue.AddBuffer(buffer, offset, size);
@@ -92,7 +111,7 @@ private:
void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
- void ReserveNullIndexBuffer();
+ void ReserveNullBuffer();
const Device& device;
MemoryAllocator& memory_allocator;
@@ -105,8 +124,8 @@ private:
VkIndexType quad_array_lut_index_type{};
u32 current_num_indices = 0;
- vk::Buffer null_index_buffer;
- MemoryCommit null_index_buffer_commit;
+ vk::Buffer null_buffer;
+ MemoryCommit null_buffer_commit;
Uint8Pass uint8_pass;
QuadIndexedPass quad_index_pass;
@@ -122,6 +141,7 @@ struct BufferCacheParams {
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
static constexpr bool USE_MEMORY_MAPS = true;
+ static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
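
Buffer::View, declared above, memoizes one vk::BufferView per distinct (offset, size, format) triple with a linear scan, which stays cheap because a buffer rarely accumulates more than a handful of views. The shape of that find-or-create cache, reduced to standalone types (the int handle stands in for vk::BufferView):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct CachedView {
        std::uint32_t offset;
        std::uint32_t size;
        int format;
        int handle; // stands in for vk::BufferView
    };

    // Returns the existing view's handle or creates a new cache entry.
    int GetOrCreateView(std::vector<CachedView>& views, std::uint32_t offset,
                        std::uint32_t size, int format, int next_handle) {
        const auto it = std::find_if(views.begin(), views.end(), [&](const CachedView& v) {
            return v.offset == offset && v.size == size && v.format == format;
        });
        if (it != views.end()) {
            return it->handle;
        }
        views.push_back({offset, size, format, next_handle});
        return views.back().handle;
    }
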
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 205cd3b05..8e426ce2c 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
constexpr size_t ASTC_NUM_BINDINGS = 4;
-VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
- return {
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .offset = 0,
- .size = static_cast<u32>(size),
- };
-}
-
-std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
- return {{
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- }};
-}
+template <size_t size>
+inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .offset = 0,
+ .size = static_cast<u32>(size),
+};
-std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() {
- return {{
- {
- .binding = ASTC_BINDING_INPUT_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_ENC_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_SWIZZLE_BUFFER,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
- .binding = ASTC_BINDING_OUTPUT_IMAGE,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- }};
-}
+constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+}};
+
+constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
+ .uniform_buffers = 0,
+ .storage_buffers = 2,
+ .texture_buffers = 0,
+ .image_buffers = 0,
+ .textures = 0,
+ .images = 0,
+ .score = 2,
+};
-VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
- return {
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 2,
+constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{
+ {
+ .binding = ASTC_BINDING_INPUT_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .offset = 0,
- .stride = sizeof(DescriptorUpdateEntry),
- };
-}
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = ASTC_BINDING_ENC_BUFFER,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = ASTC_BINDING_SWIZZLE_BUFFER,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+ {
+ .binding = ASTC_BINDING_OUTPUT_IMAGE,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ },
+}};
+
+constexpr DescriptorBankInfo ASTC_BANK_INFO{
+ .uniform_buffers = 0,
+ .storage_buffers = 3,
+ .texture_buffers = 0,
+ .image_buffers = 0,
+ .textures = 0,
+ .images = 1,
+ .score = 4,
+};
-std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
-BuildASTCPassDescriptorUpdateTemplateEntry() {
- return {{
+constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 2,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .offset = 0,
+ .stride = sizeof(DescriptorUpdateEntry),
+};
+
+constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
+ ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
{
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
.dstArrayElement = 0,
@@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() {
.stride = sizeof(DescriptorUpdateEntry),
},
}};
-}
struct AstcPushConstants {
std::array<u32, 2> blocks_dims;
@@ -159,14 +170,14 @@ struct AstcPushConstants {
u32 block_height;
u32 block_height_mask;
};
-
} // Anonymous namespace
-VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
- vk::Span<VkDescriptorSetLayoutBinding> bindings,
- vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
- vk::Span<VkPushConstantRange> push_constants,
- std::span<const u32> code) {
+ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
+ vk::Span<VkDescriptorSetLayoutBinding> bindings,
+ vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
+ const DescriptorBankInfo& bank_info,
+ vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
+ : device{device_} {
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
@@ -196,8 +207,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
.pipelineLayout = *layout,
.set = 0,
});
-
- descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
+ descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info);
}
module = device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
@@ -206,43 +216,34 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
.codeSize = static_cast<u32>(code.size_bytes()),
.pCode = code.data(),
});
+ device.SaveShader(code);
pipeline = device.GetLogical().CreateComputePipeline({
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .stage =
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = *module,
- .pName = "main",
- .pSpecializationInfo = nullptr,
- },
+ .stage{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = *module,
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ },
.layout = *layout,
.basePipelineHandle = nullptr,
.basePipelineIndex = 0,
});
}
-VKComputePass::~VKComputePass() = default;
-
-VkDescriptorSet VKComputePass::CommitDescriptorSet(
- VKUpdateDescriptorQueue& update_descriptor_queue) {
- if (!descriptor_template) {
- return nullptr;
- }
- const VkDescriptorSet set = descriptor_allocator->Commit();
- update_descriptor_queue.Send(*descriptor_template, set);
- return set;
-}
+ComputePass::~ComputePass() = default;
-Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
+Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_,
+ DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
- : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
- BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV),
+ : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
+ INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {},
+ VULKAN_UINT8_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
@@ -256,11 +257,11 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
- const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
+ const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+ const VkBuffer buffer{staging.buffer};
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
- num_vertices](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
static constexpr u32 DISPATCH_SIZE = 1024;
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -268,8 +269,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
};
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
+ const VkDescriptorSet set = descriptor_allocator.Commit();
+ device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
@@ -278,12 +281,12 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
}
QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
+ DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
- : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
- BuildInputOutputDescriptorUpdateTemplate(),
- BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV),
+ : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
+ INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
+ COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_} {}
@@ -313,11 +316,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
- const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
+ const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
- num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex,
+ index_shift](vk::CommandBuffer cmdbuf) {
static constexpr u32 DISPATCH_SIZE = 1024;
static constexpr VkMemoryBarrier WRITE_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -325,10 +328,12 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
};
- const std::array push_constants = {base_vertex, index_shift};
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
- cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
+ const std::array push_constants{base_vertex, index_shift};
+ const VkDescriptorSet set = descriptor_allocator.Commit();
+ device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
+ cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
&push_constants);
cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
@@ -338,15 +343,14 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
}
ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
+ DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
MemoryAllocator& memory_allocator_)
- : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(),
- BuildASTCPassDescriptorUpdateTemplateEntry(),
- BuildComputePushConstantRange(sizeof(AstcPushConstants)),
- ASTC_DECODER_COMP_SPV),
- device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
+ : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
+ ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
+ COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
+ scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
ASTCDecoderPass::~ASTCDecoderPass() = default;
@@ -374,20 +378,20 @@ void ASTCDecoderPass::MakeDataBuffer() {
scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyBuffer(src, dst,
- VkBufferCopy{
- .srcOffset = offset,
- .dstOffset = 0,
- .size = TOTAL_BUFFER_SIZE,
- });
- cmdbuf.PipelineBarrier(
- VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
- VkMemoryBarrier{
- .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
- });
+ static constexpr VkMemoryBarrier write_barrier{
+ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+ };
+ const VkBufferCopy copy{
+ .srcOffset = offset,
+ .dstOffset = 0,
+ .size = TOTAL_BUFFER_SIZE,
+ };
+ cmdbuf.CopyBuffer(src, dst, copy);
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+ 0, write_barrier);
});
}
@@ -411,7 +415,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
const VkImageMemoryBarrier image_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
- .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .srcAccessMask = is_initialized ? VK_ACCESS_SHADER_WRITE_BIT : VkAccessFlags{},
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
@@ -426,7 +430,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
- cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT : 0,
+ cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
+ : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline);
});
@@ -443,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
sizeof(SWIZZLE_TABLE));
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
-
- const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
- const VkPipelineLayout vk_layout = *layout;
+ const void* const descriptor_data{update_descriptor_queue.UpdateData()};
// To unswizzle the ASTC data
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
- scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z,
- block_dims, params, set](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
+ params, descriptor_data](vk::CommandBuffer cmdbuf) {
const AstcPushConstants uniforms{
.blocks_dims = block_dims,
.bytes_per_block_log2 = params.bytes_per_block_log2,
@@ -462,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
.block_height = params.block_height,
.block_height_mask = params.block_height_mask,
};
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {});
- cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
+ const VkDescriptorSet set = descriptor_allocator.Commit();
+ device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
+ cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);
});
}
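
The pattern shared by Uint8Pass, QuadIndexedPass, and ASTCDecoderPass above: the old CommitDescriptorSet() at record time is gone; the raw payload pointer from UpdateData() is captured, and the descriptor set is allocated and written only when the scheduler replays the command on its worker thread, keeping allocation ordered with command-buffer building. A sketch of that shape, using the ComputePass members shown in this patch:

    // update_descriptor_queue, descriptor_allocator, descriptor_template,
    // layout and pipeline are the ComputePass members from this file.
    update_descriptor_queue.Acquire();
    update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
    const void* const descriptor_data{update_descriptor_queue.UpdateData()};

    scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
        const VkDescriptorSet set{descriptor_allocator.Commit()};
        device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
    });
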
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 5ea187c30..114aef2bd 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -4,7 +4,6 @@
#pragma once
-#include <optional>
#include <span>
#include <utility>
@@ -27,31 +26,31 @@ class VKUpdateDescriptorQueue;
class Image;
struct StagingBufferRef;
-class VKComputePass {
+class ComputePass {
public:
- explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
- vk::Span<VkDescriptorSetLayoutBinding> bindings,
- vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
- vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
- ~VKComputePass();
+ explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool,
+ vk::Span<VkDescriptorSetLayoutBinding> bindings,
+ vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
+ const DescriptorBankInfo& bank_info,
+ vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
+ ~ComputePass();
protected:
- VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue);
-
+ const Device& device;
vk::DescriptorUpdateTemplateKHR descriptor_template;
vk::PipelineLayout layout;
vk::Pipeline pipeline;
+ vk::DescriptorSetLayout descriptor_set_layout;
+ DescriptorAllocator descriptor_allocator;
private:
- vk::DescriptorSetLayout descriptor_set_layout;
- std::optional<DescriptorAllocator> descriptor_allocator;
vk::ShaderModule module;
};
-class Uint8Pass final : public VKComputePass {
+class Uint8Pass final : public ComputePass {
public:
explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
+ DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_);
~Uint8Pass();
@@ -66,10 +65,10 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue;
};
-class QuadIndexedPass final : public VKComputePass {
+class QuadIndexedPass final : public ComputePass {
public:
explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
+ DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_);
~QuadIndexedPass();
@@ -84,10 +83,10 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue;
};
-class ASTCDecoderPass final : public VKComputePass {
+class ASTCDecoderPass final : public ComputePass {
public:
explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
+ DescriptorPool& descriptor_pool_,
StagingBufferPool& staging_buffer_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
MemoryAllocator& memory_allocator_);
@@ -99,7 +98,6 @@ public:
private:
void MakeDataBuffer();
- const Device& device;
VKScheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
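
COMPUTE_PUSH_CONSTANT_RANGE, used in the .cpp above, replaces the BuildComputePushConstantRange() helper with a variable template, so each instantiation is a distinct constexpr object with static storage duration and a stable address for vk::Span to point at. The same trick in isolation:

    #include <cstddef>
    #include <cstdint>
    #include <vulkan/vulkan.h>

    // One constant per instantiated size; taking its address is safe for the
    // life of the program.
    template <std::size_t Size>
    inline constexpr VkPushConstantRange kComputePushConstantRange{
        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
        .offset = 0,
        .size = static_cast<std::uint32_t>(Size),
    };

    static_assert(kComputePushConstantRange<sizeof(std::uint32_t) * 2>.size == 8);
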
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 3a48219b7..70b84c7a6 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -2,152 +2,198 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <vector>
+#include <boost/container/small_vector.hpp>
+
+#include "video_core/renderer_vulkan/pipeline_helper.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/shader_notify.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
- const SPIRVShader& shader_)
- : device{device_}, scheduler{scheduler_}, entries{shader_.entries},
- descriptor_set_layout{CreateDescriptorSetLayout()},
- descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
- update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
- descriptor_template{CreateDescriptorUpdateTemplate()},
- shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {}
-
-VKComputePipeline::~VKComputePipeline() = default;
-
-VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
- if (!descriptor_template) {
- return {};
- }
- const VkDescriptorSet set = descriptor_allocator.Commit();
- update_descriptor_queue.Send(*descriptor_template, set);
- return set;
-}
-
-vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
- std::vector<VkDescriptorSetLayoutBinding> bindings;
- u32 binding = 0;
- const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) {
- // TODO(Rodrigo): Maybe make individual bindings here?
- for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
- bindings.push_back({
- .binding = binding++,
- .descriptorType = descriptor_type,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- });
- }
- };
- add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
- add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
- add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
- add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
- add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
- add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
-
- return device.GetLogical().CreateDescriptorSetLayout({
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .bindingCount = static_cast<u32>(bindings.size()),
- .pBindings = bindings.data(),
- });
-}
-
-vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
- return device.GetLogical().CreatePipelineLayout({
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .setLayoutCount = 1,
- .pSetLayouts = descriptor_set_layout.address(),
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = nullptr,
- });
-}
-
-vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
- std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries;
- u32 binding = 0;
- u32 offset = 0;
- FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
- if (template_entries.empty()) {
- // If the shader doesn't use descriptor sets, skip template creation.
- return {};
+using Shader::ImageBufferDescriptor;
+using Tegra::Texture::TexturePair;
+
+ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ Common::ThreadWorker* thread_worker,
+ VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
+ vk::ShaderModule spv_module_)
+ : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
+ spv_module(std::move(spv_module_)) {
+ if (shader_notify) {
+ shader_notify->MarkShaderBuilding();
}
-
- return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
- .pNext = nullptr,
- .flags = 0,
- .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
- .pDescriptorUpdateEntries = template_entries.data(),
- .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
- .descriptorSetLayout = *descriptor_set_layout,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .pipelineLayout = *layout,
- .set = DESCRIPTOR_SET,
- });
-}
-
-vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
- device.SaveShader(code);
-
- return device.GetLogical().CreateShaderModule({
- .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .codeSize = code.size() * sizeof(u32),
- .pCode = code.data(),
- });
-}
-
-vk::Pipeline VKComputePipeline::CreatePipeline() const {
-
- VkComputePipelineCreateInfo ci{
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .stage =
- {
+ std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
+ uniform_buffer_sizes.begin());
+
+ auto func{[this, &descriptor_pool, shader_notify] {
+ DescriptorLayoutBuilder builder{device};
+ builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
+
+ descriptor_set_layout = builder.CreateDescriptorSetLayout(false);
+ pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
+ descriptor_update_template =
+ builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false);
+ descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
+ const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .requiredSubgroupSize = GuestWarpSize,
+ };
+ pipeline = device.GetLogical().CreateComputePipeline({
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .pNext = nullptr,
+ .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = *shader_module,
+ .module = *spv_module,
.pName = "main",
.pSpecializationInfo = nullptr,
},
- .layout = *layout,
- .basePipelineHandle = nullptr,
- .basePipelineIndex = 0,
- };
-
- const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
- .pNext = nullptr,
- .requiredSubgroupSize = GuestWarpSize,
- };
-
- if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) {
- ci.stage.pNext = &subgroup_size_ci;
+ .layout = *pipeline_layout,
+ .basePipelineHandle = 0,
+ .basePipelineIndex = 0,
+ });
+ std::lock_guard lock{build_mutex};
+ is_built = true;
+ build_condvar.notify_one();
+ if (shader_notify) {
+ shader_notify->MarkShaderComplete();
+ }
+ }};
+ if (thread_worker) {
+ thread_worker->QueueWork(std::move(func));
+ } else {
+ func();
+ }
+}
+
+void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
+ Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler,
+ BufferCache& buffer_cache, TextureCache& texture_cache) {
+ update_descriptor_queue.Acquire();
+
+ buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
+ buffer_cache.UnbindComputeStorageBuffers();
+ size_t ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ ASSERT(desc.count == 1);
+ buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
+ desc.is_written);
+ ++ssbo_index;
}
- return device.GetLogical().CreateComputePipeline(ci);
+ texture_cache.SynchronizeComputeDescriptors();
+
+ static constexpr size_t max_elements = 64;
+ std::array<ImageId, max_elements> image_view_ids;
+ boost::container::static_vector<u32, max_elements> image_view_indices;
+ boost::container::static_vector<VkSampler, max_elements> samplers;
+
+ const auto& qmd{kepler_compute.launch_description};
+ const auto& cbufs{qmd.const_buffer_config};
+ const bool via_header_index{qmd.linked_tsc != 0};
+ const auto read_handle{[&](const auto& desc, u32 index) {
+ ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
+ const u32 index_offset{index << desc.size_shift};
+ const u32 offset{desc.cbuf_offset + index_offset};
+ const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
+ if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
+ std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
+ if (desc.has_secondary) {
+ ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
+ const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
+ const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
+ secondary_offset};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ return TexturePair(lhs_raw | rhs_raw, via_header_index);
+ }
+ }
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ }};
+ const auto add_image{[&](const auto& desc) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+ }
+ }};
+ std::ranges::for_each(info.texture_buffer_descriptors, add_image);
+ std::ranges::for_each(info.image_buffer_descriptors, add_image);
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+
+ Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
+ samplers.push_back(sampler->Handle());
+ }
+ }
+ std::ranges::for_each(info.image_descriptors, add_image);
+
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+
+ buffer_cache.UnbindComputeTextureBuffers();
+ ImageId* texture_buffer_ids{image_view_ids.data()};
+ size_t index{};
+ const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
+ for (u32 i = 0; i < desc.count; ++i) {
+ bool is_written{false};
+ if constexpr (is_image) {
+ is_written = desc.is_written;
+ }
+ ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
+ buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
+ image_view.BufferSize(), image_view.format,
+ is_written, is_image);
+ ++texture_buffer_ids;
+ ++index;
+ }
+ }};
+ std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
+ std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
+
+ buffer_cache.UpdateComputeBuffers();
+ buffer_cache.BindHostComputeBuffers();
+
+ const VkSampler* samplers_it{samplers.data()};
+ const ImageId* views_it{image_view_ids.data()};
+ PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue);
+
+ if (!is_built.load(std::memory_order::relaxed)) {
+ // Wait for the pipeline to be built
+ scheduler.Record([this](vk::CommandBuffer) {
+ std::unique_lock lock{build_mutex};
+ build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
+ });
+ }
+ const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+ scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
+ if (!descriptor_set_layout) {
+ return;
+ }
+ const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
+ const vk::Device& dev{device.GetLogical()};
+ dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
+ descriptor_set, nullptr);
+ });
}
} // namespace Vulkan
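ComputePipeline::Configure above records a command that blocks until the asynchronous pipeline build finishes. A minimal standalone sketch of that gate, assuming the same atomic/condvar layout as is_built/build_mutex/build_condvar (the BuildGate name is hypothetical):

    #include <atomic>
    #include <condition_variable>
    #include <mutex>

    struct BuildGate {
        std::mutex mutex;
        std::condition_variable condvar;
        std::atomic_bool is_built{false};

        // Worker thread: flip the flag under the lock, then notify.
        void MarkBuilt() {
            std::scoped_lock lock{mutex};
            is_built = true;
            condvar.notify_all();
        }

        // Render thread: block until the worker has published the handle.
        void WaitUntilBuilt() {
            std::unique_lock lock{mutex};
            condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
        }
    };

The relaxed load outside the mutex is only a fast-path check; the wait re-tests the predicate under the lock, so no notification can be lost.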
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 7e16575ac..52fec04d3 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -4,61 +4,63 @@
#pragma once
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+
#include "common/common_types.h"
+#include "common/thread_worker.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
+namespace VideoCore {
+class ShaderNotify;
+}
+
namespace Vulkan {
class Device;
class VKScheduler;
-class VKUpdateDescriptorQueue;
-class VKComputePipeline final {
+class ComputePipeline {
public:
- explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
- const SPIRVShader& shader_);
- ~VKComputePipeline();
-
- VkDescriptorSet CommitDescriptorSet();
+ explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ Common::ThreadWorker* thread_worker,
+ VideoCore::ShaderNotify* shader_notify, const Shader::Info& info,
+ vk::ShaderModule spv_module);
- VkPipeline GetHandle() const {
- return *pipeline;
- }
+ ComputePipeline& operator=(ComputePipeline&&) noexcept = delete;
+ ComputePipeline(ComputePipeline&&) noexcept = delete;
- VkPipelineLayout GetLayout() const {
- return *layout;
- }
+ ComputePipeline& operator=(const ComputePipeline&) = delete;
+ ComputePipeline(const ComputePipeline&) = delete;
- const ShaderEntries& GetEntries() const {
- return entries;
- }
+ void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory,
+ VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
private:
- vk::DescriptorSetLayout CreateDescriptorSetLayout() const;
-
- vk::PipelineLayout CreatePipelineLayout() const;
-
- vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const;
-
- vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const;
-
- vk::Pipeline CreatePipeline() const;
-
const Device& device;
- VKScheduler& scheduler;
- ShaderEntries entries;
+ VKUpdateDescriptorQueue& update_descriptor_queue;
+ Shader::Info info;
+ VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
+
+ vk::ShaderModule spv_module;
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
- VKUpdateDescriptorQueue& update_descriptor_queue;
- vk::PipelineLayout layout;
- vk::DescriptorUpdateTemplateKHR descriptor_template;
- vk::ShaderModule shader_module;
+ vk::PipelineLayout pipeline_layout;
+ vk::DescriptorUpdateTemplateKHR descriptor_update_template;
vk::Pipeline pipeline;
+
+ std::condition_variable build_condvar;
+ std::mutex build_mutex;
+ std::atomic_bool is_built{false};
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index ef9fb5910..8e77e4796 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <mutex>
+#include <span>
#include <vector>
#include "common/common_types.h"
@@ -13,79 +15,149 @@
namespace Vulkan {
-// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
-constexpr std::size_t SETS_GROW_RATE = 0x20;
+// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
+constexpr size_t SETS_GROW_RATE = 16;
+constexpr s32 SCORE_THRESHOLD = 3;
+constexpr u32 SETS_PER_POOL = 64;
-DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_,
- VkDescriptorSetLayout layout_)
- : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE),
- descriptor_pool{descriptor_pool_}, layout{layout_} {}
+struct DescriptorBank {
+ DescriptorBankInfo info;
+ std::vector<vk::DescriptorPool> pools;
+};
-DescriptorAllocator::~DescriptorAllocator() = default;
+bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept {
+ return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers &&
+ texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers &&
+ textures >= subset.textures && images >= subset.images;
+}
-VkDescriptorSet DescriptorAllocator::Commit() {
- const std::size_t index = CommitResource();
- return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
+template <typename Descriptors>
+static u32 Accumulate(const Descriptors& descriptors) {
+ u32 count = 0;
+ for (const auto& descriptor : descriptors) {
+ count += descriptor.count;
+ }
+ return count;
}
-void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
- descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
+static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
+ DescriptorBankInfo bank;
+ for (const Shader::Info& info : infos) {
+ bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors);
+ bank.storage_buffers += Accumulate(info.storage_buffers_descriptors);
+ bank.texture_buffers += Accumulate(info.texture_buffer_descriptors);
+ bank.image_buffers += Accumulate(info.image_buffer_descriptors);
+ bank.textures += Accumulate(info.texture_descriptors);
+ bank.images += Accumulate(info.image_descriptors);
+ }
+ bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers +
+ bank.image_buffers + bank.textures + bank.images;
+ return bank;
}
-VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler)
- : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{
- AllocateNewPool()} {}
-
-VKDescriptorPool::~VKDescriptorPool() = default;
-
-vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
- static constexpr u32 num_sets = 0x20000;
- static constexpr VkDescriptorPoolSize pool_sizes[] = {
- {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90},
- {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
- {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
- {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
- {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
- {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40},
+static void AllocatePool(const Device& device, DescriptorBank& bank) {
+ std::array<VkDescriptorPoolSize, 6> pool_sizes;
+ size_t pool_cursor{};
+ const auto add = [&](VkDescriptorType type, u32 count) {
+ if (count > 0) {
+ pool_sizes[pool_cursor++] = {
+ .type = type,
+ .descriptorCount = count * SETS_PER_POOL,
+ };
+ }
};
-
- const VkDescriptorPoolCreateInfo ci{
+ const auto& info{bank.info};
+ add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers);
+ add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers);
+ add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers);
+ add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers);
+ add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures);
+ add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images);
+ bank.pools.push_back(device.GetLogical().CreateDescriptorPool({
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
- .maxSets = num_sets,
- .poolSizeCount = static_cast<u32>(std::size(pool_sizes)),
+ .maxSets = SETS_PER_POOL,
+ .poolSizeCount = static_cast<u32>(pool_cursor),
.pPoolSizes = std::data(pool_sizes),
- };
- return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci));
+ }));
+}
+
+DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
+ DescriptorBank& bank_, VkDescriptorSetLayout layout_)
+ : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_},
+ layout{layout_} {}
+
+VkDescriptorSet DescriptorAllocator::Commit() {
+ const size_t index = CommitResource();
+ return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
}
-vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout,
- std::size_t count) {
- const std::vector layout_copies(count, layout);
- VkDescriptorSetAllocateInfo ai{
+void DescriptorAllocator::Allocate(size_t begin, size_t end) {
+ sets.push_back(AllocateDescriptors(end - begin));
+}
+
+vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) {
+ const std::vector<VkDescriptorSetLayout> layouts(count, layout);
+ VkDescriptorSetAllocateInfo allocate_info{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.pNext = nullptr,
- .descriptorPool = **active_pool,
+ .descriptorPool = *bank->pools.back(),
.descriptorSetCount = static_cast<u32>(count),
- .pSetLayouts = layout_copies.data(),
+ .pSetLayouts = layouts.data(),
};
-
- vk::DescriptorSets sets = active_pool->Allocate(ai);
- if (!sets.IsOutOfPoolMemory()) {
- return sets;
+ vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info);
+ if (!new_sets.IsOutOfPoolMemory()) {
+ return new_sets;
}
-
// Our current pool is out of memory. Allocate a new one and retry
- active_pool = AllocateNewPool();
- ai.descriptorPool = **active_pool;
- sets = active_pool->Allocate(ai);
- if (!sets.IsOutOfPoolMemory()) {
- return sets;
+ AllocatePool(*device, *bank);
+ allocate_info.descriptorPool = *bank->pools.back();
+ new_sets = bank->pools.back().Allocate(allocate_info);
+ if (!new_sets.IsOutOfPoolMemory()) {
+ return new_sets;
}
-
// After allocating a new pool, we are out of memory again. We can't handle this from here.
throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY);
}
+DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler)
+ : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {}
+
+DescriptorPool::~DescriptorPool() = default;
+
+DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
+ std::span<const Shader::Info> infos) {
+ return Allocator(layout, MakeBankInfo(infos));
+}
+
+DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
+ const Shader::Info& info) {
+ return Allocator(layout, MakeBankInfo(std::array{info}));
+}
+
+DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
+ const DescriptorBankInfo& info) {
+ return DescriptorAllocator(device, master_semaphore, Bank(info), layout);
+}
+
+DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) {
+ std::shared_lock read_lock{banks_mutex};
+ const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) {
+ return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
+ });
+ if (it != bank_infos.end()) {
+ return *banks[std::distance(bank_infos.begin(), it)];
+ }
+ read_lock.unlock();
+
+ std::unique_lock write_lock{banks_mutex};
+ bank_infos.push_back(reqs);
+
+ auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>());
+ bank.info = reqs;
+ AllocatePool(device, bank);
+ return bank;
+}
+
} // namespace Vulkan
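DescriptorPool::Bank above reuses an existing bank when the requested totals score within SCORE_THRESHOLD of a bank and that bank is a superset in every descriptor category. A standalone model of the policy, with the field set trimmed for brevity and hypothetical values:

    #include <cstdlib>

    struct BankInfo {
        int uniform_buffers{};
        int textures{};
        int score{}; // sum of every descriptor count

        bool IsSuperset(const BankInfo& subset) const {
            return uniform_buffers >= subset.uniform_buffers && textures >= subset.textures;
        }
    };

    constexpr int SCORE_THRESHOLD = 3; // mirrors the constant above

    bool CanReuse(const BankInfo& bank, const BankInfo& reqs) {
        return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
    }

    // CanReuse({.uniform_buffers = 4, .textures = 8, .score = 12},
    //          {.uniform_buffers = 3, .textures = 7, .score = 10}) is true:
    // the scores differ by 2 and the bank dominates both categories, so the
    // existing pools are shared instead of growing a new bank.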
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index f892be7be..59466aac5 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -4,57 +4,85 @@
#pragma once
+#include <shared_mutex>
+#include <span>
#include <vector>
+#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
-class VKDescriptorPool;
class VKScheduler;
+struct DescriptorBank;
+
+struct DescriptorBankInfo {
+ [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept;
+
+ u32 uniform_buffers{}; ///< Number of uniform buffer descriptors
+ u32 storage_buffers{}; ///< Number of storage buffer descriptors
+ u32 texture_buffers{}; ///< Number of texture buffer descriptors
+ u32 image_buffers{}; ///< Number of image buffer descriptors
+ u32 textures{}; ///< Number of texture descriptors
+ u32 images{}; ///< Number of image descriptors
+ s32 score{}; ///< Number of descriptors in total
+};
+
class DescriptorAllocator final : public ResourcePool {
+ friend class DescriptorPool;
+
public:
- explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
- ~DescriptorAllocator() override;
+ explicit DescriptorAllocator() = default;
+ ~DescriptorAllocator() override = default;
+
+ DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
+ DescriptorAllocator(DescriptorAllocator&&) noexcept = default;
DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
DescriptorAllocator(const DescriptorAllocator&) = delete;
VkDescriptorSet Commit();
-protected:
- void Allocate(std::size_t begin, std::size_t end) override;
-
private:
- VKDescriptorPool& descriptor_pool;
- const VkDescriptorSetLayout layout;
+ explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
+ DescriptorBank& bank_, VkDescriptorSetLayout layout_);
- std::vector<vk::DescriptorSets> descriptors_allocations;
-};
+ void Allocate(size_t begin, size_t end) override;
+
+ vk::DescriptorSets AllocateDescriptors(size_t count);
+
+ const Device* device{};
+ DescriptorBank* bank{};
+ VkDescriptorSetLayout layout{};
-class VKDescriptorPool final {
- friend DescriptorAllocator;
+ std::vector<vk::DescriptorSets> sets;
+};
+class DescriptorPool {
public:
- explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler);
- ~VKDescriptorPool();
+ explicit DescriptorPool(const Device& device, VKScheduler& scheduler);
+ ~DescriptorPool();
- VKDescriptorPool(const VKDescriptorPool&) = delete;
- VKDescriptorPool& operator=(const VKDescriptorPool&) = delete;
+ DescriptorPool& operator=(const DescriptorPool&) = delete;
+ DescriptorPool(const DescriptorPool&) = delete;
-private:
- vk::DescriptorPool* AllocateNewPool();
+ DescriptorAllocator Allocator(VkDescriptorSetLayout layout,
+ std::span<const Shader::Info> infos);
+ DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info);
+ DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info);
- vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count);
+private:
+ DescriptorBank& Bank(const DescriptorBankInfo& reqs);
const Device& device;
MasterSemaphore& master_semaphore;
- std::vector<vk::DescriptorPool> pools;
- vk::DescriptorPool* active_pool;
+ std::shared_mutex banks_mutex;
+ std::vector<DescriptorBankInfo> bank_infos;
+ std::vector<std::unique_ptr<DescriptorBank>> banks;
};
} // namespace Vulkan
\ No newline at end of file
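DescriptorAllocator::Commit maps a flat index from the resource pool into the chunked sets storage declared above: descriptor sets arrive in blocks of SETS_GROW_RATE, so a divide selects the block and a modulo the slot. A minimal model of that lookup, assuming the same grow rate:

    #include <cstddef>
    #include <vector>

    constexpr std::size_t GROW_RATE = 16; // mirrors SETS_GROW_RATE

    int Lookup(const std::vector<std::vector<int>>& blocks, std::size_t index) {
        // Block first (index / GROW_RATE), then the slot inside it.
        return blocks[index / GROW_RATE][index % GROW_RATE];
    }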
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index fc6dd83eb..18482e1d0 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -1,29 +1,58 @@
-// Copyright 2019 yuzu Emulator Project
+// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
-#include <array>
-#include <cstring>
-#include <vector>
+#include <span>
-#include "common/common_types.h"
-#include "common/microprofile.h"
-#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include <boost/container/small_vector.hpp>
+#include <boost/container/static_vector.hpp>
+
+#include "common/bit_field.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
-#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/pipeline_helper.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
-#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/shader_notify.h"
#include "video_core/vulkan_common/vulkan_device.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
-
-namespace Vulkan {
-MICROPROFILE_DECLARE(Vulkan_PipelineCache);
+#if defined(_MSC_VER) && defined(NDEBUG)
+#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
+#else
+#define LAMBDA_FORCEINLINE
+#endif
+namespace Vulkan {
namespace {
+using boost::container::small_vector;
+using boost::container::static_vector;
+using Shader::ImageBufferDescriptor;
+using Tegra::Texture::TexturePair;
+using VideoCore::Surface::PixelFormat;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+
+constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage;
+constexpr size_t MAX_IMAGE_ELEMENTS = 64;
+
+DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) {
+ DescriptorLayoutBuilder builder{device};
+ for (size_t index = 0; index < infos.size(); ++index) {
+ static constexpr std::array stages{
+ VK_SHADER_STAGE_VERTEX_BIT,
+ VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+ VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+ VK_SHADER_STAGE_GEOMETRY_BIT,
+ VK_SHADER_STAGE_FRAGMENT_BIT,
+ };
+ builder.Add(infos[index], stages.at(index));
+ }
+ return builder;
+}
template <class StencilFace>
VkStencilOpState GetStencilFaceState(const StencilFace& face) {
@@ -39,15 +68,24 @@ VkStencilOpState GetStencilFaceState(const StencilFace& face) {
}
bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
- static constexpr std::array unsupported_topologies = {
+ static constexpr std::array unsupported_topologies{
VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
- VK_PRIMITIVE_TOPOLOGY_PATCH_LIST};
- return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies),
- topology) == std::end(unsupported_topologies);
+ VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,
+ // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT,
+ };
+ return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end();
+}
+
+bool IsLine(VkPrimitiveTopology topology) {
+ static constexpr std::array line_topologies{
+ VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP,
+ // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT,
+ };
+ return std::ranges::find(line_topologies, topology) != line_topologies.end();
}
VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
@@ -59,8 +97,7 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
BitField<12, 3, Maxwell::ViewportSwizzle> w;
};
const Swizzle unpacked{swizzle};
-
- return {
+ return VkViewportSwizzleNV{
.x = MaxwellToVK::ViewportSwizzle(unpacked.x),
.y = MaxwellToVK::ViewportSwizzle(unpacked.y),
.z = MaxwellToVK::ViewportSwizzle(unpacked.z),
@@ -68,193 +105,446 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
};
}
-VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
- switch (msaa_mode) {
- case Tegra::Texture::MsaaMode::Msaa1x1:
- return VK_SAMPLE_COUNT_1_BIT;
- case Tegra::Texture::MsaaMode::Msaa2x1:
- case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
- return VK_SAMPLE_COUNT_2_BIT;
- case Tegra::Texture::MsaaMode::Msaa2x2:
- case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
- case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
- return VK_SAMPLE_COUNT_4_BIT;
- case Tegra::Texture::MsaaMode::Msaa4x2:
- case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
- case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
- case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
- return VK_SAMPLE_COUNT_8_BIT;
- case Tegra::Texture::MsaaMode::Msaa4x4:
- return VK_SAMPLE_COUNT_16_BIT;
- default:
- UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
- return VK_SAMPLE_COUNT_1_BIT;
+PixelFormat DecodeFormat(u8 encoded_format) {
+ const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)};
+ if (format == Tegra::RenderTargetFormat::NONE) {
+ return PixelFormat::Invalid;
}
+ return PixelFormatFromRenderTargetFormat(format);
}
-} // Anonymous namespace
+RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) {
+ RenderPassKey key;
+ std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat);
+ if (state.depth_enabled != 0) {
+ const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())};
+ key.depth_format = PixelFormatFromDepthFormat(depth_format);
+ } else {
+ key.depth_format = PixelFormat::Invalid;
+ }
+ key.samples = MaxwellToVK::MsaaMode(state.msaa_mode);
+ return key;
+}
-VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
- const GraphicsPipelineCacheKey& key,
- vk::Span<VkDescriptorSetLayoutBinding> bindings,
- const SPIRVProgram& program, u32 num_color_buffers)
- : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()},
- descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
- descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
- update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
- descriptor_template{CreateDescriptorUpdateTemplate(program)},
- modules(CreateShaderModules(program)),
- pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
-
-VKGraphicsPipeline::~VKGraphicsPipeline() = default;
-
-VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
- if (!descriptor_template) {
- return {};
- }
- const VkDescriptorSet set = descriptor_allocator.Commit();
- update_descriptor_queue.Send(*descriptor_template, set);
- return set;
+size_t NumAttachments(const FixedPipelineState& state) {
+ size_t num{};
+ for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])};
+ if (format != Tegra::RenderTargetFormat::NONE) {
+ num = index + 1;
+ }
+ }
+ return num;
}
-vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
- vk::Span<VkDescriptorSetLayoutBinding> bindings) const {
- const VkDescriptorSetLayoutCreateInfo ci{
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .bindingCount = bindings.size(),
- .pBindings = bindings.data(),
- };
- return device.GetLogical().CreateDescriptorSetLayout(ci);
+template <typename Spec>
+bool Passes(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
+ const std::array<Shader::Info, NUM_STAGES>& stage_infos) {
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (!Spec::enabled_stages[stage] && modules[stage]) {
+ return false;
+ }
+ const auto& info{stage_infos[stage]};
+ if constexpr (!Spec::has_storage_buffers) {
+ if (!info.storage_buffers_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_texture_buffers) {
+ if (!info.texture_buffer_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_image_buffers) {
+ if (!info.image_buffer_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_images) {
+ if (!info.image_descriptors.empty()) {
+ return false;
+ }
+ }
+ }
+ return true;
}
-vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
- const VkPipelineLayoutCreateInfo ci{
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .setLayoutCount = 1,
- .pSetLayouts = descriptor_set_layout.address(),
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = nullptr,
- };
- return device.GetLogical().CreatePipelineLayout(ci);
+using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool);
+
+template <typename Spec, typename... Specs>
+ConfigureFuncPtr FindSpec(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
+ const std::array<Shader::Info, NUM_STAGES>& stage_infos) {
+ if constexpr (sizeof...(Specs) > 0) {
+ if (!Passes<Spec>(modules, stage_infos)) {
+ return FindSpec<Specs...>(modules, stage_infos);
+ }
+ }
+ return GraphicsPipeline::MakeConfigureSpecFunc<Spec>();
}
-vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
- const SPIRVProgram& program) const {
- std::vector<VkDescriptorUpdateTemplateEntry> template_entries;
- u32 binding = 0;
- u32 offset = 0;
- for (const auto& stage : program) {
- if (stage) {
- FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries);
+struct SimpleVertexFragmentSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
+ static constexpr bool has_storage_buffers = false;
+ static constexpr bool has_texture_buffers = false;
+ static constexpr bool has_image_buffers = false;
+ static constexpr bool has_images = false;
+};
+
+struct SimpleVertexSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false};
+ static constexpr bool has_storage_buffers = false;
+ static constexpr bool has_texture_buffers = false;
+ static constexpr bool has_image_buffers = false;
+ static constexpr bool has_images = false;
+};
+
+struct DefaultSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
+ static constexpr bool has_storage_buffers = true;
+ static constexpr bool has_texture_buffers = true;
+ static constexpr bool has_image_buffers = true;
+ static constexpr bool has_images = true;
+};
+
+ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
+ const std::array<Shader::Info, NUM_STAGES>& infos) {
+ return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(modules, infos);
+}
+} // Anonymous namespace
+
+GraphicsPipeline::GraphicsPipeline(
+ Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
+ VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
+ VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
+ RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_,
+ std::array<vk::ShaderModule, NUM_STAGES> stages,
+ const std::array<const Shader::Info*, NUM_STAGES>& infos)
+ : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_},
+ texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_},
+ update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
+ if (shader_notify) {
+ shader_notify->MarkShaderBuilding();
+ }
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ const Shader::Info* const info{infos[stage]};
+ if (!info) {
+ continue;
}
+ stage_infos[stage] = *info;
+ enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
+ std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
}
- if (template_entries.empty()) {
- // If the shader doesn't use descriptor sets, skip template creation.
- return {};
+ auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] {
+ DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
+ uses_push_descriptor = builder.CanUsePushDescriptor();
+ descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor);
+ if (!uses_push_descriptor) {
+ descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
+ }
+ const VkDescriptorSetLayout set_layout{*descriptor_set_layout};
+ pipeline_layout = builder.CreatePipelineLayout(set_layout);
+ descriptor_update_template =
+ builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor);
+
+ const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
+ Validate();
+ MakePipeline(render_pass);
+
+ std::lock_guard lock{build_mutex};
+ is_built = true;
+ build_condvar.notify_one();
+ if (shader_notify) {
+ shader_notify->MarkShaderComplete();
+ }
+ }};
+ if (worker_thread) {
+ worker_thread->QueueWork(std::move(func));
+ } else {
+ func();
}
+ configure_func = ConfigureFunc(spv_modules, stage_infos);
+}
- const VkDescriptorUpdateTemplateCreateInfoKHR ci{
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
- .pNext = nullptr,
- .flags = 0,
- .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
- .pDescriptorUpdateEntries = template_entries.data(),
- .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
- .descriptorSetLayout = *descriptor_set_layout,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .pipelineLayout = *layout,
- .set = DESCRIPTOR_SET,
- };
- return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
+void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
+ transition_keys.push_back(transition->key);
+ transitions.push_back(transition);
}
-std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
- const SPIRVProgram& program) const {
- VkShaderModuleCreateInfo ci{
- .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .codeSize = 0,
- .pCode = nullptr,
- };
+template <typename Spec>
+void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
+ std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids;
+ std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices;
+ std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
+ size_t sampler_index{};
+ size_t image_index{};
+
+ texture_cache.SynchronizeGraphicsDescriptors();
+
+ buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
+
+ const auto& regs{maxwell3d.regs};
+ const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
+ const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
+ const Shader::Info& info{stage_infos[stage]};
+ buffer_cache.UnbindGraphicsStorageBuffers(stage);
+ if constexpr (Spec::has_storage_buffers) {
+ size_t ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ ASSERT(desc.count == 1);
+ buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
+ desc.cbuf_offset, desc.is_written);
+ ++ssbo_index;
+ }
+ }
+ const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
+ const auto read_handle{[&](const auto& desc, u32 index) {
+ ASSERT(cbufs[desc.cbuf_index].enabled);
+ const u32 index_offset{index << desc.size_shift};
+ const u32 offset{desc.cbuf_offset + index_offset};
+ const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
+ if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
+ std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
+ if (desc.has_secondary) {
+ ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
+ const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
+ const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
+ second_offset};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ const u32 raw{lhs_raw | rhs_raw};
+ return TexturePair(raw, via_header_index);
+ }
+ }
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ }};
+ const auto add_image{[&](const auto& desc) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_index++] = handle.first;
+ }
+ }};
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ add_image(desc);
+ }
+ }
+ if constexpr (Spec::has_image_buffers) {
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_image(desc);
+ }
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_index++] = handle.first;
- std::vector<vk::ShaderModule> shader_modules;
- shader_modules.reserve(Maxwell::MaxShaderStage);
- for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
- const auto& stage = program[i];
- if (!stage) {
- continue;
+ Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
+ samplers[sampler_index++] = sampler->Handle();
+ }
+ }
+ if constexpr (Spec::has_images) {
+ for (const auto& desc : info.image_descriptors) {
+ add_image(desc);
+ }
}
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ config_stage(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ config_stage(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ config_stage(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ config_stage(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ config_stage(4);
+ }
+ const std::span indices_span(image_view_indices.data(), image_index);
+ texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+
+ ImageId* texture_buffer_index{image_view_ids.data()};
+ const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
+ size_t index{};
+ const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
+ for (u32 i = 0; i < desc.count; ++i) {
+ bool is_written{false};
+ if constexpr (is_image) {
+ is_written = desc.is_written;
+ }
+ ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
+ buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
+ image_view.BufferSize(), image_view.format,
+ is_written, is_image);
+ ++index;
+ ++texture_buffer_index;
+ }
+ }};
+ buffer_cache.UnbindGraphicsTextureBuffers(stage);
- device.SaveShader(stage->code);
+ const Shader::Info& info{stage_infos[stage]};
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ add_buffer(desc);
+ }
+ }
+ if constexpr (Spec::has_image_buffers) {
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_buffer(desc);
+ }
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ texture_buffer_index += desc.count;
+ }
+ if constexpr (Spec::has_images) {
+ for (const auto& desc : info.image_descriptors) {
+ texture_buffer_index += desc.count;
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ bind_stage_info(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ bind_stage_info(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ bind_stage_info(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ bind_stage_info(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ bind_stage_info(4);
+ }
+
+ buffer_cache.UpdateGraphicsBuffers(is_indexed);
+ buffer_cache.BindHostGeometryBuffers(is_indexed);
- ci.codeSize = stage->code.size() * sizeof(u32);
- ci.pCode = stage->code.data();
- shader_modules.push_back(device.GetLogical().CreateShaderModule(ci));
+ update_descriptor_queue.Acquire();
+
+ const VkSampler* samplers_it{samplers.data()};
+ const ImageId* views_it{image_view_ids.data()};
+ const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
+ buffer_cache.BindHostStageBuffers(stage);
+ PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache,
+ update_descriptor_queue);
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ prepare_stage(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ prepare_stage(1);
}
- return shader_modules;
+ if constexpr (Spec::enabled_stages[2]) {
+ prepare_stage(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ prepare_stage(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ prepare_stage(4);
+ }
+ ConfigureDraw();
}
-vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
- VkRenderPass renderpass,
- u32 num_color_buffers) const {
- const auto& state = cache_key.fixed_state;
- const auto& viewport_swizzles = state.viewport_swizzles;
-
- FixedPipelineState::DynamicState dynamic;
- if (device.IsExtExtendedDynamicStateSupported()) {
- // Insert dummy values, as long as they are valid they don't matter as extended dynamic
- // state is ignored
- dynamic.raw1 = 0;
- dynamic.raw2 = 0;
- dynamic.vertex_strides.fill(0);
- } else {
- dynamic = state.dynamic_state;
- }
-
- std::vector<VkVertexInputBindingDescription> vertex_bindings;
- std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
- for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
- const bool instanced = state.binding_divisors[index] != 0;
- const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
- vertex_bindings.push_back({
- .binding = static_cast<u32>(index),
- .stride = dynamic.vertex_strides[index],
- .inputRate = rate,
+void GraphicsPipeline::ConfigureDraw() {
+ texture_cache.UpdateRenderTargets(false);
+ scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
+
+ if (!is_built.load(std::memory_order::relaxed)) {
+ // Wait for the pipeline to be built
+ scheduler.Record([this](vk::CommandBuffer) {
+ std::unique_lock lock{build_mutex};
+ build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
});
- if (instanced) {
- vertex_binding_divisors.push_back({
- .binding = static_cast<u32>(index),
- .divisor = state.binding_divisors[index],
- });
- }
}
+ const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
+ const void* const descriptor_data{update_descriptor_queue.UpdateData()};
+ scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
+ if (bind_pipeline) {
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+ }
+ if (!descriptor_set_layout) {
+ return;
+ }
+ if (uses_push_descriptor) {
+ cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout,
+ 0, descriptor_data);
+ } else {
+ const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
+ const vk::Device& dev{device.GetLogical()};
+ dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
+ descriptor_set, nullptr);
+ }
+ });
+}
- std::vector<VkVertexInputAttributeDescription> vertex_attributes;
- const auto& input_attributes = program[0]->entries.attributes;
- for (std::size_t index = 0; index < state.attributes.size(); ++index) {
- const auto& attribute = state.attributes[index];
- if (!attribute.enabled) {
- continue;
+void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
+ FixedPipelineState::DynamicState dynamic{};
+ if (!key.state.extended_dynamic_state) {
+ dynamic = key.state.dynamic_state;
+ }
+ static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
+ static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
+ static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
+ if (key.state.dynamic_vertex_input) {
+ for (size_t index = 0; index < key.state.attributes.size(); ++index) {
+ const u32 type = key.state.DynamicAttributeType(index);
+ if (!stage_infos[0].loads.Generic(index) || type == 0) {
+ continue;
+ }
+ vertex_attributes.push_back({
+ .location = static_cast<u32>(index),
+ .binding = 0,
+ .format = type == 1 ? VK_FORMAT_R32_SFLOAT
+ : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT,
+ .offset = 0,
+ });
}
- if (!input_attributes.contains(static_cast<u32>(index))) {
- // Skip attributes not used by the vertex shaders.
- continue;
+ if (!vertex_attributes.empty()) {
+ vertex_bindings.push_back({
+ .binding = 0,
+ .stride = 4,
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
+ });
+ }
+ } else {
+ for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ const bool instanced = key.state.binding_divisors[index] != 0;
+ const auto rate =
+ instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
+ vertex_bindings.push_back({
+ .binding = static_cast<u32>(index),
+ .stride = dynamic.vertex_strides[index],
+ .inputRate = rate,
+ });
+ if (instanced) {
+ vertex_binding_divisors.push_back({
+ .binding = static_cast<u32>(index),
+ .divisor = key.state.binding_divisors[index],
+ });
+ }
+ }
+ for (size_t index = 0; index < key.state.attributes.size(); ++index) {
+ const auto& attribute = key.state.attributes[index];
+ if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) {
+ continue;
+ }
+ vertex_attributes.push_back({
+ .location = static_cast<u32>(index),
+ .binding = attribute.buffer,
+ .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
+ .offset = attribute.offset,
+ });
}
- vertex_attributes.push_back({
- .location = static_cast<u32>(index),
- .binding = attribute.buffer,
- .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
- .offset = attribute.offset,
- });
}
-
VkPipelineVertexInputStateCreateInfo vertex_input_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -264,7 +554,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
.pVertexAttributeDescriptions = vertex_attributes.data(),
};
-
const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
.pNext = nullptr,
@@ -274,78 +563,113 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
if (!vertex_binding_divisors.empty()) {
vertex_input_ci.pNext = &input_divisor_ci;
}
-
- const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology);
+ auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology);
+ if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
+ if (!spv_modules[1] && !spv_modules[2]) {
+ LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points");
+ input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
+ }
+ }
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .topology = MaxwellToVK::PrimitiveTopology(device, state.topology),
- .primitiveRestartEnable = state.primitive_restart_enable != 0 &&
+ .topology = input_assembly_topology,
+ .primitiveRestartEnable = key.state.primitive_restart_enable != 0 &&
SupportsPrimitiveRestart(input_assembly_topology),
};
-
const VkPipelineTessellationStateCreateInfo tessellation_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .patchControlPoints = state.patch_control_points_minus_one.Value() + 1,
- };
-
- VkPipelineViewportStateCreateInfo viewport_ci{
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .viewportCount = Maxwell::NumViewports,
- .pViewports = nullptr,
- .scissorCount = Maxwell::NumViewports,
- .pScissors = nullptr,
+ .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1,
};
std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
- std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
- VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
+ std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
+ const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
.pNext = nullptr,
.flags = 0,
.viewportCount = Maxwell::NumViewports,
.pViewportSwizzles = swizzles.data(),
};
- if (device.IsNvViewportSwizzleSupported()) {
- viewport_ci.pNext = &swizzle_ci;
- }
+ const VkPipelineViewportStateCreateInfo viewport_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr,
+ .flags = 0,
+ .viewportCount = Maxwell::NumViewports,
+ .pViewports = nullptr,
+ .scissorCount = Maxwell::NumViewports,
+ .pScissors = nullptr,
+ };
- const VkPipelineRasterizationStateCreateInfo rasterization_ci{
+ VkPipelineRasterizationStateCreateInfo rasterization_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.depthClampEnable =
- static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
+ static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
.rasterizerDiscardEnable =
- static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
- .polygonMode = VK_POLYGON_MODE_FILL,
+ static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
+ .polygonMode =
+ MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)),
.cullMode = static_cast<VkCullModeFlags>(
dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
- .depthBiasEnable = state.depth_bias_enable,
+ .depthBiasEnable = key.state.depth_bias_enable,
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
.lineWidth = 1.0f,
};
+ VkPipelineRasterizationLineStateCreateInfoEXT line_state{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .lineRasterizationMode = key.state.smooth_lines != 0
+ ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT
+ : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT,
+ .stippledLineEnable = VK_FALSE, // TODO
+ .lineStippleFactor = 0,
+ .lineStipplePattern = 0,
+ };
+ VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .flags = 0,
+ .conservativeRasterizationMode = key.state.conservative_raster_enable != 0
+ ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT
+ : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT,
+ .extraPrimitiveOverestimationSize = 0.0f,
+ };
+ VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT,
+ .pNext = nullptr,
+ .provokingVertexMode = key.state.provoking_vertex_last != 0
+ ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT
+ : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT,
+ };
+ if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) {
+ line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state);
+ }
+ if (device.IsExtConservativeRasterizationSupported()) {
+ conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster);
+ }
+ if (device.IsExtProvokingVertexSupported()) {
+ provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex);
+ }
const VkPipelineMultisampleStateCreateInfo multisample_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .rasterizationSamples = ConvertMsaaMode(state.msaa_mode),
+ .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode),
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = VK_FALSE,
.alphaToOneEnable = VK_FALSE,
};
-
const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -355,32 +679,32 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
.depthCompareOp = dynamic.depth_test_enable
? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
: VK_COMPARE_OP_ALWAYS,
- .depthBoundsTestEnable = dynamic.depth_bounds_enable,
+ .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(),
.stencilTestEnable = dynamic.stencil_enable,
.front = GetStencilFaceState(dynamic.front),
.back = GetStencilFaceState(dynamic.back),
.minDepthBounds = 0.0f,
.maxDepthBounds = 0.0f,
};
-
- std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
- for (std::size_t index = 0; index < num_color_buffers; ++index) {
- static constexpr std::array COMPONENT_TABLE{
+ if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) {
+ LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
+ }
+ static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
+ const size_t num_attachments{NumAttachments(key.state)};
+ for (size_t index = 0; index < num_attachments; ++index) {
+ static constexpr std::array mask_table{
VK_COLOR_COMPONENT_R_BIT,
VK_COLOR_COMPONENT_G_BIT,
VK_COLOR_COMPONENT_B_BIT,
VK_COLOR_COMPONENT_A_BIT,
};
- const auto& blend = state.attachments[index];
-
- VkColorComponentFlags color_components = 0;
- for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) {
- if (blend.Mask()[i]) {
- color_components |= COMPONENT_TABLE[i];
- }
+ const auto& blend{key.state.attachments[index]};
+ const std::array mask{blend.Mask()};
+ VkColorComponentFlags write_mask{};
+ for (size_t i = 0; i < mask_table.size(); ++i) {
+ write_mask |= mask[i] ? mask_table[i] : 0;
}
-
- cb_attachments[index] = {
+ cb_attachments.push_back({
.blendEnable = blend.enable != 0,
.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
@@ -388,28 +712,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
- .colorWriteMask = color_components,
- };
+ .colorWriteMask = write_mask,
+ });
}
-
const VkPipelineColorBlendStateCreateInfo color_blend_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
- .attachmentCount = num_color_buffers,
+ .attachmentCount = static_cast<u32>(cb_attachments.size()),
.pAttachments = cb_attachments.data(),
.blendConstants = {},
};
-
- std::vector dynamic_states{
+ static_vector<VkDynamicState, 19> dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
};
- if (device.IsExtExtendedDynamicStateSupported()) {
+ if (key.state.extended_dynamic_state) {
static constexpr std::array extended{
VK_DYNAMIC_STATE_CULL_MODE_EXT,
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
@@ -421,9 +744,11 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_STENCIL_OP_EXT,
};
+ if (key.state.dynamic_vertex_input) {
+ dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
+ }
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
}
-
const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -431,34 +756,33 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
.pDynamicStates = dynamic_states.data(),
};
-
- const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
+ [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
.pNext = nullptr,
.requiredSubgroupSize = GuestWarpSize,
};
-
- std::vector<VkPipelineShaderStageCreateInfo> shader_stages;
- std::size_t module_index = 0;
- for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
- if (!program[stage]) {
+ static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages;
+ for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+ if (!spv_modules[stage]) {
continue;
}
-
- VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back();
- stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
- stage_ci.pNext = nullptr;
- stage_ci.flags = 0;
- stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage));
- stage_ci.module = *modules[module_index++];
- stage_ci.pName = "main";
- stage_ci.pSpecializationInfo = nullptr;
-
+ [[maybe_unused]] auto& stage_ci =
+ shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)),
+ .module = *spv_modules[stage],
+ .pName = "main",
+ .pSpecializationInfo = nullptr,
+ });
+ /*
if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
stage_ci.pNext = &subgroup_size_ci;
}
+ */
}
- return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{
+ pipeline = device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -473,12 +797,31 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
.pDepthStencilState = &depth_stencil_ci,
.pColorBlendState = &color_blend_ci,
.pDynamicState = &dynamic_state_ci,
- .layout = *layout,
- .renderPass = renderpass,
+ .layout = *pipeline_layout,
+ .renderPass = render_pass,
.subpass = 0,
.basePipelineHandle = nullptr,
.basePipelineIndex = 0,
});
}
+void GraphicsPipeline::Validate() {
+ size_t num_images{};
+ for (const auto& info : stage_infos) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ num_images += desc.count;
+ }
+ for (const auto& desc : info.image_buffer_descriptors) {
+ num_images += desc.count;
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ num_images += desc.count;
+ }
+ for (const auto& desc : info.image_descriptors) {
+ num_images += desc.count;
+ }
+ }
+ ASSERT(num_images <= MAX_IMAGE_ELEMENTS);
+}
+
} // namespace Vulkan
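ConfigureFunc/FindSpec above select, when the pipeline is created, a ConfigureImpl instantiation whose disabled feature paths are removed at compile time with if constexpr; Passes guarantees the chosen spec covers everything the shaders actually use, so the pruned branches are provably dead for that pipeline. A standalone sketch of the dispatch technique (types and values hypothetical):

    #include <cstdio>

    struct PipelineData {
        int num_images;
    };

    void BindImages(const PipelineData& data) {
        std::printf("binding %d images\n", data.num_images);
    }

    struct NoImagesSpec { static constexpr bool has_images = false; };
    struct FullSpec     { static constexpr bool has_images = true; };

    template <typename Spec>
    void Run(const PipelineData& data) {
        if constexpr (Spec::has_images) {
            BindImages(data); // only instantiated when the spec enables images
        }
        // common fast path shared by every spec...
    }

    int main() {
        Run<NoImagesSpec>({.num_images = 0}); // image branch compiled out
        Run<FullSpec>({.num_images = 4});     // image branch taken
    }

The per-draw win is that a pipeline without images, texture buffers, or storage buffers never even tests for them; the cost is one extra function-pointer indirection through configure_func.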
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 8b6a98fe0..2bd48d697 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -1,30 +1,36 @@
-// Copyright 2019 yuzu Emulator Project
+// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
+#include <algorithm>
#include <array>
-#include <optional>
-#include <vector>
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <type_traits>
-#include "common/common_types.h"
+#include "common/thread_worker.h"
+#include "shader_recompiler/shader_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
-namespace Vulkan {
+namespace VideoCore {
+class ShaderNotify;
+}
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+namespace Vulkan {
struct GraphicsPipelineCacheKey {
- VkRenderPass renderpass;
- std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
- FixedPipelineState fixed_state;
+ std::array<u64, 6> unique_hashes;
+ FixedPipelineState state;
- std::size_t Hash() const noexcept;
+ size_t Hash() const noexcept;
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
@@ -32,72 +38,115 @@ struct GraphicsPipelineCacheKey {
return !operator==(rhs);
}
- std::size_t Size() const noexcept {
- return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size();
+ size_t Size() const noexcept {
+ return sizeof(unique_hashes) + state.Size();
}
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
+} // namespace Vulkan
+
+namespace std {
+template <>
+struct hash<Vulkan::GraphicsPipelineCacheKey> {
+ size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+} // namespace std
+
+namespace Vulkan {
+
class Device;
-class VKDescriptorPool;
+class RenderPassCache;
class VKScheduler;
class VKUpdateDescriptorQueue;
-using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
+class GraphicsPipeline {
+ static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
-class VKGraphicsPipeline final {
public:
- explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
- VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue_,
- const GraphicsPipelineCacheKey& key,
- vk::Span<VkDescriptorSetLayoutBinding> bindings,
- const SPIRVProgram& program, u32 num_color_buffers);
- ~VKGraphicsPipeline();
-
- VkDescriptorSet CommitDescriptorSet();
-
- VkPipeline GetHandle() const {
- return *pipeline;
+ explicit GraphicsPipeline(
+ Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
+ VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
+ VideoCore::ShaderNotify* shader_notify, const Device& device,
+ DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue,
+ Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache,
+ const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages,
+ const std::array<const Shader::Info*, NUM_STAGES>& infos);
+
+ GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
+ GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
+
+ GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
+ GraphicsPipeline(const GraphicsPipeline&) = delete;
+
+ void AddTransition(GraphicsPipeline* transition);
+
+ void Configure(bool is_indexed) {
+ configure_func(this, is_indexed);
}
- VkPipelineLayout GetLayout() const {
- return *layout;
+ [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept {
+ if (key == current_key) {
+ return this;
+ }
+ const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)};
+ return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)]
+ : nullptr;
}
- GraphicsPipelineCacheKey GetCacheKey() const {
- return cache_key;
+ [[nodiscard]] bool IsBuilt() const noexcept {
+ return is_built.load(std::memory_order::relaxed);
}
-private:
- vk::DescriptorSetLayout CreateDescriptorSetLayout(
- vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
+ template <typename Spec>
+ static auto MakeConfigureSpecFunc() {
+ return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
+ }
- vk::PipelineLayout CreatePipelineLayout() const;
+private:
+ template <typename Spec>
+ void ConfigureImpl(bool is_indexed);
- vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
- const SPIRVProgram& program) const;
+ void ConfigureDraw();
- std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
+ void MakePipeline(VkRenderPass render_pass);
- vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
- u32 num_color_buffers) const;
+ void Validate();
+ const GraphicsPipelineCacheKey key;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::MemoryManager& gpu_memory;
const Device& device;
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
VKScheduler& scheduler;
- const GraphicsPipelineCacheKey cache_key;
- const u64 hash;
+ VKUpdateDescriptorQueue& update_descriptor_queue;
+
+ void (*configure_func)(GraphicsPipeline*, bool){};
+
+ std::vector<GraphicsPipelineCacheKey> transition_keys;
+ std::vector<GraphicsPipeline*> transitions;
+
+ std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
+
+ std::array<Shader::Info, NUM_STAGES> stage_infos;
+ std::array<u32, 5> enabled_uniform_buffer_masks{};
+ VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
- VKUpdateDescriptorQueue& update_descriptor_queue;
- vk::PipelineLayout layout;
- vk::DescriptorUpdateTemplateKHR descriptor_template;
- std::vector<vk::ShaderModule> modules;
-
+ vk::PipelineLayout pipeline_layout;
+ vk::DescriptorUpdateTemplateKHR descriptor_update_template;
vk::Pipeline pipeline;
+
+ std::condition_variable build_condvar;
+ std::mutex build_mutex;
+ std::atomic_bool is_built{false};
+ bool uses_push_descriptor{false};
};
} // namespace Vulkan
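The new build_mutex/build_condvar/is_built members support pipelines that are compiled on a worker thread while rendering continues. A minimal sketch, not taken from the diff, of the hand-off these members enable:

#include <atomic>
#include <condition_variable>
#include <mutex>

struct BuildState {
    std::mutex build_mutex;
    std::condition_variable build_condvar;
    std::atomic_bool is_built{false};

    // Called by the worker thread once pipeline creation has finished.
    void MarkBuilt() {
        std::scoped_lock lock{build_mutex};
        is_built = true;
        build_condvar.notify_all();
    }

    // Called by a consumer that must use the pipeline before the build completed.
    void WaitUntilBuilt() {
        if (is_built.load(std::memory_order_relaxed)) {
            return; // common fast path, mirrors GraphicsPipeline::IsBuilt()
        }
        std::unique_lock lock{build_mutex};
        build_condvar.wait(lock, [this] { return is_built.load(); });
    }
};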
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index ee3cd35d0..4f8688118 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -39,9 +39,9 @@ public:
return KnownGpuTick() >= tick;
}
- /// Advance to the logical tick.
- void NextTick() noexcept {
- ++current_tick;
+ /// Advance to the logical tick and return the old one
+ [[nodiscard]] u64 NextTick() noexcept {
+ return current_tick.fetch_add(1, std::memory_order::relaxed);
}
/// Refresh the known GPU tick
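The NextTick() change makes tick allocation atomic and returns the pre-increment value, so each caller receives a unique tick to tag its submission with. A self-contained sketch of the pattern:

#include <atomic>
#include <cstdint>
#include <cstdio>

std::atomic<std::uint64_t> current_tick{1};

// fetch_add returns the value held *before* the increment.
std::uint64_t NextTick() noexcept {
    return current_tick.fetch_add(1, std::memory_order_relaxed);
}

int main() {
    const std::uint64_t a = NextTick(); // 1
    const std::uint64_t b = NextTick(); // 2
    std::printf("%llu %llu\n", static_cast<unsigned long long>(a),
                static_cast<unsigned long long>(b));
}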
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 8991505ca..57b163247 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -4,444 +4,613 @@
#include <algorithm>
#include <cstddef>
+#include <fstream>
#include <memory>
+#include <thread>
#include <vector>
#include "common/bit_cast.h"
#include "common/cityhash.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
#include "common/microprofile.h"
+#include "common/thread_worker.h"
#include "core/core.h"
#include "core/memory.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate_program.h"
+#include "shader_recompiler/program_header.h"
+#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/pipeline_helper.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/memory_util.h"
#include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
#include "video_core/shader_notify.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
-
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::GetShaderAddress;
-using VideoCommon::Shader::GetShaderCode;
-using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::STAGE_MAIN_OFFSET;
-
namespace {
+using Shader::Backend::SPIRV::EmitSPIRV;
+using Shader::Maxwell::MergeDualVertexPrograms;
+using Shader::Maxwell::TranslateProgram;
+using VideoCommon::ComputeEnvironment;
+using VideoCommon::FileEnvironment;
+using VideoCommon::GenericEnvironment;
+using VideoCommon::GraphicsEnvironment;
+
+constexpr u32 CACHE_VERSION = 5;
+
+template <typename Container>
+auto MakeSpan(Container& container) {
+ return std::span(container.data(), container.size());
+}
-constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
-constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
-constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
-constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
-constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
-constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
-
-constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
- .depth = VideoCommon::Shader::CompileDepth::FullDecompile,
- .disable_else_derivation = true,
-};
-
-constexpr std::size_t GetStageFromProgram(std::size_t program) {
- return program == 0 ? 0 : program - 1;
+Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
+ switch (comparison) {
+ case Maxwell::ComparisonOp::Never:
+ case Maxwell::ComparisonOp::NeverOld:
+ return Shader::CompareFunction::Never;
+ case Maxwell::ComparisonOp::Less:
+ case Maxwell::ComparisonOp::LessOld:
+ return Shader::CompareFunction::Less;
+ case Maxwell::ComparisonOp::Equal:
+ case Maxwell::ComparisonOp::EqualOld:
+ return Shader::CompareFunction::Equal;
+ case Maxwell::ComparisonOp::LessEqual:
+ case Maxwell::ComparisonOp::LessEqualOld:
+ return Shader::CompareFunction::LessThanEqual;
+ case Maxwell::ComparisonOp::Greater:
+ case Maxwell::ComparisonOp::GreaterOld:
+ return Shader::CompareFunction::Greater;
+ case Maxwell::ComparisonOp::NotEqual:
+ case Maxwell::ComparisonOp::NotEqualOld:
+ return Shader::CompareFunction::NotEqual;
+ case Maxwell::ComparisonOp::GreaterEqual:
+ case Maxwell::ComparisonOp::GreaterEqualOld:
+ return Shader::CompareFunction::GreaterThanEqual;
+ case Maxwell::ComparisonOp::Always:
+ case Maxwell::ComparisonOp::AlwaysOld:
+ return Shader::CompareFunction::Always;
+ }
+ UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
+ return {};
}
-constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
- return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
+Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) {
+ if (attr.enabled == 0) {
+ return Shader::AttributeType::Disabled;
+ }
+ switch (attr.Type()) {
+ case Maxwell::VertexAttribute::Type::SignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
+ case Maxwell::VertexAttribute::Type::Float:
+ return Shader::AttributeType::Float;
+ case Maxwell::VertexAttribute::Type::SignedInt:
+ return Shader::AttributeType::SignedInt;
+ case Maxwell::VertexAttribute::Type::UnsignedInt:
+ return Shader::AttributeType::UnsignedInt;
+ }
+ return Shader::AttributeType::Float;
}
-ShaderType GetShaderType(Maxwell::ShaderProgram program) {
- switch (program) {
- case Maxwell::ShaderProgram::VertexB:
- return ShaderType::Vertex;
- case Maxwell::ShaderProgram::TesselationControl:
- return ShaderType::TesselationControl;
- case Maxwell::ShaderProgram::TesselationEval:
- return ShaderType::TesselationEval;
- case Maxwell::ShaderProgram::Geometry:
- return ShaderType::Geometry;
- case Maxwell::ShaderProgram::Fragment:
- return ShaderType::Fragment;
- default:
- UNIMPLEMENTED_MSG("program={}", program);
- return ShaderType::Vertex;
+Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) {
+ switch (state.DynamicAttributeType(index)) {
+ case 0:
+ return Shader::AttributeType::Disabled;
+ case 1:
+ return Shader::AttributeType::Float;
+ case 2:
+ return Shader::AttributeType::SignedInt;
+ case 3:
+ return Shader::AttributeType::UnsignedInt;
}
+ return Shader::AttributeType::Disabled;
}
-template <VkDescriptorType descriptor_type, class Container>
-void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding,
- VkShaderStageFlags stage_flags, const Container& container) {
- const u32 num_entries = static_cast<u32>(std::size(container));
- for (std::size_t i = 0; i < num_entries; ++i) {
- u32 count = 1;
- if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
- // Combined image samplers can be arrayed.
- count = container[i].size;
+Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> programs,
+ const GraphicsPipelineCacheKey& key,
+ const Shader::IR::Program& program,
+ const Shader::IR::Program* previous_program) {
+ Shader::RuntimeInfo info;
+ if (previous_program) {
+ info.previous_stage_stores = previous_program->info.stores;
+ if (previous_program->is_geometry_passthrough) {
+ info.previous_stage_stores.mask |= previous_program->info.passthrough.mask;
}
- bindings.push_back({
- .binding = binding++,
- .descriptorType = descriptor_type,
- .descriptorCount = count,
- .stageFlags = stage_flags,
- .pImmutableSamplers = nullptr,
- });
+ } else {
+ info.previous_stage_stores.mask.set();
+ }
+ const Shader::Stage stage{program.stage};
+ const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough};
+ const bool gl_ndc{key.state.ndc_minus_one_to_one != 0};
+ const float point_size{Common::BitCast<float>(key.state.point_size)};
+ switch (stage) {
+ case Shader::Stage::VertexB:
+ if (!has_geometry) {
+ if (key.state.topology == Maxwell::PrimitiveTopology::Points) {
+ info.fixed_state_point_size = point_size;
+ }
+ if (key.state.xfb_enabled) {
+ info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ }
+ info.convert_depth_mode = gl_ndc;
+ }
+ if (key.state.dynamic_vertex_input) {
+ for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+ info.generic_input_types[index] = AttributeType(key.state, index);
+ }
+ } else {
+ std::ranges::transform(key.state.attributes, info.generic_input_types.begin(),
+ &CastAttributeType);
+ }
+ break;
+ case Shader::Stage::TessellationEval:
+ // Tessellation winding arrives inverted from the guest (root cause unknown), so flip it.
+ info.tess_clockwise = key.state.tessellation_clockwise == 0;
+ info.tess_primitive = [&key] {
+ const u32 raw{key.state.tessellation_primitive.Value()};
+ switch (static_cast<Maxwell::TessellationPrimitive>(raw)) {
+ case Maxwell::TessellationPrimitive::Isolines:
+ return Shader::TessPrimitive::Isolines;
+ case Maxwell::TessellationPrimitive::Triangles:
+ return Shader::TessPrimitive::Triangles;
+ case Maxwell::TessellationPrimitive::Quads:
+ return Shader::TessPrimitive::Quads;
+ }
+ UNREACHABLE();
+ return Shader::TessPrimitive::Triangles;
+ }();
+ info.tess_spacing = [&] {
+ const u32 raw{key.state.tessellation_spacing};
+ switch (static_cast<Maxwell::TessellationSpacing>(raw)) {
+ case Maxwell::TessellationSpacing::Equal:
+ return Shader::TessSpacing::Equal;
+ case Maxwell::TessellationSpacing::FractionalOdd:
+ return Shader::TessSpacing::FractionalOdd;
+ case Maxwell::TessellationSpacing::FractionalEven:
+ return Shader::TessSpacing::FractionalEven;
+ }
+ UNREACHABLE();
+ return Shader::TessSpacing::Equal;
+ }();
+ break;
+ case Shader::Stage::Geometry:
+ if (program.output_topology == Shader::OutputTopology::PointList) {
+ info.fixed_state_point_size = point_size;
+ }
+ if (key.state.xfb_enabled != 0) {
+ info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
+ }
+ info.convert_depth_mode = gl_ndc;
+ break;
+ case Shader::Stage::Fragment:
+ info.alpha_test_func = MaxwellToCompareFunction(
+ key.state.UnpackComparisonOp(key.state.alpha_test_func.Value()));
+ info.alpha_test_reference = Common::BitCast<float>(key.state.alpha_test_ref);
+ break;
+ default:
+ break;
+ }
+ switch (key.state.topology) {
+ case Maxwell::PrimitiveTopology::Points:
+ info.input_topology = Shader::InputTopology::Points;
+ break;
+ case Maxwell::PrimitiveTopology::Lines:
+ case Maxwell::PrimitiveTopology::LineLoop:
+ case Maxwell::PrimitiveTopology::LineStrip:
+ info.input_topology = Shader::InputTopology::Lines;
+ break;
+ case Maxwell::PrimitiveTopology::Triangles:
+ case Maxwell::PrimitiveTopology::TriangleStrip:
+ case Maxwell::PrimitiveTopology::TriangleFan:
+ case Maxwell::PrimitiveTopology::Quads:
+ case Maxwell::PrimitiveTopology::QuadStrip:
+ case Maxwell::PrimitiveTopology::Polygon:
+ case Maxwell::PrimitiveTopology::Patches:
+ info.input_topology = Shader::InputTopology::Triangles;
+ break;
+ case Maxwell::PrimitiveTopology::LinesAdjacency:
+ case Maxwell::PrimitiveTopology::LineStripAdjacency:
+ info.input_topology = Shader::InputTopology::LinesAdjacency;
+ break;
+ case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+ case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+ info.input_topology = Shader::InputTopology::TrianglesAdjacency;
+ break;
}
+ info.force_early_z = key.state.early_z != 0;
+ info.y_negate = key.state.y_negate != 0;
+ return info;
}
+} // Anonymous namespace
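The tess_primitive/tess_spacing initializers above use immediately-invoked lambdas so the decoding switch stays scoped to the initializer and the result can be const. A standalone sketch of the idiom, with an invented Spacing enum:

#include <cstdio>

enum class Spacing { Equal, FractionalOdd, FractionalEven };

int main() {
    const unsigned raw = 2; // stand-in for the packed pipeline-state field
    const Spacing spacing = [raw] {
        switch (raw) {
        case 0:
            return Spacing::Equal;
        case 1:
            return Spacing::FractionalOdd;
        case 2:
            return Spacing::FractionalEven;
        }
        return Spacing::Equal; // fallback for out-of-range encodings
    }();
    std::printf("spacing=%d\n", static_cast<int>(spacing));
}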
-u32 FillDescriptorLayout(const ShaderEntries& entries,
- std::vector<VkDescriptorSetLayoutBinding>& bindings,
- Maxwell::ShaderProgram program_type, u32 base_binding) {
- const ShaderType stage = GetStageFromProgram(program_type);
- const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
-
- u32 binding = base_binding;
- AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
- AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
- AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
- AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
- AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
- AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
- return binding;
+size_t ComputePipelineCacheKey::Hash() const noexcept {
+ const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+ return static_cast<size_t>(hash);
}
-} // Anonymous namespace
+bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
+ return std::memcmp(&rhs, this, sizeof *this) == 0;
+}
-std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
+size_t GraphicsPipelineCacheKey::Hash() const noexcept {
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
- return static_cast<std::size_t>(hash);
+ return static_cast<size_t>(hash);
}
bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
return std::memcmp(&rhs, this, Size()) == 0;
}
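Hashing with CityHash64 over raw bytes and comparing with memcmp, as above, is only sound because the key types assert std::has_unique_object_representations_v (no padding holes with indeterminate bytes). A minimal sketch of why that guarantee matters, using an invented Key type:

#include <cstdint>
#include <cstring>
#include <type_traits>

struct Key {
    std::uint64_t unique_hash;
    std::uint32_t shared_memory_size;
    std::uint32_t workgroup_size[3];
};
// Without this, padding bytes could make two logically equal keys
// hash or compare differently.
static_assert(std::has_unique_object_representations_v<Key>);

bool Equal(const Key& a, const Key& b) {
    return std::memcmp(&a, &b, sizeof(Key)) == 0;
}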
-std::size_t ComputePipelineCacheKey::Hash() const noexcept {
- const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
- return static_cast<std::size_t>(hash);
-}
-
-bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
- return std::memcmp(&rhs, this, sizeof *this) == 0;
+PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_,
+ VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
+ TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
+ : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+ device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
+ update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
+ buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
+ use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
+ workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
+ serialization_thread(1, "yuzu:PipelineSerialization") {
+ const auto& float_control{device.FloatControlProperties()};
+ const VkDriverIdKHR driver_id{device.GetDriverID()};
+ profile = Shader::Profile{
+ .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
+ .unified_descriptor_binding = true,
+ .support_descriptor_aliasing = true,
+ .support_int8 = true,
+ .support_int16 = device.IsShaderInt16Supported(),
+ .support_int64 = device.IsShaderInt64Supported(),
+ .support_vertex_instance_id = false,
+ .support_float_controls = true,
+ .support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
+ VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
+ .support_separate_rounding_mode =
+ float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
+ .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
+ .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
+ .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
+ .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
+ .support_fp16_signed_zero_nan_preserve =
+ float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
+ .support_fp32_signed_zero_nan_preserve =
+ float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
+ .support_fp64_signed_zero_nan_preserve =
+ float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
+ .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
+ .support_vote = true,
+ .support_viewport_index_layer_non_geometry =
+ device.IsExtShaderViewportIndexLayerSupported(),
+ .support_viewport_mask = device.IsNvViewportArray2Supported(),
+ .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
+ .support_demote_to_helper_invocation = true,
+ .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
+ .support_derivative_control = true,
+ .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
+
+ .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
+
+ .lower_left_origin_mode = false,
+ .need_declared_frag_colors = false,
+
+ .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
+ .has_broken_unsigned_image_offsets = false,
+ .has_broken_signed_operations = false,
+ .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR,
+ .ignore_nan_fp_comparisons = false,
+ };
+ host_info = Shader::HostTranslateInfo{
+ .support_float16 = device.IsFloat16Supported(),
+ .support_int64 = device.IsShaderInt64Supported(),
+ };
}
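The worker-count expression in the constructor above guards against a standard-library quirk: hardware_concurrency() may legitimately return 0. A tiny sketch of the clamp:

#include <algorithm>
#include <cstdio>
#include <thread>

int main() {
    // Clamping to at least 2 before subtracting guarantees one builder
    // thread even when hardware_concurrency() reports 0.
    const unsigned workers = std::max(std::thread::hardware_concurrency(), 2U) - 1;
    std::printf("%u pipeline builder thread(s)\n", workers);
}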
-Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_,
- GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_)
- : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_),
- shader_ir(program_code, main_offset_, compiler_settings, registry),
- entries(GenerateShaderEntries(shader_ir)) {}
-
-Shader::~Shader() = default;
-
-VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_,
- VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
- VKUpdateDescriptorQueue& update_descriptor_queue_)
- : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
- kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
- scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
- update_descriptor_queue_} {}
-
-VKPipelineCache::~VKPipelineCache() = default;
+PipelineCache::~PipelineCache() = default;
-std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
- std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
-
- for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- const auto program{static_cast<Maxwell::ShaderProgram>(index)};
-
- // Skip stages that are not enabled
- if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
- continue;
- }
-
- const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr);
-
- Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
- if (!result) {
- const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
-
- // No shader found - create a new one
- static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
- const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
- ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
-
- auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
- std::move(code), stage_offset);
- result = shader.get();
+GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
+ MICROPROFILE_SCOPE(Vulkan_PipelineCache);
- if (cpu_addr) {
- Register(std::move(shader), *cpu_addr, size_in_bytes);
- } else {
- null_shader = std::move(shader);
- }
+ if (!RefreshStages(graphics_key.unique_hashes)) {
+ current_pipeline = nullptr;
+ return nullptr;
+ }
+ graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(),
+ device.IsExtVertexInputDynamicStateSupported());
+
+ if (current_pipeline) {
+ GraphicsPipeline* const next{current_pipeline->Next(graphics_key)};
+ if (next) {
+ current_pipeline = next;
+ return BuiltPipeline(current_pipeline);
}
- shaders[index] = result;
}
- return last_shaders = shaders;
+ return CurrentGraphicsPipelineSlowPath();
}
-VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
- const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
- VideoCommon::Shader::AsyncShaders& async_shaders) {
+ComputePipeline* PipelineCache::CurrentComputePipeline() {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
- if (last_graphics_pipeline && last_graphics_key == key) {
- return last_graphics_pipeline;
- }
- last_graphics_key = key;
-
- if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
- std::unique_lock lock{pipeline_cache};
- const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
- if (is_cache_miss) {
- gpu.ShaderNotify().MarkSharderBuilding();
- LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
- const auto [program, bindings] = DecompileShaders(key.fixed_state);
- async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
- update_descriptor_queue, bindings, program, key,
- num_color_buffers);
- }
- last_graphics_pipeline = pair->second.get();
- return last_graphics_pipeline;
+ const ShaderInfo* const shader{ComputeShader()};
+ if (!shader) {
+ return nullptr;
}
-
- const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
- auto& entry = pair->second;
- if (is_cache_miss) {
- gpu.ShaderNotify().MarkSharderBuilding();
- LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
- const auto [program, bindings] = DecompileShaders(key.fixed_state);
- entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
- update_descriptor_queue, key, bindings,
- program, num_color_buffers);
- gpu.ShaderNotify().MarkShaderComplete();
+ const auto& qmd{kepler_compute.launch_description};
+ const ComputePipelineCacheKey key{
+ .unique_hash = shader->unique_hash,
+ .shared_memory_size = qmd.shared_alloc,
+ .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
+ };
+ const auto [pair, is_new]{compute_cache.try_emplace(key)};
+ auto& pipeline{pair->second};
+ if (!is_new) {
+ return pipeline.get();
}
- last_graphics_pipeline = entry.get();
- return last_graphics_pipeline;
+ pipeline = CreateComputePipeline(key, shader);
+ return pipeline.get();
}
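Both pipeline caches use the try_emplace pattern shown above: one hash lookup serves the hit and the miss. A generic sketch with illustrative types:

#include <memory>
#include <string>
#include <unordered_map>

std::unordered_map<std::string, std::unique_ptr<int>> cache;

// try_emplace default-constructs the slot only when the key is new, and
// `is_new` tells the caller whether to build the cached object.
int* GetOrCreate(const std::string& key) {
    const auto [it, is_new] = cache.try_emplace(key);
    if (is_new) {
        it->second = std::make_unique<int>(static_cast<int>(key.size()));
    }
    return it->second.get();
}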
-VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
- MICROPROFILE_SCOPE(Vulkan_PipelineCache);
-
- const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
- auto& entry = pair->second;
- if (!is_cache_miss) {
- return *entry;
+void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) {
+ if (title_id == 0) {
+ return;
}
- LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-
- const GPUVAddr gpu_addr = key.shader;
-
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr);
+ const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
+ const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)};
+ if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
+ LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
+ return;
+ }
+ pipeline_cache_filename = base_dir / "vulkan.bin";
+
+ struct {
+ std::mutex mutex;
+ size_t total{};
+ size_t built{};
+ bool has_loaded{};
+ } state;
+
+ const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
+ ComputePipelineCacheKey key;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+ workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
+ ShaderPools pools;
+ auto pipeline{CreateComputePipeline(pools, key, env, false)};
+ std::lock_guard lock{state.mutex};
+ if (pipeline) {
+ compute_cache.emplace(key, std::move(pipeline));
+ }
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
+ ++state.total;
+ }};
+ const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported();
+ const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported();
+ const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
+ GraphicsPipelineCacheKey key;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+ if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state ||
+ (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) {
+ return;
+ }
+ workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
+ ShaderPools pools;
+ boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
+ for (auto& env : envs) {
+ env_ptrs.push_back(&env);
+ }
+ auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
- Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
- if (!shader) {
- // No shader found - create a new one
- const auto host_ptr = gpu_memory.GetPointer(gpu_addr);
+ std::lock_guard lock{state.mutex};
+ graphics_cache.emplace(key, std::move(pipeline));
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
+ ++state.total;
+ }};
+ VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute,
+ load_graphics);
- ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true);
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
+ std::unique_lock lock{state.mutex};
+ callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
+ state.has_loaded = true;
+ lock.unlock();
- auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr,
- *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET);
- shader = shader_info.get();
+ workers.WaitForRequests();
+}
- if (cpu_addr) {
- Register(std::move(shader_info), *cpu_addr, size_in_bytes);
- } else {
- null_kernel = std::move(shader_info);
- }
+GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() {
+ const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
+ auto& pipeline{pair->second};
+ if (is_new) {
+ pipeline = CreateGraphicsPipeline();
}
-
- const Specialization specialization{
- .base_binding = 0,
- .workgroup_size = key.workgroup_size,
- .shared_memory_size = key.shared_memory_size,
- .point_size = std::nullopt,
- .enabled_attributes = {},
- .attribute_types = {},
- .ndc_minus_one_to_one = false,
- };
- const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
- shader->GetRegistry(), specialization),
- shader->GetEntries()};
- entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
- update_descriptor_queue, spirv_shader);
- return *entry;
+ if (!pipeline) {
+ return nullptr;
+ }
+ if (current_pipeline) {
+ current_pipeline->AddTransition(pipeline.get());
+ }
+ current_pipeline = pipeline.get();
+ return BuiltPipeline(current_pipeline);
}
-void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
- gpu.ShaderNotify().MarkShaderComplete();
- std::unique_lock lock{pipeline_cache};
- graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
+GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept {
+ if (pipeline->IsBuilt()) {
+ return pipeline;
+ }
+ if (!use_asynchronous_shaders) {
+ return pipeline;
+ }
+ // If the depth buffer is in use, assume the game is not drawing one-off geometry,
+ // so it is safe to skip this draw while the pipeline builds asynchronously.
+ if (maxwell3d.regs.zeta_enable) {
+ return nullptr;
+ }
+ // Small index or vertex counts usually mean full-screen quads. These shaders
+ // typically run only once (e.g. to build a texture), so they cannot be deferred
+ // and must be built synchronously.
+ if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+ return pipeline;
+ }
+ return nullptr;
}
-void VKPipelineCache::OnShaderRemoval(Shader* shader) {
- bool finished = false;
- const auto Finish = [&] {
- // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
- // flush.
- if (finished) {
- return;
- }
- finished = true;
- scheduler.Finish();
- };
-
- const GPUVAddr invalidated_addr = shader->GetGpuAddr();
- for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
- auto& entry = it->first;
- if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
- entry.shaders.end()) {
- ++it;
+std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
+ ShaderPools& pools, const GraphicsPipelineCacheKey& key,
+ std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
+ LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
+ size_t env_index{0};
+ std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
+ const bool uses_vertex_a{key.unique_hashes[0] != 0};
+ const bool uses_vertex_b{key.unique_hashes[1] != 0};
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] == 0) {
continue;
}
- Finish();
- it = graphics_cache.erase(it);
+ Shader::Environment& env{*envs[env_index]};
+ ++env_index;
+
+ const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
+ Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+ if (!uses_vertex_a || index != 1) {
+ // Normal path
+ programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
+ } else {
+ // VertexB path when VertexA is present.
+ auto& program_va{programs[0]};
+ auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+ programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
+ }
}
- for (auto it = compute_cache.begin(); it != compute_cache.end();) {
- auto& entry = it->first;
- if (entry.shader != invalidated_addr) {
- ++it;
+ std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
+ std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
+
+ const Shader::IR::Program* previous_stage{};
+ Shader::Backend::Bindings binding;
+ for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
+ ++index) {
+ if (key.unique_hashes[index] == 0) {
continue;
}
- Finish();
- it = compute_cache.erase(it);
+ UNIMPLEMENTED_IF(index == 0);
+
+ Shader::IR::Program& program{programs[index]};
+ const size_t stage_index{index - 1};
+ infos[stage_index] = &program.info;
+
+ const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
+ const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)};
+ device.SaveShader(code);
+ modules[stage_index] = BuildShader(device, code);
+ if (device.HasDebuggingToolAttached()) {
+ const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
+ modules[stage_index].SetObjectNameEXT(name.c_str());
+ }
+ previous_stage = &program;
}
+ Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
+ return std::make_unique<GraphicsPipeline>(
+ maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
+ descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key,
+ std::move(modules), infos);
+
+} catch (const Shader::Exception& exception) {
+ LOG_ERROR(Render_Vulkan, "{}", exception.what());
+ return nullptr;
}
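CreateGraphicsPipeline above uses a function-try-block: the `try` sits between the signature and the body, so the catch clause covers the whole function and returns its failure value. A standalone sketch of the syntax, with invented names:

#include <cstdio>
#include <memory>
#include <stdexcept>

std::unique_ptr<int> MakeValue(bool fail) try {
    if (fail) {
        throw std::runtime_error("translation failed");
    }
    return std::make_unique<int>(42);
} catch (const std::exception& e) {
    std::fprintf(stderr, "%s\n", e.what());
    return nullptr; // the catch clause supplies the function's return value
}

int main() {
    return MakeValue(true) ? 0 : 1;
}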
-std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
-VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
- Specialization specialization;
- if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) {
- float point_size;
- std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
- specialization.point_size = point_size;
- ASSERT(point_size != 0.0f);
- }
- for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
- const auto& attribute = fixed_state.attributes[i];
- specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
- specialization.attribute_types[i] = attribute.Type();
- }
- specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
- specialization.early_fragment_tests = fixed_state.early_z;
-
- // Alpha test
- specialization.alpha_test_func =
- FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value());
- specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref);
-
- SPIRVProgram program;
- std::vector<VkDescriptorSetLayoutBinding> bindings;
+std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
+ GraphicsEnvironments environments;
+ GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
- for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) {
- const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
- // Skip stages that are not enabled
- if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
- continue;
- }
- const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
- Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
-
- const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
- const ShaderType program_type = GetShaderType(program_enum);
- const auto& entries = shader->GetEntries();
- program[stage] = {
- Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
- entries,
- };
-
- const u32 old_binding = specialization.base_binding;
- specialization.base_binding =
- FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
- ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
+ main_pools.ReleaseContents();
+ auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)};
+ if (!pipeline || pipeline_cache_filename.empty()) {
+ return pipeline;
}
- return {std::move(program), std::move(bindings)};
-}
-
-template <VkDescriptorType descriptor_type, class Container>
-void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding,
- u32& offset, const Container& container) {
- static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
- const u32 count = static_cast<u32>(std::size(container));
-
- if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
- for (u32 i = 0; i < count; ++i) {
- const u32 num_samplers = container[i].size;
- template_entries.push_back({
- .dstBinding = binding,
- .dstArrayElement = 0,
- .descriptorCount = num_samplers,
- .descriptorType = descriptor_type,
- .offset = offset,
- .stride = entry_size,
- });
-
- ++binding;
- offset += num_samplers * entry_size;
+ serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] {
+ boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
+ env_ptrs;
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] != 0) {
+ env_ptrs.push_back(&envs[index]);
+ }
}
- return;
- }
+ SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION);
+ });
+ return pipeline;
+}
- if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
- descriptor_type == STORAGE_TEXEL_BUFFER) {
- // Nvidia has a bug where updating multiple texels at once causes the driver to crash.
- // Note: Fixed in driver Windows 443.24, Linux 440.66.15
- for (u32 i = 0; i < count; ++i) {
- template_entries.push_back({
- .dstBinding = binding + i,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = descriptor_type,
- .offset = static_cast<std::size_t>(offset + i * entry_size),
- .stride = entry_size,
- });
- }
- } else if (count > 0) {
- template_entries.push_back({
- .dstBinding = binding,
- .dstArrayElement = 0,
- .descriptorCount = count,
- .descriptorType = descriptor_type,
- .offset = offset,
- .stride = entry_size,
- });
+std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
+ const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
+ const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+ const auto& qmd{kepler_compute.launch_description};
+ ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ env.SetCachedSize(shader->size_bytes);
+
+ main_pools.ReleaseContents();
+ auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
+ if (!pipeline || pipeline_cache_filename.empty()) {
+ return pipeline;
}
- offset += count * entry_size;
- binding += count;
+ serialization_thread.QueueWork([this, key, env = std::move(env)] {
+ SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
+ pipeline_cache_filename, CACHE_VERSION);
+ });
+ return pipeline;
}
-void FillDescriptorUpdateTemplateEntries(
- const ShaderEntries& entries, u32& binding, u32& offset,
- std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
- AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
- AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
- AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels);
- AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
- AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels);
- AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
+std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
+ ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
+ bool build_in_parallel) try {
+ LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
+
+ Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
+ auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+ const std::vector<u32> code{EmitSPIRV(profile, program)};
+ device.SaveShader(code);
+ vk::ShaderModule spv_module{BuildShader(device, code)};
+ if (device.HasDebuggingToolAttached()) {
+ const auto name{fmt::format("Shader {:016x}", key.unique_hash)};
+ spv_module.SetObjectNameEXT(name.c_str());
+ }
+ Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
+ return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue,
+ thread_worker, &shader_notify, program.info,
+ std::move(spv_module));
+
+} catch (const Shader::Exception& exception) {
+ LOG_ERROR(Render_Vulkan, "{}", exception.what());
+ return nullptr;
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 89d635a3d..efe5a7ed8 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -6,24 +6,28 @@
#include <array>
#include <cstddef>
+#include <filesystem>
+#include <iosfwd>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
-#include <boost/functional/hash.hpp>
-
#include "common/common_types.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
+#include "common/thread_worker.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/object_pool.h"
+#include "shader_recompiler/profile.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/shader/async_shaders.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/shader_cache.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -31,23 +35,24 @@ namespace Core {
class System;
}
-namespace Vulkan {
+namespace Shader::IR {
+struct Program;
+}
-class Device;
-class RasterizerVulkan;
-class VKComputePipeline;
-class VKDescriptorPool;
-class VKScheduler;
-class VKUpdateDescriptorQueue;
+namespace VideoCore {
+class ShaderNotify;
+}
+
+namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ComputePipelineCacheKey {
- GPUVAddr shader;
+ u64 unique_hash;
u32 shared_memory_size;
std::array<u32, 3> workgroup_size;
- std::size_t Hash() const noexcept;
+ size_t Hash() const noexcept;
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
@@ -64,15 +69,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
namespace std {
template <>
-struct hash<Vulkan::GraphicsPipelineCacheKey> {
- std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
- return k.Hash();
- }
-};
-
-template <>
struct hash<Vulkan::ComputePipelineCacheKey> {
- std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
+ size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
@@ -81,94 +79,90 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
namespace Vulkan {
-class Shader {
-public:
- explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_,
- Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_,
- VideoCommon::Shader::ProgramCode program_code, u32 main_offset_);
- ~Shader();
-
- GPUVAddr GetGpuAddr() const {
- return gpu_addr;
- }
-
- VideoCommon::Shader::ShaderIR& GetIR() {
- return shader_ir;
- }
-
- const VideoCommon::Shader::ShaderIR& GetIR() const {
- return shader_ir;
- }
+class ComputePipeline;
+class Device;
+class DescriptorPool;
+class RasterizerVulkan;
+class RenderPassCache;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
- const VideoCommon::Shader::Registry& GetRegistry() const {
- return registry;
- }
+using VideoCommon::ShaderInfo;
- const ShaderEntries& GetEntries() const {
- return entries;
+struct ShaderPools {
+ void ReleaseContents() {
+ flow_block.ReleaseContents();
+ block.ReleaseContents();
+ inst.ReleaseContents();
}
-private:
- GPUVAddr gpu_addr{};
- VideoCommon::Shader::ProgramCode program_code;
- VideoCommon::Shader::Registry registry;
- VideoCommon::Shader::ShaderIR shader_ir;
- ShaderEntries entries;
+ Shader::ObjectPool<Shader::IR::Inst> inst;
+ Shader::ObjectPool<Shader::IR::Block> block;
+ Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
};
-class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
+class PipelineCache : public VideoCommon::ShaderCache {
public:
- explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
- Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::Engines::KeplerCompute& kepler_compute,
- Tegra::MemoryManager& gpu_memory, const Device& device,
- VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue);
- ~VKPipelineCache() override;
+ explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::Engines::KeplerCompute& kepler_compute,
+ Tegra::MemoryManager& gpu_memory, const Device& device,
+ VKScheduler& scheduler, DescriptorPool& descriptor_pool,
+ VKUpdateDescriptorQueue& update_descriptor_queue,
+ RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
+ TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
+ ~PipelineCache();
+
+ [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
- std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
+ [[nodiscard]] ComputePipeline* CurrentComputePipeline();
- VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
- u32 num_color_buffers,
- VideoCommon::Shader::AsyncShaders& async_shaders);
+ void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback);
- VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
+private:
+ [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
- void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
+ [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
-protected:
- void OnShaderRemoval(Shader* shader) final;
+ std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
-private:
- std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
- const FixedPipelineState& fixed_state);
+ std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
+ ShaderPools& pools, const GraphicsPipelineCacheKey& key,
+ std::span<Shader::Environment* const> envs, bool build_in_parallel);
- Tegra::GPU& gpu;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
- Tegra::MemoryManager& gpu_memory;
+ std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key,
+ const ShaderInfo* shader);
+
+ std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools,
+ const ComputePipelineCacheKey& key,
+ Shader::Environment& env,
+ bool build_in_parallel);
const Device& device;
VKScheduler& scheduler;
- VKDescriptorPool& descriptor_pool;
+ DescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
+ RenderPassCache& render_pass_cache;
+ BufferCache& buffer_cache;
+ TextureCache& texture_cache;
+ VideoCore::ShaderNotify& shader_notify;
+ bool use_asynchronous_shaders{};
- std::unique_ptr<Shader> null_shader;
- std::unique_ptr<Shader> null_kernel;
+ GraphicsPipelineCacheKey graphics_key{};
+ GraphicsPipeline* current_pipeline{};
- std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
+ std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
+ std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
- GraphicsPipelineCacheKey last_graphics_key;
- VKGraphicsPipeline* last_graphics_pipeline = nullptr;
+ ShaderPools main_pools;
- std::mutex pipeline_cache;
- std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
- graphics_cache;
- std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
-};
+ Shader::Profile profile;
+ Shader::HostTranslateInfo host_info;
-void FillDescriptorUpdateTemplateEntries(
- const ShaderEntries& entries, u32& binding, u32& offset,
- std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries);
+ std::filesystem::path pipeline_cache_filename;
+
+ Common::ThreadWorker workers;
+ Common::ThreadWorker serialization_thread;
+};
} // namespace Vulkan
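ShaderPools::ReleaseContents() above lets one compilation's IR objects be discarded in bulk rather than freed one by one. A simplified stand-in for Shader::ObjectPool, purely illustrative (the real pool also recycles its memory across compilations):

#include <memory>
#include <utility>
#include <vector>

template <typename T>
class ObjectPool {
public:
    template <typename... Args>
    T* Create(Args&&... args) {
        items.push_back(std::make_unique<T>(std::forward<Args>(args)...));
        return items.back().get();
    }

    // Destroys every object created since the last release in one call.
    void ReleaseContents() {
        items.clear();
    }

private:
    std::vector<std::unique_ptr<T>> items;
};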
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 7cadd5147..c9cb32d71 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -114,14 +114,10 @@ void HostCounter::EndQuery() {
}
u64 HostCounter::BlockingQuery() const {
- if (tick >= cache.GetScheduler().CurrentTick()) {
- cache.GetScheduler().Flush();
- }
-
+ cache.GetScheduler().Wait(tick);
u64 data;
const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
- query.first, query.second, 1, sizeof(data), &data, sizeof(data),
- VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
+ query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT);
switch (query_result) {
case VK_SUCCESS:
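The BlockingQuery change replaces the device-side VK_QUERY_RESULT_WAIT_BIT with an explicit host wait on the scheduler tick: once the submission's tick has passed, the query result is known to be available. A generic sketch of the tick-wait pattern (the real scheduler waits on a timeline semaphore, not a spin loop):

#include <atomic>
#include <cstdint>
#include <thread>

std::atomic<std::uint64_t> known_gpu_tick{0};

// Blocks the host until the GPU has passed `tick`; afterwards, any query
// submitted at that tick has completed, so no device-side wait bit is needed.
void Wait(std::uint64_t tick) {
    while (known_gpu_tick.load(std::memory_order_acquire) < tick) {
        std::this_thread::yield();
    }
}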
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index bd4d649cc..c7a07fdd8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -24,7 +24,6 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -55,11 +54,10 @@ struct DrawParams {
u32 num_instances;
u32 base_vertex;
u32 num_vertices;
+ u32 first_index;
bool is_indexed;
};
-constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
-
VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
const auto& src = regs.viewport_transform[index];
const float width = src.scale_x * 2.0f;
@@ -97,118 +95,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
return scissor;
}
-std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
- const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
- std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
- for (size_t i = 0; i < std::size(addresses); ++i) {
- addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
- }
- return addresses;
-}
-
-struct TextureHandle {
- constexpr TextureHandle(u32 data, bool via_header_index) {
- const Tegra::Texture::TextureHandle handle{data};
- image = handle.tic_id;
- sampler = via_header_index ? image : handle.tsc_id.Value();
- }
-
- u32 image;
- u32 sampler;
-};
-
-template <typename Engine, typename Entry>
-TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
- size_t stage, size_t index = 0) {
- const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
- if constexpr (std::is_same_v<Entry, SamplerEntry>) {
- if (entry.is_separated) {
- const u32 buffer_1 = entry.buffer;
- const u32 buffer_2 = entry.secondary_buffer;
- const u32 offset_1 = entry.offset;
- const u32 offset_2 = entry.secondary_offset;
- const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
- const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
- return TextureHandle(handle_1 | handle_2, via_header_index);
- }
- }
- if (entry.is_bindless) {
- const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
- return TextureHandle(raw, via_header_index);
- }
- const u32 buffer = engine.GetBoundBuffer();
- const u64 offset = (entry.offset + index) * sizeof(u32);
- return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
-}
-
-ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
- if (entry.is_buffer) {
- return ImageViewType::e2D;
- }
- switch (entry.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
- case Tegra::Shader::TextureType::Texture2D:
- return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
- case Tegra::Shader::TextureType::Texture3D:
- return ImageViewType::e3D;
- case Tegra::Shader::TextureType::TextureCube:
- return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
- }
- UNREACHABLE();
- return ImageViewType::e2D;
-}
-
-ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
- switch (entry.type) {
- case Tegra::Shader::ImageType::Texture1D:
- return ImageViewType::e1D;
- case Tegra::Shader::ImageType::Texture1DArray:
- return ImageViewType::e1DArray;
- case Tegra::Shader::ImageType::Texture2D:
- return ImageViewType::e2D;
- case Tegra::Shader::ImageType::Texture2DArray:
- return ImageViewType::e2DArray;
- case Tegra::Shader::ImageType::Texture3D:
- return ImageViewType::e3D;
- case Tegra::Shader::ImageType::TextureBuffer:
- return ImageViewType::Buffer;
- }
- UNREACHABLE();
- return ImageViewType::e2D;
-}
-
-void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache,
- VKUpdateDescriptorQueue& update_descriptor_queue,
- ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) {
- for ([[maybe_unused]] const auto& entry : entries.uniform_texels) {
- const ImageViewId image_view_id = *image_view_id_ptr++;
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
- }
- for (const auto& entry : entries.samplers) {
- for (size_t i = 0; i < entry.size; ++i) {
- const VkSampler sampler = *sampler_ptr++;
- const ImageViewId image_view_id = *image_view_id_ptr++;
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
- update_descriptor_queue.AddSampledImage(handle, sampler);
- }
- }
- for ([[maybe_unused]] const auto& entry : entries.storage_texels) {
- const ImageViewId image_view_id = *image_view_id_ptr++;
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
- }
- for (const auto& entry : entries.images) {
- // TODO: Mark as modified
- const ImageViewId image_view_id = *image_view_id_ptr++;
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
- update_descriptor_queue.AddImage(handle);
- }
-}
-
DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
bool is_indexed) {
DrawParams params{
@@ -216,6 +102,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan
.num_instances = is_instanced ? num_instances : 1,
.base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
.num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
+ .first_index = is_indexed ? regs.index_array.first : 0,
.is_indexed = is_indexed,
};
if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
@@ -243,21 +130,21 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
blit_image(device, scheduler, state_tracker, descriptor_pool),
astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue,
memory_allocator),
- texture_cache_runtime{device, scheduler, memory_allocator,
- staging_pool, blit_image, astc_decoder_pass},
+ render_pass_cache(device), texture_cache_runtime{device, scheduler,
+ memory_allocator, staging_pool,
+ blit_image, astc_decoder_pass,
+ render_pass_cache},
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
update_descriptor_queue, descriptor_pool),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
- pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
- descriptor_pool, update_descriptor_queue),
- query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
+ pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
+ descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
+ texture_cache, gpu.ShaderNotify()),
+ query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
- wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
+ wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
- if (device.UseAsynchronousShaders()) {
- async_shaders.AllocateWorkers();
- }
}
RasterizerVulkan::~RasterizerVulkan() = default;
@@ -270,53 +157,30 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
query_cache.UpdateCounters();
- graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
-
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
-
- texture_cache.SynchronizeGraphicsDescriptors();
- texture_cache.UpdateRenderTargets(false);
-
- const auto shaders = pipeline_cache.GetShaders();
- graphics_key.shaders = GetShaderAddresses(shaders);
-
- SetupShaderDescriptors(shaders, is_indexed);
-
- const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
- graphics_key.renderpass = framebuffer->RenderPass();
-
- VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
- graphics_key, framebuffer->NumColorBuffers(), async_shaders);
- if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
- // Async graphics pipeline was not ready.
+ GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
+ if (!pipeline) {
return;
}
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ pipeline->Configure(is_indexed);
BeginTransformFeedback();
- scheduler.RequestRenderpass(framebuffer);
- scheduler.BindGraphicsPipeline(pipeline->GetHandle());
UpdateDynamicStates();
- const auto& regs = maxwell3d.regs;
- const u32 num_instances = maxwell3d.mme_draw.instance_count;
- const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
- const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
- const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
- scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
- if (descriptor_set) {
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
- DESCRIPTOR_SET, descriptor_set, nullptr);
- }
+ const auto& regs{maxwell3d.regs};
+ const u32 num_instances{maxwell3d.mme_draw.instance_count};
+ const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
+ scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
- cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
- draw_params.base_vertex, draw_params.base_instance);
+ cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
+ draw_params.first_index, draw_params.base_vertex,
+ draw_params.base_instance);
} else {
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
draw_params.base_vertex, draw_params.base_instance);
}
});
-
EndTransformFeedback();
}
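
Aside: one behavioral fix hidden in this hunk is that DrawIndexed now receives first_index instead of a hardcoded 0. Under vkCmdDrawIndexed semantics the vertex fetched for element i is index_buffer[firstIndex + i] + vertexOffset, so ignoring regs.index_array.first read from the wrong region of the index buffer. A self-contained illustration (illustrative names, not yuzu code):

#include <cstdint>
#include <vector>

// Vertex index used for draw element `i` under vkCmdDrawIndexed semantics.
std::uint32_t FetchedVertex(const std::vector<std::uint32_t>& index_buffer,
                            std::uint32_t first_index, std::int32_t base_vertex,
                            std::uint32_t i) {
    return index_buffer[first_index + i] + static_cast<std::uint32_t>(base_vertex);
}
// With first_index forced to 0, a draw over indices [first, first + count)
// would instead read from the start of the index buffer.
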
@@ -326,6 +190,7 @@ void RasterizerVulkan::Clear() {
if (!maxwell3d.ShouldExecute()) {
return;
}
+ FlushWork();
query_cache.UpdateCounters();
@@ -357,11 +222,13 @@ void RasterizerVulkan::Clear() {
.height = std::min(clear_rect.rect.extent.height, render_area.height),
};
- if (use_color) {
+ const u32 color_attachment = regs.clear_buffers.RT;
+ const auto attachment_aspect_mask = framebuffer->ImageRanges()[color_attachment].aspectMask;
+ const bool is_color_rt = (attachment_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
+ if (use_color && is_color_rt) {
VkClearValue clear_value;
std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
- const u32 color_attachment = regs.clear_buffers.RT;
scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
const VkClearAttachment attachment{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
@@ -393,73 +260,20 @@ void RasterizerVulkan::Clear() {
});
}
-void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
- MICROPROFILE_SCOPE(Vulkan_Compute);
-
- query_cache.UpdateCounters();
+void RasterizerVulkan::DispatchCompute() {
+ FlushWork();
- const auto& launch_desc = kepler_compute.launch_description;
- auto& pipeline = pipeline_cache.GetComputePipeline({
- .shader = code_addr,
- .shared_memory_size = launch_desc.shared_alloc,
- .workgroup_size{
- launch_desc.block_dim_x,
- launch_desc.block_dim_y,
- launch_desc.block_dim_z,
- },
- });
+ ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
+ if (!pipeline) {
+ return;
+ }
+ std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
+ pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache);
- // Compute dispatches can't be executed inside a renderpass
+ const auto& qmd{kepler_compute.launch_description};
+ const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
scheduler.RequestOutsideRenderPassOperationContext();
-
- image_view_indices.clear();
- sampler_handles.clear();
-
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
-
- const auto& entries = pipeline.GetEntries();
- buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
- buffer_cache.UnbindComputeStorageBuffers();
- u32 ssbo_index = 0;
- for (const auto& buffer : entries.global_buffers) {
- buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
- buffer.is_written);
- ++ssbo_index;
- }
- buffer_cache.UpdateComputeBuffers();
-
- texture_cache.SynchronizeComputeDescriptors();
-
- SetupComputeUniformTexels(entries);
- SetupComputeTextures(entries);
- SetupComputeStorageTexels(entries);
- SetupComputeImages(entries);
-
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- texture_cache.FillComputeImageViews(indices_span, image_view_ids);
-
- update_descriptor_queue.Acquire();
-
- buffer_cache.BindHostComputeBuffers();
-
- ImageViewId* image_view_id_ptr = image_view_ids.data();
- VkSampler* sampler_ptr = sampler_handles.data();
- PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
- sampler_ptr);
-
- const VkPipeline pipeline_handle = pipeline.GetHandle();
- const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
- const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
- scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
- grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout,
- descriptor_set](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
- if (descriptor_set) {
- cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
- DESCRIPTOR_SET, descriptor_set, nullptr);
- }
- cmdbuf.Dispatch(grid_x, grid_y, grid_z);
- });
+ scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
}
void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
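
Aside: DispatchCompute still calls RequestOutsideRenderPassOperationContext before recording, since vkCmdDispatch is invalid inside a render pass. A toy model of that constraint (assumed shape, standard C++ only, not the scheduler's real API):

#include <cassert>

// A compute dispatch must not be recorded while a render pass is open, so
// the scheduler closes the pass before recording the dispatch.
class SchedulerModel {
public:
    void BeginRenderPass() {
        render_pass_open = true;
    }
    void RequestOutsideRenderPassOperationContext() {
        render_pass_open = false; // would record vkCmdEndRenderPass here
    }
    void Dispatch(unsigned x, unsigned y, unsigned z) {
        assert(!render_pass_open && "vkCmdDispatch inside a render pass is invalid");
        (void)x;
        (void)y;
        (void)z;
    }

private:
    bool render_pass_open = false;
};
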
@@ -580,6 +394,13 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
fence_manager.SignalSyncPoint(value);
}
+void RasterizerVulkan::SignalReference() {
+ if (!gpu.IsAsync()) {
+ return;
+ }
+ fence_manager.SignalOrdering();
+}
+
void RasterizerVulkan::ReleaseFences() {
if (!gpu.IsAsync()) {
return;
@@ -612,10 +433,12 @@ void RasterizerVulkan::WaitForIdle() {
cmdbuf.SetEvent(event, flags);
cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {});
});
+ SignalReference();
}
void RasterizerVulkan::FragmentBarrier() {
// We already put barriers when a render pass finishes
+ scheduler.RequestOutsideRenderPassOperationContext();
}
void RasterizerVulkan::TiledCacheBarrier() {
@@ -623,10 +446,11 @@ void RasterizerVulkan::TiledCacheBarrier() {
}
void RasterizerVulkan::FlushCommands() {
- if (draw_counter > 0) {
- draw_counter = 0;
- scheduler.Flush();
+ if (draw_counter == 0) {
+ return;
}
+ draw_counter = 0;
+ scheduler.Flush();
}
void RasterizerVulkan::TickFrame() {
@@ -652,6 +476,10 @@ bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
return true;
}
+Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() {
+ return accelerate_dma;
+}
+
bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (!framebuffer_addr) {
@@ -662,13 +490,18 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
if (!image_view) {
return false;
}
- screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
+ screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D);
screen_info.width = image_view->size.width;
screen_info.height = image_view->size.height;
screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
}
+void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) {
+ pipeline_cache.LoadDiskResources(title_id, stop_loading, callback);
+}
+
void RasterizerVulkan::FlushWork() {
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
@@ -677,63 +510,27 @@ void RasterizerVulkan::FlushWork() {
if ((++draw_counter & 7) != 7) {
return;
}
-
if (draw_counter < DRAWS_TO_DISPATCH) {
// Send recorded tasks to the worker thread
scheduler.DispatchWork();
return;
}
-
// Otherwise (every certain number of draws) flush execution.
// This submits commands to the Vulkan driver.
scheduler.Flush();
draw_counter = 0;
}
-void RasterizerVulkan::SetupShaderDescriptors(
- const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
- image_view_indices.clear();
- sampler_handles.clear();
- for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
- Shader* const shader = shaders[stage + 1];
- if (!shader) {
- continue;
- }
- const ShaderEntries& entries = shader->GetEntries();
- SetupGraphicsUniformTexels(entries, stage);
- SetupGraphicsTextures(entries, stage);
- SetupGraphicsStorageTexels(entries, stage);
- SetupGraphicsImages(entries, stage);
-
- buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
- buffer_cache.UnbindGraphicsStorageBuffers(stage);
- u32 ssbo_index = 0;
- for (const auto& buffer : entries.global_buffers) {
- buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
- buffer.cbuf_offset, buffer.is_written);
- ++ssbo_index;
- }
- }
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- buffer_cache.UpdateGraphicsBuffers(is_indexed);
- texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
-
- buffer_cache.BindHostGeometryBuffers(is_indexed);
+AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
- update_descriptor_queue.Acquire();
+bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ return buffer_cache.DMAClear(src_address, amount, value);
+}
- ImageViewId* image_view_id_ptr = image_view_ids.data();
- VkSampler* sampler_ptr = sampler_handles.data();
- for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
- // Skip VertexA stage
- Shader* const shader = shaders[stage + 1];
- if (!shader) {
- continue;
- }
- buffer_cache.BindHostStageBuffers(stage);
- PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
- image_view_id_ptr, sampler_ptr);
- }
+bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ return buffer_cache.DMACopy(src_address, dest_address, amount);
}
void RasterizerVulkan::UpdateDynamicStates() {
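
Aside: the AccelerateDMA methods follow the usual accelerate-or-fallback contract: true means the buffer cache serviced the operation on the GPU, false sends the caller down the generic CPU path. A minimal sketch of how a DMA engine might consume such an interface (signatures simplified from maxwell_dma.h; names illustrative):

#include <cstdint>

// Assumed shape of the interface: a fast path returns true when the
// copy/clear was serviced by the buffer cache.
struct DmaInterfaceModel {
    virtual ~DmaInterfaceModel() = default;
    virtual bool BufferCopy(std::uint64_t src, std::uint64_t dst, std::uint64_t amount) = 0;
    virtual bool BufferClear(std::uint64_t dst, std::uint64_t amount, std::uint32_t value) = 0;
};

void LaunchCopy(DmaInterfaceModel& accel, std::uint64_t src, std::uint64_t dst,
                std::uint64_t amount) {
    if (accel.BufferCopy(src, dst, amount)) {
        return; // Stayed on the GPU; no cache flush or CPU round trip.
    }
    // Fallback: read guest memory and write it back through the generic path.
}
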
@@ -744,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateBlendConstants(regs);
UpdateDepthBounds(regs);
UpdateStencilFaces(regs);
+ UpdateLineWidth(regs);
if (device.IsExtExtendedDynamicStateSupported()) {
UpdateCullMode(regs);
UpdateDepthBoundsTestEnable(regs);
@@ -753,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateFrontFace(regs);
UpdateStencilOp(regs);
UpdateStencilTestEnable(regs);
+ if (device.IsExtVertexInputDynamicStateSupported()) {
+ UpdateVertexInput(regs);
+ }
}
}
@@ -784,89 +585,6 @@ void RasterizerVulkan::EndTransformFeedback() {
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}
-void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
- const auto& regs = maxwell3d.regs;
- const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : entries.uniform_texels) {
- const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
- const auto& regs = maxwell3d.regs;
- const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : entries.samplers) {
- for (size_t index = 0; index < entry.size; ++index) {
- const TextureHandle handle =
- GetTextureInfo(maxwell3d, via_header_index, entry, stage, index);
- image_view_indices.push_back(handle.image);
-
- Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
- sampler_handles.push_back(sampler->Handle());
- }
- }
-}
-
-void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
- const auto& regs = maxwell3d.regs;
- const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : entries.storage_texels) {
- const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
- const auto& regs = maxwell3d.regs;
- const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : entries.images) {
- const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : entries.uniform_texels) {
- const TextureHandle handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : entries.samplers) {
- for (size_t index = 0; index < entry.size; ++index) {
- const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry,
- COMPUTE_SHADER_INDEX, index);
- image_view_indices.push_back(handle.image);
-
- Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
- sampler_handles.push_back(sampler->Handle());
- }
- }
-}
-
-void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : entries.storage_texels) {
- const TextureHandle handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : entries.images) {
- const TextureHandle handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
- image_view_indices.push_back(handle.image);
- }
-}
-
void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchViewports()) {
return;
@@ -959,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
}
}
+void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) {
+ if (!state_tracker.TouchLineWidth()) {
+ return;
+ }
+ const float width = regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased;
+ scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); });
+}
+
void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchCullMode()) {
return;
@@ -973,6 +699,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re
if (!state_tracker.TouchDepthBoundsTestEnable()) {
return;
}
+ bool enabled = regs.depth_bounds_enable;
+ if (enabled && !device.IsDepthBoundsSupported()) {
+ LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
+ enabled = false;
+ }
- scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBoundsTestEnableEXT(enable);
});
@@ -1060,4 +791,62 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
});
}
+void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
+ auto& dirty{maxwell3d.dirty.flags};
+ if (!dirty[Dirty::VertexInput]) {
+ return;
+ }
+ dirty[Dirty::VertexInput] = false;
+
+ boost::container::static_vector<VkVertexInputBindingDescription2EXT, 32> bindings;
+ boost::container::static_vector<VkVertexInputAttributeDescription2EXT, 32> attributes;
+
+ // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up
+ // generating dirty state. Track the highest dirty attribute and rebuild every attribute up
+ // to and including it.
+ size_t highest_dirty_attr{};
+ for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+ if (dirty[Dirty::VertexAttribute0 + index]) {
+ highest_dirty_attr = index + 1;
+ }
+ }
+ for (size_t index = 0; index < highest_dirty_attr; ++index) {
+ const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]};
+ const u32 binding{attribute.buffer};
+ dirty[Dirty::VertexAttribute0 + index] = false;
+ dirty[Dirty::VertexBinding0 + static_cast<size_t>(binding)] = true;
+ if (!attribute.constant) {
+ attributes.push_back({
+ .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT,
+ .pNext = nullptr,
+ .location = static_cast<u32>(index),
+ .binding = binding,
+ .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size),
+ .offset = attribute.offset,
+ });
+ }
+ }
+ for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+ if (!dirty[Dirty::VertexBinding0 + index]) {
+ continue;
+ }
+ dirty[Dirty::VertexBinding0 + index] = false;
+
+ const u32 binding{static_cast<u32>(index)};
+ const auto& input_binding{regs.vertex_array[binding]};
+ const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)};
+ bindings.push_back({
+ .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT,
+ .pNext = nullptr,
+ .binding = binding,
+ .stride = input_binding.stride,
+ .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX,
+ .divisor = is_instanced ? input_binding.divisor : 1,
+ });
+ }
+ scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) {
+ cmdbuf.SetVertexInputEXT(bindings, attributes);
+ });
+}
+
} // namespace Vulkan
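
Aside: the dirty-attribute workaround in UpdateVertexInput boils down to this standalone helper (standard C++ only, not yuzu code): it returns one past the highest dirty slot, so rebuilding [0, result) covers every dirty attribute.

#include <array>
#include <cstddef>

constexpr std::size_t NumVertexAttributes = 32;

// Returns one past the highest dirty attribute; rebuilding everything below
// that bound sidesteps the driver issue described in the hunk above.
std::size_t AttributesToRebuild(const std::array<bool, NumVertexAttributes>& dirty) {
    std::size_t highest = 0;
    for (std::size_t index = 0; index < NumVertexAttributes; ++index) {
        if (dirty[index]) {
            highest = index + 1;
        }
    }
    return highest;
}
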
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 41459c5c5..866827247 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -13,6 +13,7 @@
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
+#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_vulkan/blit_image.h"
@@ -20,14 +21,13 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
-#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/shader/async_shaders.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -49,6 +49,18 @@ struct VKScreenInfo;
class StateTracker;
+class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
+public:
+ explicit AccelerateDMA(BufferCache& buffer_cache);
+
+ bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) override;
+
+ bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
+
+private:
+ BufferCache& buffer_cache;
+};
+
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -60,7 +72,7 @@ public:
void Draw(bool is_indexed, bool is_instanced) override;
void Clear() override;
- void DispatchCompute(GPUVAddr code_addr) override;
+ void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -75,6 +87,7 @@ public:
void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
+ void SignalReference() override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
@@ -85,21 +98,11 @@ public:
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
+ Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
-
- VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
- return async_shaders;
- }
-
- const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
- return async_shaders;
- }
-
- /// Maximum supported size that a constbuffer can have in bytes.
- static constexpr size_t MaxConstbufferSize = 0x10000;
- static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
- "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
+ void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) override;
private:
static constexpr size_t MAX_TEXTURES = 192;
@@ -110,46 +113,19 @@ private:
void FlushWork();
- /// Setup descriptors in the graphics pipeline.
- void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
- bool is_indexed);
-
void UpdateDynamicStates();
void BeginTransformFeedback();
void EndTransformFeedback();
- /// Setup uniform texels in the graphics pipeline.
- void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
-
- /// Setup textures in the graphics pipeline.
- void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
-
- /// Setup storage texels in the graphics pipeline.
- void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
-
- /// Setup images in the graphics pipeline.
- void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
-
- /// Setup texel buffers in the compute pipeline.
- void SetupComputeUniformTexels(const ShaderEntries& entries);
-
- /// Setup textures in the compute pipeline.
- void SetupComputeTextures(const ShaderEntries& entries);
-
- /// Setup storage texels in the compute pipeline.
- void SetupComputeStorageTexels(const ShaderEntries& entries);
-
- /// Setup images in the compute pipeline.
- void SetupComputeImages(const ShaderEntries& entries);
-
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -160,6 +136,8 @@ private:
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
+ void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
+
Tegra::GPU& gpu;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
@@ -172,23 +150,22 @@ private:
VKScheduler& scheduler;
StagingBufferPool staging_pool;
- VKDescriptorPool descriptor_pool;
+ DescriptorPool descriptor_pool;
VKUpdateDescriptorQueue update_descriptor_queue;
BlitImageHelper blit_image;
ASTCDecoderPass astc_decoder_pass;
-
- GraphicsPipelineCacheKey graphics_key;
+ RenderPassCache render_pass_cache;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
- VKPipelineCache pipeline_cache;
+ PipelineCache pipeline_cache;
VKQueryCache query_cache;
+ AccelerateDMA accelerate_dma;
VKFenceManager fence_manager;
vk::Event wfi_event;
- VideoCommon::Shader::AsyncShaders async_shaders;
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
new file mode 100644
index 000000000..451ffe019
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -0,0 +1,96 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <unordered_map>
+
+#include <boost/container/static_vector.hpp>
+
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
+#include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+namespace {
+using VideoCore::Surface::PixelFormat;
+
+VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
+ VkSampleCountFlagBits samples) {
+ using MaxwellToVK::SurfaceFormat;
+ return {
+ .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
+ .format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
+ .samples = samples,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+}
+} // Anonymous namespace
+
+RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
+
+VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
+ std::lock_guard lock{mutex};
+ const auto [pair, is_new] = cache.try_emplace(key);
+ if (!is_new) {
+ return *pair->second;
+ }
+ boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
+ std::array<VkAttachmentReference, 8> references{};
+ u32 num_attachments{};
+ u32 num_colors{};
+ for (size_t index = 0; index < key.color_formats.size(); ++index) {
+ const PixelFormat format{key.color_formats[index]};
+ const bool is_valid{format != PixelFormat::Invalid};
+ references[index] = VkAttachmentReference{
+ .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ if (is_valid) {
+ descriptions.push_back(AttachmentDescription(*device, format, key.samples));
+ num_attachments = static_cast<u32>(index + 1);
+ ++num_colors;
+ }
+ }
+ const bool has_depth{key.depth_format != PixelFormat::Invalid};
+ VkAttachmentReference depth_reference{};
+ if (has_depth) {
+ depth_reference = VkAttachmentReference{
+ .attachment = num_colors,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
+ }
+ const VkSubpassDescription subpass{
+ .flags = 0,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .pInputAttachments = nullptr,
+ .colorAttachmentCount = num_attachments,
+ .pColorAttachments = references.data(),
+ .pResolveAttachments = nullptr,
+ .pDepthStencilAttachment = has_depth ? &depth_reference : nullptr,
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = nullptr,
+ };
+ pair->second = device->GetLogical().CreateRenderPass({
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .attachmentCount = static_cast<u32>(descriptions.size()),
+ .pAttachments = descriptions.empty() ? nullptr : descriptions.data(),
+ .subpassCount = 1,
+ .pSubpasses = &subpass,
+ .dependencyCount = 0,
+ .pDependencies = nullptr,
+ });
+ return *pair->second;
+}
+
+} // namespace Vulkan
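
Aside: RenderPassCache::Get is a straightforward lock-guarded memoization: try_emplace performs lookup and default insertion in one step, so only a genuinely new key pays for render pass creation. A minimal sketch of the same pattern with placeholder types (not yuzu code):

#include <mutex>
#include <string>
#include <unordered_map>

class MemoCache {
public:
    const std::string& Get(int key) {
        std::lock_guard lock{mutex};
        const auto [it, is_new] = cache.try_emplace(key);
        if (is_new) {
            it->second = "render-pass-" + std::to_string(key); // built once
        }
        return it->second; // reused on every later call with the same key
    }

private:
    std::unordered_map<int, std::string> cache;
    std::mutex mutex;
};
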
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
new file mode 100644
index 000000000..eaa0ed775
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <mutex>
+#include <unordered_map>
+
+#include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+struct RenderPassKey {
+ auto operator<=>(const RenderPassKey&) const noexcept = default;
+
+ std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
+ VideoCore::Surface::PixelFormat depth_format;
+ VkSampleCountFlagBits samples;
+};
+
+} // namespace Vulkan
+
+namespace std {
+template <>
+struct hash<Vulkan::RenderPassKey> {
+ [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
+ size_t value = static_cast<size_t>(key.depth_format) << 48;
+ value ^= static_cast<size_t>(key.samples) << 52;
+ for (size_t i = 0; i < key.color_formats.size(); ++i) {
+ value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
+ }
+ return value;
+ }
+};
+} // namespace std
+
+namespace Vulkan {
+
+class Device;
+
+class RenderPassCache {
+public:
+ explicit RenderPassCache(const Device& device_);
+
+ VkRenderPass Get(const RenderPassKey& key);
+
+private:
+ const Device* device{};
+ std::unordered_map<RenderPassKey, vk::RenderPass> cache;
+ std::mutex mutex;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index a8bf7bda8..2dd514968 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -10,18 +10,16 @@
namespace Vulkan {
ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
- : master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
-
-ResourcePool::~ResourcePool() = default;
+ : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
size_t ResourcePool::CommitResource() {
// Refresh semaphore to query updated results
- master_semaphore.Refresh();
- const u64 gpu_tick = master_semaphore.KnownGpuTick();
+ master_semaphore->Refresh();
+ const u64 gpu_tick = master_semaphore->KnownGpuTick();
const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
for (size_t iterator = begin; iterator < end; ++iterator) {
if (gpu_tick >= ticks[iterator]) {
- ticks[iterator] = master_semaphore.CurrentTick();
+ ticks[iterator] = master_semaphore->CurrentTick();
return iterator;
}
}
@@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() {
// Both searches failed, the pool is full; handle it.
const size_t free_resource = ManageOverflow();
- ticks[free_resource] = master_semaphore.CurrentTick();
+ ticks[free_resource] = master_semaphore->CurrentTick();
found = free_resource;
}
}
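
Aside: the reference-to-pointer swap above exists to make ResourcePool movable; the recycling logic is unchanged. A condensed standalone model of CommitResource's tick test (illustrative names, no Vulkan types): a slot is reusable once the GPU's known tick has passed the tick stored when the slot was last handed out, and the pool grows otherwise.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

class TickPool {
public:
    explicit TickPool(std::size_t grow_step_) : grow_step{grow_step_} {}

    std::size_t Commit(std::uint64_t known_gpu_tick, std::uint64_t current_tick) {
        for (std::size_t i = 0; i < ticks.size(); ++i) {
            if (known_gpu_tick >= ticks[i]) { // GPU is done with this slot
                ticks[i] = current_tick;      // reserve until this tick passes
                return i;
            }
        }
        // Pool exhausted: grow and hand out the first new slot.
        const std::size_t first_new = ticks.size();
        ticks.resize(ticks.size() + std::max<std::size_t>(grow_step, 1), 0);
        ticks[first_new] = current_tick;
        return first_new;
    }

private:
    std::size_t grow_step;
    std::vector<std::uint64_t> ticks;
};
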
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index 9d0bb3b4d..f0b80ad59 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -18,8 +18,16 @@ class MasterSemaphore;
*/
class ResourcePool {
public:
+ explicit ResourcePool() = default;
explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
- virtual ~ResourcePool();
+
+ virtual ~ResourcePool() = default;
+
+ ResourcePool& operator=(ResourcePool&&) noexcept = default;
+ ResourcePool(ResourcePool&&) noexcept = default;
+
+ ResourcePool& operator=(const ResourcePool&) = default;
+ ResourcePool(const ResourcePool&) = default;
protected:
size_t CommitResource();
@@ -34,7 +42,7 @@ private:
/// Allocates a new page of resources.
void Grow();
- MasterSemaphore& master_semaphore;
+ MasterSemaphore* master_semaphore{};
size_t grow_step = 0; ///< Number of new resources created after an overflow
size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
std::vector<u64> ticks; ///< Ticks for each resource
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index f35c120b0..4840962de 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
command->~Command();
command = next;
}
-
+ submit = false;
command_offset = 0;
first = nullptr;
last = nullptr;
@@ -42,13 +42,16 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_)
master_semaphore{std::make_unique<MasterSemaphore>(device)},
command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
AcquireNewChunk();
- AllocateNewContext();
+ AllocateWorkerCommandBuffer();
worker_thread = std::thread(&VKScheduler::WorkerThread, this);
}
VKScheduler::~VKScheduler() {
- quit = true;
- cv.notify_all();
+ {
+ std::lock_guard lock{work_mutex};
+ quit = true;
+ }
+ work_cv.notify_all();
worker_thread.join();
}
@@ -60,6 +63,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) {
void VKScheduler::Finish(VkSemaphore semaphore) {
const u64 presubmit_tick = CurrentTick();
SubmitExecution(semaphore);
+ WaitWorker();
Wait(presubmit_tick);
AllocateNewContext();
}
@@ -68,20 +72,19 @@ void VKScheduler::WaitWorker() {
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
- bool finished = false;
- do {
- cv.notify_all();
- std::unique_lock lock{mutex};
- finished = chunk_queue.Empty();
- } while (!finished);
+ std::unique_lock lock{work_mutex};
+ wait_cv.wait(lock, [this] { return work_queue.empty(); });
}
void VKScheduler::DispatchWork() {
if (chunk->Empty()) {
return;
}
- chunk_queue.Push(std::move(chunk));
- cv.notify_all();
+ {
+ std::lock_guard lock{work_mutex};
+ work_queue.push(std::move(chunk));
+ }
+ work_cv.notify_one();
AcquireNewChunk();
}
@@ -124,93 +127,101 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() {
EndRenderPass();
}
-void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
+bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
if (state.graphics_pipeline == pipeline) {
- return;
+ return false;
}
state.graphics_pipeline = pipeline;
- Record([pipeline](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
- });
+ return true;
}
void VKScheduler::WorkerThread() {
- Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
- std::unique_lock lock{mutex};
+ Common::SetCurrentThreadName("yuzu:VulkanWorker");
do {
- cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
- if (quit) {
- continue;
+ if (work_queue.empty()) {
+ wait_cv.notify_all();
+ }
+ std::unique_ptr<CommandChunk> work;
+ {
+ std::unique_lock lock{work_mutex};
+ work_cv.wait(lock, [this] { return !work_queue.empty() || quit; });
+ if (quit) {
+ continue;
+ }
+ work = std::move(work_queue.front());
+ work_queue.pop();
+ }
+ const bool has_submit = work->HasSubmit();
+ work->ExecuteAll(current_cmdbuf);
+ if (has_submit) {
+ AllocateWorkerCommandBuffer();
}
- auto extracted_chunk = std::move(chunk_queue.Front());
- chunk_queue.Pop();
- extracted_chunk->ExecuteAll(current_cmdbuf);
- chunk_reserve.Push(std::move(extracted_chunk));
+ std::lock_guard reserve_lock{reserve_mutex};
+ chunk_reserve.push_back(std::move(work));
} while (!quit);
}
+void VKScheduler::AllocateWorkerCommandBuffer() {
+ current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
+ current_cmdbuf.Begin({
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .pNext = nullptr,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ .pInheritanceInfo = nullptr,
+ });
+}
+
void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
EndPendingOperations();
InvalidateState();
- WaitWorker();
- std::unique_lock lock{mutex};
- current_cmdbuf.End();
- const VkSemaphore timeline_semaphore = master_semaphore->Handle();
- const u32 num_signal_semaphores = semaphore ? 2U : 1U;
- const u64 signal_value = master_semaphore->CurrentTick();
- const u64 wait_value = signal_value - 1;
- const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
- master_semaphore->NextTick();
-
- const std::array signal_values{signal_value, u64(0)};
- const std::array signal_semaphores{timeline_semaphore, semaphore};
-
- const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
- .pNext = nullptr,
- .waitSemaphoreValueCount = 1,
- .pWaitSemaphoreValues = &wait_value,
- .signalSemaphoreValueCount = num_signal_semaphores,
- .pSignalSemaphoreValues = signal_values.data(),
- };
- const VkSubmitInfo submit_info{
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = &timeline_si,
- .waitSemaphoreCount = 1,
- .pWaitSemaphores = &timeline_semaphore,
- .pWaitDstStageMask = &wait_stage_mask,
- .commandBufferCount = 1,
- .pCommandBuffers = current_cmdbuf.address(),
- .signalSemaphoreCount = num_signal_semaphores,
- .pSignalSemaphores = signal_semaphores.data(),
- };
- switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
- case VK_SUCCESS:
- break;
- case VK_ERROR_DEVICE_LOST:
- device.ReportLoss();
- [[fallthrough]];
- default:
- vk::Check(result);
- }
+ const u64 signal_value = master_semaphore->NextTick();
+ Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
+ cmdbuf.End();
+ const u32 num_signal_semaphores = semaphore ? 2U : 1U;
+ const u64 wait_value = signal_value - 1;
+ const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+ const VkSemaphore timeline_semaphore = master_semaphore->Handle();
+ const std::array signal_values{signal_value, u64(0)};
+ const std::array signal_semaphores{timeline_semaphore, semaphore};
+ const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
+ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
+ .pNext = nullptr,
+ .waitSemaphoreValueCount = 1,
+ .pWaitSemaphoreValues = &wait_value,
+ .signalSemaphoreValueCount = num_signal_semaphores,
+ .pSignalSemaphoreValues = signal_values.data(),
+ };
+ const VkSubmitInfo submit_info{
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = &timeline_si,
+ .waitSemaphoreCount = 1,
+ .pWaitSemaphores = &timeline_semaphore,
+ .pWaitDstStageMask = &wait_stage_mask,
+ .commandBufferCount = 1,
+ .pCommandBuffers = cmdbuf.address(),
+ .signalSemaphoreCount = num_signal_semaphores,
+ .pSignalSemaphores = signal_semaphores.data(),
+ };
+ switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
+ case VK_SUCCESS:
+ break;
+ case VK_ERROR_DEVICE_LOST:
+ device.ReportLoss();
+ [[fallthrough]];
+ default:
+ vk::Check(result);
+ }
+ });
+ chunk->MarkSubmit();
+ DispatchWork();
}
void VKScheduler::AllocateNewContext() {
- std::unique_lock lock{mutex};
-
- current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
- current_cmdbuf.Begin({
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
- .pNext = nullptr,
- .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
- .pInheritanceInfo = nullptr,
- });
-
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
query_cache->UpdateCounters();
@@ -265,12 +276,13 @@ void VKScheduler::EndRenderPass() {
}
void VKScheduler::AcquireNewChunk() {
- if (chunk_reserve.Empty()) {
+ std::lock_guard lock{reserve_mutex};
+ if (chunk_reserve.empty()) {
chunk = std::make_unique<CommandChunk>();
return;
}
- chunk = std::move(chunk_reserve.Front());
- chunk_reserve.Pop();
+ chunk = std::move(chunk_reserve.back());
+ chunk_reserve.pop_back();
}
} // namespace Vulkan
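
Aside: the scheduler's threading rework replaces the lock-free SPSC queue with a mutex-protected std::queue plus two condition variables: work_cv hands chunks to the worker, and wait_cv lets WaitWorker block until the queue drains. A condensed standalone model using only the standard library (WorkerModel is illustrative, not the scheduler's real type):

#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>

class WorkerModel {
public:
    WorkerModel() : thread{[this] { Loop(); }} {}
    ~WorkerModel() {
        {
            std::lock_guard lock{work_mutex};
            quit = true; // set under the lock so the worker cannot miss it
        }
        work_cv.notify_all();
        thread.join();
    }
    void Push(std::function<void()> task) { // DispatchWork analogue
        {
            std::lock_guard lock{work_mutex};
            work_queue.push(std::move(task));
        }
        work_cv.notify_one();
    }
    void WaitEmpty() { // WaitWorker analogue: block until the queue drains
        std::unique_lock lock{work_mutex};
        wait_cv.wait(lock, [this] { return work_queue.empty(); });
    }

private:
    void Loop() {
        for (;;) {
            std::function<void()> task;
            {
                std::unique_lock lock{work_mutex};
                if (work_queue.empty()) {
                    wait_cv.notify_all(); // wake any WaitEmpty callers
                }
                work_cv.wait(lock, [this] { return !work_queue.empty() || quit; });
                if (quit) {
                    return;
                }
                task = std::move(work_queue.front());
                work_queue.pop();
            }
            task(); // execute outside the lock, like ExecuteAll
        }
    }

    std::queue<std::function<void()>> work_queue;
    std::mutex work_mutex;
    std::condition_variable work_cv;
    std::condition_variable wait_cv;
    bool quit = false;
    std::thread thread; // last member, so it starts after everything else
};
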
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 3ce48e9d2..cf39a2363 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -8,12 +8,12 @@
#include <condition_variable>
#include <cstddef>
#include <memory>
-#include <stack>
#include <thread>
#include <utility>
+#include <queue>
+
#include "common/alignment.h"
#include "common/common_types.h"
-#include "common/threadsafe_queue.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -22,6 +22,7 @@ namespace Vulkan {
class CommandPool;
class Device;
class Framebuffer;
+class GraphicsPipeline;
class StateTracker;
class VKQueryCache;
@@ -52,8 +53,8 @@ public:
/// of a renderpass.
void RequestOutsideRenderPassOperationContext();
- /// Binds a pipeline to the current execution context.
- void BindGraphicsPipeline(VkPipeline pipeline);
+ /// Updates the graphics pipeline for the current execution context; returns true on change.
+ bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
/// Invalidates current command buffer state except for render passes
void InvalidateState();
@@ -85,6 +86,10 @@ public:
/// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick) {
+ if (tick >= master_semaphore->CurrentTick()) {
+ // Make sure we are not waiting for the current tick without signalling
+ Flush();
+ }
master_semaphore->Wait(tick);
}
@@ -154,15 +159,24 @@ private:
return true;
}
+ void MarkSubmit() {
+ submit = true;
+ }
+
bool Empty() const {
return command_offset == 0;
}
+ bool HasSubmit() const {
+ return submit;
+ }
+
private:
Command* first = nullptr;
Command* last = nullptr;
size_t command_offset = 0;
+ bool submit = false;
alignas(std::max_align_t) std::array<u8, 0x8000> data{};
};
@@ -170,11 +184,13 @@ private:
VkRenderPass renderpass = nullptr;
VkFramebuffer framebuffer = nullptr;
VkExtent2D render_area = {0, 0};
- VkPipeline graphics_pipeline = nullptr;
+ GraphicsPipeline* graphics_pipeline = nullptr;
};
void WorkerThread();
+ void AllocateWorkerCommandBuffer();
+
void SubmitExecution(VkSemaphore semaphore);
void AllocateNewContext();
@@ -204,11 +220,13 @@ private:
std::array<VkImage, 9> renderpass_images{};
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
- Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
- Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
- std::mutex mutex;
- std::condition_variable cv;
- bool quit = false;
+ std::queue<std::unique_ptr<CommandChunk>> work_queue;
+ std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
+ std::mutex reserve_mutex;
+ std::mutex work_mutex;
+ std::condition_variable work_cv;
+ std::condition_variable wait_cv;
+ std::atomic_bool quit{};
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
deleted file mode 100644
index c6846d886..000000000
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ /dev/null
@@ -1,3166 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <functional>
-#include <limits>
-#include <map>
-#include <optional>
-#include <type_traits>
-#include <unordered_map>
-#include <utility>
-
-#include <fmt/format.h>
-
-#include <sirit/sirit.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_header.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/shader_ir.h"
-#include "video_core/shader/transform_feedback.h"
-#include "video_core/vulkan_common/vulkan_device.h"
-
-namespace Vulkan {
-
-namespace {
-
-using Sirit::Id;
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using namespace VideoCommon::Shader;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using Operation = const OperationNode&;
-
-class ASTDecompiler;
-class ExprDecompiler;
-
-// TODO(Rodrigo): Use rasterizer's value
-constexpr u32 MaxConstBufferFloats = 0x4000;
-constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4;
-
-constexpr u32 NumInputPatches = 32; // This value seems to be the standard
-
-enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
-
-class Expression final {
-public:
- Expression(Id id_, Type type_) : id{id_}, type{type_} {
- ASSERT(type_ != Type::Void);
- }
- Expression() : type{Type::Void} {}
-
- Id id{};
- Type type{};
-};
-static_assert(std::is_standard_layout_v<Expression>);
-
-struct TexelBuffer {
- Id image_type{};
- Id image{};
-};
-
-struct SampledImage {
- Id image_type{};
- Id sampler_type{};
- Id sampler_pointer_type{};
- Id variable{};
-};
-
-struct StorageImage {
- Id image_type{};
- Id image{};
-};
-
-struct AttributeType {
- Type type;
- Id scalar;
- Id vector;
-};
-
-struct VertexIndices {
- std::optional<u32> position;
- std::optional<u32> layer;
- std::optional<u32> viewport;
- std::optional<u32> point_size;
- std::optional<u32> clip_distances;
-};
-
-struct GenericVaryingDescription {
- Id id = nullptr;
- u32 first_element = 0;
- bool is_scalar = false;
-};
-
-spv::Dim GetSamplerDim(const SamplerEntry& sampler) {
- ASSERT(!sampler.is_buffer);
- switch (sampler.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return spv::Dim::Dim1D;
- case Tegra::Shader::TextureType::Texture2D:
- return spv::Dim::Dim2D;
- case Tegra::Shader::TextureType::Texture3D:
- return spv::Dim::Dim3D;
- case Tegra::Shader::TextureType::TextureCube:
- return spv::Dim::Cube;
- default:
- UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type);
- return spv::Dim::Dim2D;
- }
-}
-
-std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) {
- switch (image.type) {
- case Tegra::Shader::ImageType::Texture1D:
- return {spv::Dim::Dim1D, false};
- case Tegra::Shader::ImageType::TextureBuffer:
- return {spv::Dim::Buffer, false};
- case Tegra::Shader::ImageType::Texture1DArray:
- return {spv::Dim::Dim1D, true};
- case Tegra::Shader::ImageType::Texture2D:
- return {spv::Dim::Dim2D, false};
- case Tegra::Shader::ImageType::Texture2DArray:
- return {spv::Dim::Dim2D, true};
- case Tegra::Shader::ImageType::Texture3D:
- return {spv::Dim::Dim3D, false};
- default:
- UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type);
- return {spv::Dim::Dim2D, false};
- }
-}
-
-/// Returns the number of vertices present in a primitive topology.
-u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) {
- switch (primitive_topology) {
- case Maxwell::PrimitiveTopology::Points:
- return 1;
- case Maxwell::PrimitiveTopology::Lines:
- case Maxwell::PrimitiveTopology::LineLoop:
- case Maxwell::PrimitiveTopology::LineStrip:
- return 2;
- case Maxwell::PrimitiveTopology::Triangles:
- case Maxwell::PrimitiveTopology::TriangleStrip:
- case Maxwell::PrimitiveTopology::TriangleFan:
- return 3;
- case Maxwell::PrimitiveTopology::LinesAdjacency:
- case Maxwell::PrimitiveTopology::LineStripAdjacency:
- return 4;
- case Maxwell::PrimitiveTopology::TrianglesAdjacency:
- case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
- return 6;
- case Maxwell::PrimitiveTopology::Quads:
- UNIMPLEMENTED_MSG("Quads");
- return 3;
- case Maxwell::PrimitiveTopology::QuadStrip:
- UNIMPLEMENTED_MSG("QuadStrip");
- return 3;
- case Maxwell::PrimitiveTopology::Polygon:
- UNIMPLEMENTED_MSG("Polygon");
- return 3;
- case Maxwell::PrimitiveTopology::Patches:
- UNIMPLEMENTED_MSG("Patches");
- return 3;
- default:
- UNREACHABLE();
- return 3;
- }
-}
-
-spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) {
- switch (primitive) {
- case Maxwell::TessellationPrimitive::Isolines:
- return spv::ExecutionMode::Isolines;
- case Maxwell::TessellationPrimitive::Triangles:
- return spv::ExecutionMode::Triangles;
- case Maxwell::TessellationPrimitive::Quads:
- return spv::ExecutionMode::Quads;
- }
- UNREACHABLE();
- return spv::ExecutionMode::Triangles;
-}
-
-spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) {
- switch (spacing) {
- case Maxwell::TessellationSpacing::Equal:
- return spv::ExecutionMode::SpacingEqual;
- case Maxwell::TessellationSpacing::FractionalOdd:
- return spv::ExecutionMode::SpacingFractionalOdd;
- case Maxwell::TessellationSpacing::FractionalEven:
- return spv::ExecutionMode::SpacingFractionalEven;
- }
- UNREACHABLE();
- return spv::ExecutionMode::SpacingEqual;
-}
-
-spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) {
- switch (input_topology) {
- case Maxwell::PrimitiveTopology::Points:
- return spv::ExecutionMode::InputPoints;
- case Maxwell::PrimitiveTopology::Lines:
- case Maxwell::PrimitiveTopology::LineLoop:
- case Maxwell::PrimitiveTopology::LineStrip:
- return spv::ExecutionMode::InputLines;
- case Maxwell::PrimitiveTopology::Triangles:
- case Maxwell::PrimitiveTopology::TriangleStrip:
- case Maxwell::PrimitiveTopology::TriangleFan:
- return spv::ExecutionMode::Triangles;
- case Maxwell::PrimitiveTopology::LinesAdjacency:
- case Maxwell::PrimitiveTopology::LineStripAdjacency:
- return spv::ExecutionMode::InputLinesAdjacency;
- case Maxwell::PrimitiveTopology::TrianglesAdjacency:
- case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
- return spv::ExecutionMode::InputTrianglesAdjacency;
- case Maxwell::PrimitiveTopology::Quads:
- UNIMPLEMENTED_MSG("Quads");
- return spv::ExecutionMode::Triangles;
- case Maxwell::PrimitiveTopology::QuadStrip:
- UNIMPLEMENTED_MSG("QuadStrip");
- return spv::ExecutionMode::Triangles;
- case Maxwell::PrimitiveTopology::Polygon:
- UNIMPLEMENTED_MSG("Polygon");
- return spv::ExecutionMode::Triangles;
- case Maxwell::PrimitiveTopology::Patches:
- UNIMPLEMENTED_MSG("Patches");
- return spv::ExecutionMode::Triangles;
- }
- UNREACHABLE();
- return spv::ExecutionMode::Triangles;
-}
-
-spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) {
- switch (output_topology) {
- case Tegra::Shader::OutputTopology::PointList:
- return spv::ExecutionMode::OutputPoints;
- case Tegra::Shader::OutputTopology::LineStrip:
- return spv::ExecutionMode::OutputLineStrip;
- case Tegra::Shader::OutputTopology::TriangleStrip:
- return spv::ExecutionMode::OutputTriangleStrip;
- default:
- UNREACHABLE();
- return spv::ExecutionMode::OutputPoints;
- }
-}
-
-/// Returns true if an attribute index is one of the 32 generic attributes
-constexpr bool IsGenericAttribute(Attribute::Index attribute) {
- return attribute >= Attribute::Index::Attribute_0 &&
- attribute <= Attribute::Index::Attribute_31;
-}
-
-/// Returns the location of a generic attribute
-u32 GetGenericAttributeLocation(Attribute::Index attribute) {
- ASSERT(IsGenericAttribute(attribute));
- return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0);
-}
-
-/// Returns true if an object has to be treated as precise
-bool IsPrecise(Operation operand) {
- const auto& meta{operand.GetMeta()};
- if (std::holds_alternative<MetaArithmetic>(meta)) {
- return std::get<MetaArithmetic>(meta).precise;
- }
- return false;
-}
-
-class SPIRVDecompiler final : public Sirit::Module {
-public:
- explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_,
- const Registry& registry_, const Specialization& specialization_)
- : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()},
- registry{registry_}, specialization{specialization_} {
- if (stage_ != ShaderType::Compute) {
- transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo());
- }
-
- AddCapability(spv::Capability::Shader);
- AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
- AddCapability(spv::Capability::ImageQuery);
- AddCapability(spv::Capability::Image1D);
- AddCapability(spv::Capability::ImageBuffer);
- AddCapability(spv::Capability::ImageGatherExtended);
- AddCapability(spv::Capability::SampledBuffer);
- AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
- AddCapability(spv::Capability::DrawParameters);
- AddCapability(spv::Capability::SubgroupBallotKHR);
- AddCapability(spv::Capability::SubgroupVoteKHR);
- AddExtension("SPV_KHR_16bit_storage");
- AddExtension("SPV_KHR_shader_ballot");
- AddExtension("SPV_KHR_subgroup_vote");
- AddExtension("SPV_KHR_storage_buffer_storage_class");
- AddExtension("SPV_KHR_variable_pointers");
- AddExtension("SPV_KHR_shader_draw_parameters");
-
- if (!transform_feedback.empty()) {
- if (device.IsExtTransformFeedbackSupported()) {
- AddCapability(spv::Capability::TransformFeedback);
- } else {
- LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
- "supported on this device");
- }
- }
- if (ir.UsesLayer() || ir.UsesViewportIndex()) {
- if (ir.UsesViewportIndex()) {
- AddCapability(spv::Capability::MultiViewport);
- }
- if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) {
- AddExtension("SPV_EXT_shader_viewport_index_layer");
- AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
- }
- }
- if (device.IsFormatlessImageLoadSupported()) {
- AddCapability(spv::Capability::StorageImageReadWithoutFormat);
- }
- if (device.IsFloat16Supported()) {
- AddCapability(spv::Capability::Float16);
- }
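- // Note: when the host lacks Float16 support, "half" operations are emulated on
- // 32-bit floats; t_half is always a two-component vector of t_scalar_half.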
- t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half");
- t_half = Name(TypeVector(t_scalar_half, 2), "half");
-
- const Id main = Decompile();
-
- switch (stage) {
- case ShaderType::Vertex:
- AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces);
- break;
- case ShaderType::TesselationControl:
- AddCapability(spv::Capability::Tessellation);
- AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces);
- AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
- header.common2.threads_per_input_primitive);
- break;
- case ShaderType::TesselationEval: {
- const auto& info = registry.GetGraphicsInfo();
- AddCapability(spv::Capability::Tessellation);
- AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
- AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
- AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
- AddExecutionMode(main, info.tessellation_clockwise
- ? spv::ExecutionMode::VertexOrderCw
- : spv::ExecutionMode::VertexOrderCcw);
- break;
- }
- case ShaderType::Geometry: {
- const auto& info = registry.GetGraphicsInfo();
- AddCapability(spv::Capability::Geometry);
- AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
- AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
- AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
- AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
- header.common4.max_output_vertices);
- // TODO(Rodrigo): Where can we get this info from?
- AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
- break;
- }
- case ShaderType::Fragment:
- AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
- AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
- if (header.ps.omap.depth) {
- AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
- }
- if (specialization.early_fragment_tests) {
- AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests);
- }
- break;
- case ShaderType::Compute:
- const auto workgroup_size = specialization.workgroup_size;
- AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
- workgroup_size[1], workgroup_size[2]);
- AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces);
- break;
- }
- }
-
-private:
- Id Decompile() {
- DeclareCommon();
- DeclareVertex();
- DeclareTessControl();
- DeclareTessEval();
- DeclareGeometry();
- DeclareFragment();
- DeclareCompute();
- DeclareRegisters();
- DeclareCustomVariables();
- DeclarePredicates();
- DeclareLocalMemory();
- DeclareSharedMemory();
- DeclareInternalFlags();
- DeclareInputAttributes();
- DeclareOutputAttributes();
-
- u32 binding = specialization.base_binding;
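- // All resources share a single descriptor set; bindings are assigned
- // sequentially, so the declaration order below is presumably mirrored by the
- // host side when building the pipeline layout.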
- binding = DeclareConstantBuffers(binding);
- binding = DeclareGlobalBuffers(binding);
- binding = DeclareUniformTexels(binding);
- binding = DeclareSamplers(binding);
- binding = DeclareStorageTexels(binding);
- binding = DeclareImages(binding);
-
- const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
- AddLabel();
-
- if (ir.IsDecompiled()) {
- DeclareFlowVariables();
- DecompileAST();
- } else {
- AllocateLabels();
- DecompileBranchMode();
- }
-
- OpReturn();
- OpFunctionEnd();
-
- return main;
- }
-
- void DefinePrologue() {
- if (stage == ShaderType::Vertex) {
- // Clear Position to avoid reading trash on the Z conversion.
- const auto position_index = out_indices.position.value();
- const Id position = AccessElement(t_out_float4, out_vertex, position_index);
- OpStore(position, v_varying_default);
-
- if (specialization.point_size) {
- const u32 point_size_index = out_indices.point_size.value();
- const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index);
- OpStore(out_point_size, Constant(t_float, *specialization.point_size));
- }
- }
- }
-
- void DecompileAST();
-
- void DecompileBranchMode() {
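- // Emulates unstructured control flow with a dispatch loop: an outer loop
- // wraps a switch over basic block addresses, 'jmp_to' acts as the program
- // counter, and the SSY/PBK stacks model the hardware's sync/break stacks.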
- const u32 first_address = ir.GetBasicBlocks().begin()->first;
- const Id loop_label = OpLabel("loop");
- const Id merge_label = OpLabel("merge");
- const Id dummy_label = OpLabel();
- const Id jump_label = OpLabel();
- continue_label = OpLabel("continue");
-
- std::vector<Sirit::Literal> literals;
- std::vector<Id> branch_labels;
- for (const auto& [literal, label] : labels) {
- literals.push_back(literal);
- branch_labels.push_back(label);
- }
-
- jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint),
- spv::StorageClass::Function, Constant(t_uint, first_address));
- AddLocalVariable(jmp_to);
-
- std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack();
- std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack();
-
- Name(jmp_to, "jmp_to");
- Name(ssy_flow_stack, "ssy_flow_stack");
- Name(ssy_flow_stack_top, "ssy_flow_stack_top");
- Name(pbk_flow_stack, "pbk_flow_stack");
- Name(pbk_flow_stack_top, "pbk_flow_stack_top");
-
- DefinePrologue();
-
- OpBranch(loop_label);
- AddLabel(loop_label);
- OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone);
- OpBranch(dummy_label);
-
- AddLabel(dummy_label);
- const Id default_branch = OpLabel();
- const Id jmp_to_load = OpLoad(t_uint, jmp_to);
- OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone);
- OpSwitch(jmp_to_load, default_branch, literals, branch_labels);
-
- AddLabel(default_branch);
- OpReturn();
-
- for (const auto& [address, bb] : ir.GetBasicBlocks()) {
- AddLabel(labels.at(address));
-
- VisitBasicBlock(bb);
-
- const auto next_it = labels.lower_bound(address + 1);
- const Id next_label = next_it != labels.end() ? next_it->second : default_branch;
- OpBranch(next_label);
- }
-
- AddLabel(jump_label);
- OpBranch(continue_label);
- AddLabel(continue_label);
- OpBranch(loop_label);
- AddLabel(merge_label);
- }
-
-private:
- friend class ASTDecompiler;
- friend class ExprDecompiler;
-
- static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
-
- void AllocateLabels() {
- for (const auto& pair : ir.GetBasicBlocks()) {
- const u32 address = pair.first;
- labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
- }
- }
-
- void DeclareCommon() {
- thread_id =
- DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
- thread_masks[0] =
- DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
- thread_masks[1] =
- DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
- thread_masks[2] =
- DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
- thread_masks[3] =
- DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
- thread_masks[4] =
- DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
- }
-
- void DeclareVertex() {
- if (stage != ShaderType::Vertex) {
- return;
- }
- Id out_vertex_struct;
- std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct();
- const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct);
- out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output);
- interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex")));
-
- // Declare input attributes
- vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index");
- instance_index =
- DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index");
- base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex");
- base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance");
- }
-
- void DeclareTessControl() {
- if (stage != ShaderType::TesselationControl) {
- return;
- }
- DeclareInputVertexArray(NumInputPatches);
- DeclareOutputVertexArray(header.common2.threads_per_input_primitive);
-
- tess_level_outer = DeclareBuiltIn(
- spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output,
- TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))),
- "tess_level_outer");
- Decorate(tess_level_outer, spv::Decoration::Patch);
-
- tess_level_inner = DeclareBuiltIn(
- spv::BuiltIn::TessLevelInner, spv::StorageClass::Output,
- TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))),
- "tess_level_inner");
- Decorate(tess_level_inner, spv::Decoration::Patch);
-
- invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id");
- }
-
- void DeclareTessEval() {
- if (stage != ShaderType::TesselationEval) {
- return;
- }
- DeclareInputVertexArray(NumInputPatches);
- DeclareOutputVertex();
-
- tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord");
- }
-
- void DeclareGeometry() {
- if (stage != ShaderType::Geometry) {
- return;
- }
- const auto& info = registry.GetGraphicsInfo();
- const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
- DeclareInputVertexArray(num_input);
- DeclareOutputVertex();
- }
-
- void DeclareFragment() {
- if (stage != ShaderType::Fragment) {
- return;
- }
-
- for (u32 rt = 0; rt < static_cast<u32>(std::size(frag_colors)); ++rt) {
- if (!IsRenderTargetEnabled(rt)) {
- continue;
- }
- const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output));
- Name(id, fmt::format("frag_color{}", rt));
- Decorate(id, spv::Decoration::Location, rt);
-
- frag_colors[rt] = id;
- interfaces.push_back(id);
- }
-
- if (header.ps.omap.depth) {
- frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output));
- Name(frag_depth, "frag_depth");
- Decorate(frag_depth, spv::Decoration::BuiltIn,
- static_cast<u32>(spv::BuiltIn::FragDepth));
-
- interfaces.push_back(frag_depth);
- }
-
- frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord");
- front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing");
- point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord");
- }
-
- void DeclareCompute() {
- if (stage != ShaderType::Compute) {
- return;
- }
-
- workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id");
- local_invocation_id =
- DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id");
- }
-
- void DeclareRegisters() {
- for (const u32 gpr : ir.GetRegisters()) {
- const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
- Name(id, fmt::format("gpr_{}", gpr));
- registers.emplace(gpr, AddGlobalVariable(id));
- }
- }
-
- void DeclareCustomVariables() {
- const u32 num_custom_variables = ir.GetNumCustomVariables();
- for (u32 i = 0; i < num_custom_variables; ++i) {
- const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
- Name(id, fmt::format("custom_var_{}", i));
- custom_variables.emplace(i, AddGlobalVariable(id));
- }
- }
-
- void DeclarePredicates() {
- for (const auto pred : ir.GetPredicates()) {
- const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
- Name(id, fmt::format("pred_{}", static_cast<u32>(pred)));
- predicates.emplace(pred, AddGlobalVariable(id));
- }
- }
-
- void DeclareFlowVariables() {
- for (u32 i = 0; i < ir.GetASTNumVariables(); i++) {
- const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
- Name(id, fmt::format("flow_var_{}", static_cast<u32>(i)));
- flow_variables.emplace(i, AddGlobalVariable(id));
- }
- }
-
- void DeclareLocalMemory() {
- // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
- // specialization time.
- const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize();
- if (lmem_size == 0) {
- return;
- }
- const auto element_count = static_cast<u32>(Common::AlignUp(lmem_size, 4) / 4);
- const Id type_array = TypeArray(t_float, Constant(t_uint, element_count));
- const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array);
- Name(type_pointer, "LocalMemory");
-
- local_memory =
- OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array));
- AddGlobalVariable(Name(local_memory, "local_memory"));
- }
-
- void DeclareSharedMemory() {
- if (stage != ShaderType::Compute) {
- return;
- }
- t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint);
-
- u32 smem_size = specialization.shared_memory_size * 4;
- if (smem_size == 0) {
- // Avoid declaring an empty array.
- return;
- }
- const u32 limit = device.GetMaxComputeSharedMemorySize();
- if (smem_size > limit) {
- LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}",
- smem_size, limit);
- smem_size = limit;
- }
-
- const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4));
- const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array);
- Name(type_pointer, "SharedMemory");
-
- shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup);
- AddGlobalVariable(Name(shared_memory, "shared_memory"));
- }
-
- void DeclareInternalFlags() {
- static constexpr std::array names{"zero", "sign", "carry", "overflow"};
-
- for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
- const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
- internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
- }
- }
-
- void DeclareInputVertexArray(u32 length) {
- constexpr auto storage = spv::StorageClass::Input;
- std::tie(in_indices, in_vertex) = DeclareVertexArray(storage, "in_indices", length);
- }
-
- void DeclareOutputVertexArray(u32 length) {
- constexpr auto storage = spv::StorageClass::Output;
- std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length);
- }
-
- std::tuple<VertexIndices, Id> DeclareVertexArray(spv::StorageClass storage_class,
- std::string name, u32 length) {
- const auto [struct_id, indices] = DeclareVertexStruct();
- const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length));
- const Id vertex_ptr = TypePointer(storage_class, vertex_array);
- const Id vertex = OpVariable(vertex_ptr, storage_class);
- AddGlobalVariable(Name(vertex, std::move(name)));
- interfaces.push_back(vertex);
- return {indices, vertex};
- }
-
- void DeclareOutputVertex() {
- Id out_vertex_struct;
- std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct();
- const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct);
- out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output);
- interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex")));
- }
-
- void DeclareInputAttributes() {
- for (const auto index : ir.GetInputAttributes()) {
- if (!IsGenericAttribute(index)) {
- continue;
- }
- const u32 location = GetGenericAttributeLocation(index);
- if (!IsAttributeEnabled(location)) {
- continue;
- }
- const auto type_descriptor = GetAttributeType(location);
- Id type;
- if (IsInputAttributeArray()) {
- type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3);
- type = TypeArray(type, Constant(t_uint, GetNumInputVertices()));
- type = TypePointer(spv::StorageClass::Input, type);
- } else {
- type = type_descriptor.vector;
- }
- const Id id = OpVariable(type, spv::StorageClass::Input);
- AddGlobalVariable(Name(id, fmt::format("in_attr{}", location)));
- input_attributes.emplace(index, id);
- interfaces.push_back(id);
-
- Decorate(id, spv::Decoration::Location, location);
-
- if (stage != ShaderType::Fragment) {
- continue;
- }
- switch (header.ps.GetPixelImap(location)) {
- case PixelImap::Constant:
- Decorate(id, spv::Decoration::Flat);
- break;
- case PixelImap::Perspective:
- // Default
- break;
- case PixelImap::ScreenLinear:
- Decorate(id, spv::Decoration::NoPerspective);
- break;
- default:
- UNREACHABLE_MSG("Unused attribute being fetched");
- }
- }
- }
-
- void DeclareOutputAttributes() {
- if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
- return;
- }
-
- UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
- for (const auto index : ir.GetOutputAttributes()) {
- if (!IsGenericAttribute(index)) {
- continue;
- }
- DeclareOutputAttribute(index);
- }
- }
-
- void DeclareOutputAttribute(Attribute::Index index) {
- static constexpr std::string_view swizzle = "xyzw";
-
- const u32 location = GetGenericAttributeLocation(index);
- u8 element = 0;
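- // Split the four components of the attribute into contiguous spans, so each
- // span can carry its own transform feedback decorations and, when narrower
- // than vec4, its own Component decoration.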
- while (element < 4) {
- const std::size_t remainder = 4 - element;
-
- std::size_t num_components = remainder;
- const std::optional tfb = GetTransformFeedbackInfo(index, element);
- if (tfb) {
- num_components = tfb->components;
- }
-
- Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
- Id varying_default = v_varying_default;
- if (IsOutputAttributeArray()) {
- const u32 num = GetNumOutputVertices();
- type = TypeArray(type, Constant(t_uint, num));
- if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) {
- // Intel's proprietary driver fails to set up defaults for arrayed output
- // attributes.
- varying_default = ConstantComposite(type, std::vector(num, varying_default));
- }
- }
- type = TypePointer(spv::StorageClass::Output, type);
-
- std::string name = fmt::format("out_attr{}", location);
- if (num_components < 4 || element > 0) {
- name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
- }
-
- const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
- Name(AddGlobalVariable(id), name);
-
- GenericVaryingDescription description;
- description.id = id;
- description.first_element = element;
- description.is_scalar = num_components == 1;
- for (u32 i = 0; i < num_components; ++i) {
- const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
- output_attributes.emplace(offset, description);
- }
- interfaces.push_back(id);
-
- Decorate(id, spv::Decoration::Location, location);
- if (element > 0) {
- Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
- }
- if (tfb && device.IsExtTransformFeedbackSupported()) {
- Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
- Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
- Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
- }
-
- element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
- }
- }
-
- std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
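- // Transform feedback entries are keyed by component location, that is,
- // generic attribute index * 4 + element.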
- const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
- const auto it = transform_feedback.find(location);
- if (it == transform_feedback.end()) {
- return {};
- }
- return it->second;
- }
-
- u32 DeclareConstantBuffers(u32 binding) {
- for (const auto& [index, size] : ir.GetConstantBuffers()) {
- const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
- : t_cbuf_std140_ubo;
- const Id id = OpVariable(type, spv::StorageClass::Uniform);
- AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
-
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- constant_buffers.emplace(index, id);
- }
- return binding;
- }
-
- u32 DeclareGlobalBuffers(u32 binding) {
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer);
- AddGlobalVariable(
- Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset)));
-
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- global_buffers.emplace(base, id);
- }
- return binding;
- }
-
- u32 DeclareUniformTexels(u32 binding) {
- for (const auto& sampler : ir.GetSamplers()) {
- if (!sampler.is_buffer) {
- continue;
- }
- ASSERT(!sampler.is_array);
- ASSERT(!sampler.is_shadow);
-
- constexpr auto dim = spv::Dim::Buffer;
- constexpr int depth = 0;
- constexpr int arrayed = 0;
- constexpr bool ms = false;
- constexpr int sampled = 1;
- constexpr auto format = spv::ImageFormat::Unknown;
- const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
- const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
- const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
- AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index)));
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
-
- uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
- }
- return binding;
- }
-
- u32 DeclareSamplers(u32 binding) {
- for (const auto& sampler : ir.GetSamplers()) {
- if (sampler.is_buffer) {
- continue;
- }
- const auto dim = GetSamplerDim(sampler);
- const int depth = sampler.is_shadow ? 1 : 0;
- const int arrayed = sampler.is_array ? 1 : 0;
- constexpr bool ms = false;
- constexpr int sampled = 1;
- constexpr auto format = spv::ImageFormat::Unknown;
- const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
- const Id sampler_type = TypeSampledImage(image_type);
- const Id sampler_pointer_type =
- TypePointer(spv::StorageClass::UniformConstant, sampler_type);
- const Id type = sampler.is_indexed
- ? TypeArray(sampler_type, Constant(t_uint, sampler.size))
- : sampler_type;
- const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type);
- const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
- AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index)));
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
-
- sampled_images.emplace(
- sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id});
- }
- return binding;
- }
-
- u32 DeclareStorageTexels(u32 binding) {
- for (const auto& image : ir.GetImages()) {
- if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
- continue;
- }
- DeclareImage(image, binding);
- }
- return binding;
- }
-
- u32 DeclareImages(u32 binding) {
- for (const auto& image : ir.GetImages()) {
- if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
- continue;
- }
- DeclareImage(image, binding);
- }
- return binding;
- }
-
- void DeclareImage(const ImageEntry& image, u32& binding) {
- const auto [dim, arrayed] = GetImageDim(image);
- constexpr int depth = 0;
- constexpr bool ms = false;
- constexpr int sampled = 2; // This won't be accessed with a sampler
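- // Atomics require a concrete image format in SPIR-V, so atomic images are
- // declared as R32ui; plain loads/stores can rely on the WithoutFormat
- // capabilities declared in the constructor.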
- const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
- const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
- const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
- const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
- AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
-
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- if (image.is_read && !image.is_written) {
- Decorate(id, spv::Decoration::NonWritable);
- } else if (image.is_written && !image.is_read) {
- Decorate(id, spv::Decoration::NonReadable);
- }
-
- images.emplace(image.index, StorageImage{image_type, id});
- }
-
- bool IsRenderTargetEnabled(u32 rt) const {
- for (u32 component = 0; component < 4; ++component) {
- if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
- return true;
- }
- }
- return false;
- }
-
- bool IsInputAttributeArray() const {
- return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval ||
- stage == ShaderType::Geometry;
- }
-
- bool IsOutputAttributeArray() const {
- return stage == ShaderType::TesselationControl;
- }
-
- bool IsAttributeEnabled(u32 location) const {
- return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
- }
-
- u32 GetNumInputVertices() const {
- switch (stage) {
- case ShaderType::Geometry:
- return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
- case ShaderType::TesselationControl:
- case ShaderType::TesselationEval:
- return NumInputPatches;
- default:
- UNREACHABLE();
- return 1;
- }
- }
-
- u32 GetNumOutputVertices() const {
- switch (stage) {
- case ShaderType::TesselationControl:
- return header.common2.threads_per_input_primitive;
- default:
- UNREACHABLE();
- return 1;
- }
- }
-
- std::tuple<Id, VertexIndices> DeclareVertexStruct() {
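- // Builds the Block-decorated PerVertex struct (Position plus the optional
- // Layer, ViewportIndex, PointSize and ClipDistance built-ins) and records
- // each member's index for later access.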
- struct BuiltIn {
- Id type;
- spv::BuiltIn builtin;
- const char* name;
- };
- std::vector<BuiltIn> members;
- members.reserve(4);
-
- const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) {
- const auto index = static_cast<u32>(members.size());
- members.push_back(BuiltIn{type, builtin, name});
- return index;
- };
-
- VertexIndices indices;
- indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position");
-
- if (ir.UsesLayer()) {
- if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
- indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer");
- } else {
- LOG_ERROR(
- Render_Vulkan,
- "Shader requires Layer but it's not supported on this stage with this device.");
- }
- }
-
- if (ir.UsesViewportIndex()) {
- if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
- indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index");
- } else {
- LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on "
- "this stage with this device.");
- }
- }
-
- if (ir.UsesPointSize() || specialization.point_size) {
- indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size");
- }
-
- const auto& ir_output_attributes = ir.GetOutputAttributes();
- const bool declare_clip_distances = std::any_of(
- ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) {
- return index == Attribute::Index::ClipDistances0123 ||
- index == Attribute::Index::ClipDistances4567;
- });
- if (declare_clip_distances) {
- indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)),
- spv::BuiltIn::ClipDistance, "clip_distances");
- }
-
- std::vector<Id> member_types;
- member_types.reserve(members.size());
- for (std::size_t i = 0; i < members.size(); ++i) {
- member_types.push_back(members[i].type);
- }
- const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex");
- Decorate(per_vertex_struct, spv::Decoration::Block);
-
- for (std::size_t index = 0; index < members.size(); ++index) {
- const auto& member = members[index];
- MemberName(per_vertex_struct, static_cast<u32>(index), member.name);
- MemberDecorate(per_vertex_struct, static_cast<u32>(index), spv::Decoration::BuiltIn,
- static_cast<u32>(member.builtin));
- }
-
- return {per_vertex_struct, indices};
- }
-
- void VisitBasicBlock(const NodeBlock& bb) {
- for (const auto& node : bb) {
- Visit(node);
- }
- }
-
- Expression Visit(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- if (const auto amend_index = operation->GetAmendIndex()) {
- [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
- ASSERT(type == Type::Void);
- }
- const auto operation_index = static_cast<std::size_t>(operation->GetCode());
- const auto decompiler = operation_decompilers[operation_index];
- if (decompiler == nullptr) {
- UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
- }
- return (this->*decompiler)(*operation);
- }
-
- if (const auto gpr = std::get_if<GprNode>(&*node)) {
- const u32 index = gpr->GetIndex();
- if (index == Register::ZeroIndex) {
- return {v_float_zero, Type::Float};
- }
- return {OpLoad(t_float, registers.at(index)), Type::Float};
- }
-
- if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
- const u32 index = cv->GetIndex();
- return {OpLoad(t_float, custom_variables.at(index)), Type::Float};
- }
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
- return {Constant(t_uint, immediate->GetValue()), Type::Uint};
- }
-
- if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
- const auto value = [&]() -> Id {
- switch (const auto index = predicate->GetIndex(); index) {
- case Tegra::Shader::Pred::UnusedIndex:
- return v_true;
- case Tegra::Shader::Pred::NeverExecute:
- return v_false;
- default:
- return OpLoad(t_bool, predicates.at(index));
- }
- }();
- if (predicate->IsNegated()) {
- return {OpLogicalNot(t_bool, value), Type::Bool};
- }
- return {value, Type::Bool};
- }
-
- if (const auto abuf = std::get_if<AbufNode>(&*node)) {
- const auto attribute = abuf->GetIndex();
- const u32 element = abuf->GetElement();
- const auto& buffer = abuf->GetBuffer();
-
- const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) {
- std::vector<Id> members;
- members.reserve(std::size(indices) + 1);
-
- if (buffer && IsInputAttributeArray()) {
- members.push_back(AsUint(Visit(buffer)));
- }
- for (const u32 index : indices) {
- members.push_back(Constant(t_uint, index));
- }
- return OpAccessChain(pointer_type, composite, members);
- };
-
- switch (attribute) {
- case Attribute::Index::Position: {
- if (stage == ShaderType::Fragment) {
- return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)),
- Type::Float};
- }
- const std::vector elements = {in_indices.position.value(), element};
- return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float};
- }
- case Attribute::Index::PointCoord: {
- switch (element) {
- case 0:
- case 1:
- return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element),
- Type::Float};
- }
- UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element);
- return {v_float_zero, Type::Float};
- }
- case Attribute::Index::TessCoordInstanceIDVertexID:
- // TODO(Subv): Find out what the values are for the first two elements when inside a
- // vertex shader, and what's the value of the fourth element when inside a Tess Eval
- // shader.
- switch (element) {
- case 0:
- case 1:
- return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)),
- Type::Float};
- case 2:
- return {
- OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)),
- Type::Int};
- case 3:
- return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)),
- Type::Int};
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- return {Constant(t_uint, 0U), Type::Uint};
- case Attribute::Index::FrontFacing:
- // TODO(Subv): Find out what the values are for the other elements.
- ASSERT(stage == ShaderType::Fragment);
- if (element == 3) {
- const Id is_front_facing = OpLoad(t_bool, front_facing);
- const Id true_value = Constant(t_int, static_cast<s32>(-1));
- const Id false_value = Constant(t_int, 0);
- return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int};
- }
- UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
- return {v_float_zero, Type::Float};
- default:
- if (!IsGenericAttribute(attribute)) {
- break;
- }
- const u32 location = GetGenericAttributeLocation(attribute);
- if (!IsAttributeEnabled(location)) {
- // Disabled attributes (also known as constant attributes) always return zero.
- return {v_float_zero, Type::Float};
- }
- const auto type_descriptor = GetAttributeType(location);
- const Type type = type_descriptor.type;
- const Id attribute_id = input_attributes.at(attribute);
- const std::vector elements = {element};
- const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
- return {OpLoad(GetTypeDefinition(type), pointer), type};
- }
- UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
- return {v_float_zero, Type::Float};
- }
-
- if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
- const Node& offset = cbuf->GetOffset();
- const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
-
- Id pointer{};
- if (device.IsKhrUniformBufferStandardLayoutSupported()) {
- const Id buffer_offset =
- OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U));
- pointer =
- OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset);
- } else {
- Id buffer_index{};
- Id buffer_element{};
- if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- // Direct access
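- // std140 layout: each array element is a vec4, so the byte offset maps to
- // vec4 index (offset / 16) and component within it ((offset / 4) % 4).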
- const u32 offset_imm = immediate->GetValue();
- ASSERT(offset_imm % 4 == 0);
- buffer_index = Constant(t_uint, offset_imm / 16);
- buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
- } else if (std::holds_alternative<OperationNode>(*offset)) {
- // Indirect access
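- // The dynamic offset is divided into 32-bit words and wrapped with UMod,
- // so out-of-range guest offsets cannot read past the host buffer.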
- const Id offset_id = AsUint(Visit(offset));
- const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4));
- const Id final_offset =
- OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1));
- buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4));
- buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4));
- } else {
- UNREACHABLE_MSG("Unmanaged offset node type");
- }
- pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
- buffer_element);
- }
- return {OpLoad(t_float, pointer), Type::Float};
- }
-
- if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
- }
-
- if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- Id address = AsUint(Visit(lmem->GetAddress()));
- address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
- const Id pointer = OpAccessChain(t_prv_float, local_memory, address);
- return {OpLoad(t_float, pointer), Type::Float};
- }
-
- if (const auto smem = std::get_if<SmemNode>(&*node)) {
- return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
- }
-
- if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- const Id flag = internal_flags.at(static_cast<std::size_t>(internal_flag->GetFlag()));
- return {OpLoad(t_bool, flag), Type::Bool};
- }
-
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- if (const auto amend_index = conditional->GetAmendIndex()) {
- [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
- ASSERT(type == Type::Void);
- }
- // Conditional nodes cannot be nested; use an operation node instead.
- const Id true_label = OpLabel();
- const Id skip_label = OpLabel();
- const Id condition = AsBool(Visit(conditional->GetCondition()));
- OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone);
- OpBranchConditional(condition, true_label, skip_label);
- AddLabel(true_label);
-
- conditional_branch_set = true;
- inside_branch = false;
- VisitBasicBlock(conditional->GetCode());
- conditional_branch_set = false;
- if (!inside_branch) {
- OpBranch(skip_label);
- } else {
- inside_branch = false;
- }
- AddLabel(skip_label);
- return {};
- }
-
- if (const auto comment = std::get_if<CommentNode>(&*node)) {
- if (device.HasDebuggingToolAttached()) {
- // We should insert comments with OpString instead of using named variables
- Name(OpUndef(t_int), comment->GetText());
- }
- return {};
- }
-
- UNREACHABLE();
- return {};
- }
-
- template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type>
- Expression Unary(Operation operation) {
- const Id type_def = GetTypeDefinition(result_type);
- const Id op_a = As(Visit(operation[0]), type_a);
-
- const Id value = (this->*func)(type_def, op_a);
- if (IsPrecise(operation)) {
- Decorate(value, spv::Decoration::NoContraction);
- }
- return {value, result_type};
- }
-
- template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type,
- Type type_b = type_a>
- Expression Binary(Operation operation) {
- const Id type_def = GetTypeDefinition(result_type);
- const Id op_a = As(Visit(operation[0]), type_a);
- const Id op_b = As(Visit(operation[1]), type_b);
-
- const Id value = (this->*func)(type_def, op_a, op_b);
- if (IsPrecise(operation)) {
- Decorate(value, spv::Decoration::NoContraction);
- }
- return {value, result_type};
- }
-
- template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type,
- Type type_b = type_a, Type type_c = type_b>
- Expression Ternary(Operation operation) {
- const Id type_def = GetTypeDefinition(result_type);
- const Id op_a = As(Visit(operation[0]), type_a);
- const Id op_b = As(Visit(operation[1]), type_b);
- const Id op_c = As(Visit(operation[2]), type_c);
-
- const Id value = (this->*func)(type_def, op_a, op_b, op_c);
- if (IsPrecise(operation)) {
- Decorate(value, spv::Decoration::NoContraction);
- }
- return {value, result_type};
- }
-
- template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type,
- Type type_b = type_a, Type type_c = type_b, Type type_d = type_c>
- Expression Quaternary(Operation operation) {
- const Id type_def = GetTypeDefinition(result_type);
- const Id op_a = As(Visit(operation[0]), type_a);
- const Id op_b = As(Visit(operation[1]), type_b);
- const Id op_c = As(Visit(operation[2]), type_c);
- const Id op_d = As(Visit(operation[3]), type_d);
-
- const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d);
- if (IsPrecise(operation)) {
- Decorate(value, spv::Decoration::NoContraction);
- }
- return {value, result_type};
- }
-
- Expression Assign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- Expression target{};
- if (const auto gpr = std::get_if<GprNode>(&*dest)) {
- if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no-op, but we still have to visit its
- // source because it might have side effects.
- Visit(src);
- return {};
- }
- target = {registers.at(gpr->GetIndex()), Type::Float};
-
- } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
- const auto& buffer = abuf->GetBuffer();
- const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) {
- std::vector<Id> members;
- members.reserve(std::size(indices) + 1);
-
- if (buffer && IsOutputAttributeArray()) {
- members.push_back(AsUint(Visit(buffer)));
- }
- for (const u32 index : indices) {
- members.push_back(Constant(t_uint, index));
- }
- return OpAccessChain(pointer_type, composite, members);
- };
-
- target = [&]() -> Expression {
- const u32 element = abuf->GetElement();
- switch (const auto attribute = abuf->GetIndex(); attribute) {
- case Attribute::Index::Position: {
- const u32 index = out_indices.position.value();
- return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float};
- }
- case Attribute::Index::LayerViewportPointSize:
- switch (element) {
- case 1: {
- if (!out_indices.layer) {
- return {};
- }
- const u32 index = out_indices.layer.value();
- return {AccessElement(t_out_int, out_vertex, index), Type::Int};
- }
- case 2: {
- if (!out_indices.viewport) {
- return {};
- }
- const u32 index = out_indices.viewport.value();
- return {AccessElement(t_out_int, out_vertex, index), Type::Int};
- }
- case 3: {
- const auto index = out_indices.point_size.value();
- return {AccessElement(t_out_float, out_vertex, index), Type::Float};
- }
- default:
- UNIMPLEMENTED_MSG("LayerViewportPoint element={}", abuf->GetElement());
- return {};
- }
- case Attribute::Index::ClipDistances0123: {
- const u32 index = out_indices.clip_distances.value();
- return {AccessElement(t_out_float, out_vertex, index, element), Type::Float};
- }
- case Attribute::Index::ClipDistances4567: {
- const u32 index = out_indices.clip_distances.value();
- return {AccessElement(t_out_float, out_vertex, index, element + 4),
- Type::Float};
- }
- default:
- if (IsGenericAttribute(attribute)) {
- const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
- const GenericVaryingDescription description = output_attributes.at(offset);
- const Id composite = description.id;
- std::vector<u32> indices;
- if (!description.is_scalar) {
- indices.push_back(element - description.first_element);
- }
- return {ArrayPass(t_out_float, composite, indices), Type::Float};
- }
- UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
- static_cast<u32>(attribute));
- return {};
- }
- }();
-
- } else if (const auto patch = std::get_if<PatchNode>(&*dest)) {
- target = [&]() -> Expression {
- const u32 offset = patch->GetOffset();
- switch (offset) {
- case 0:
- case 1:
- case 2:
- case 3:
- return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float};
- case 4:
- case 5:
- return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float};
- }
- UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset);
- return {};
- }();
-
- } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- Id address = AsUint(Visit(lmem->GetAddress()));
- address = OpUDiv(t_uint, address, Constant(t_uint, 4));
- target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
-
- } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
- target = {GetSharedMemoryPointer(*smem), Type::Uint};
-
- } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
-
- } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
- target = {custom_variables.at(cv->GetIndex()), Type::Float};
-
- } else {
- UNIMPLEMENTED();
- }
-
- if (!target.id) {
- // On failure target.id is null; skip the store.
- return {};
- }
-
- OpStore(target.id, As(Visit(src), target.type));
- return {};
- }
-
- template <u32 offset>
- Expression FCastHalf(Operation operation) {
- const Id value = AsHalfFloat(Visit(operation[0]));
- return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)),
- Type::Float};
- }
-
- Expression FSwizzleAdd(Operation operation) {
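- // FSWZADD: the low two bits of the thread id select a 2-bit entry from the
- // swizzle immediate (operation[2]); that entry indexes the two constant
- // LUTs below to pick a -1/+1/0 modifier for each operand.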
- const Id minus = Constant(t_float, -1.0f);
- const Id plus = v_float_one;
- const Id zero = v_float_zero;
- const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero);
- const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus);
-
- Id mask = OpLoad(t_uint, thread_id);
- mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3));
- mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1));
- mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask);
- mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3));
-
- const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask);
- const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask);
-
- const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a);
- const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b);
- return {OpFAdd(t_float, op_a, op_b), Type::Float};
- }
-
- Expression HNegate(Operation operation) {
- const bool is_f16 = device.IsFloat16Supported();
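- // Raw bit patterns: -1.0 and +1.0 encoded as IEEE half (0xbc00/0x3c00) or
- // single precision (0xbf800000/0x3f800000), depending on host f16 support.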
- const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000);
- const Id one = Constant(t_scalar_half, is_f16 ? 0x3c00 : 0x3f800000);
- const auto GetNegate = [&](std::size_t index) {
- return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one);
- };
- const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2));
- return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat};
- }
-
- Expression HClamp(Operation operation) {
- const auto Pack = [&](std::size_t index) {
- const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index])));
- return OpCompositeConstruct(t_half, scalar, scalar);
- };
- const Id value = AsHalfFloat(Visit(operation[0]));
- const Id min = Pack(1);
- const Id max = Pack(2);
-
- const Id clamped = OpFClamp(t_half, value, min, max);
- if (IsPrecise(operation)) {
- Decorate(clamped, spv::Decoration::NoContraction);
- }
- return {clamped, Type::HalfFloat};
- }
-
- Expression HCastFloat(Operation operation) {
- const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0])));
- return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat};
- }
-
- Expression HUnpack(Operation operation) {
- Expression operand = Visit(operation[0]);
- const auto type = std::get<Tegra::Shader::HalfType>(operation.GetMeta());
- if (type == Tegra::Shader::HalfType::H0_H1) {
- return operand;
- }
- const auto value = [&] {
- switch (type) {
- case Tegra::Shader::HalfType::F32:
- return GetHalfScalarFromFloat(AsFloat(operand));
- case Tegra::Shader::HalfType::H0_H0:
- return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0);
- case Tegra::Shader::HalfType::H1_H1:
- return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1);
- default:
- UNREACHABLE();
- return ConstantNull(t_scalar_half); // Scalar null, matching the other branches
- }
- }();
- return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat};
- }
-
- Expression HMergeF32(Operation operation) {
- const Id value = AsHalfFloat(Visit(operation[0]));
- return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), Type::Float};
- }
-
- template <u32 offset>
- Expression HMergeHN(Operation operation) {
- const Id target = AsHalfFloat(Visit(operation[0]));
- const Id source = AsHalfFloat(Visit(operation[1]));
- const Id object = OpCompositeExtract(t_scalar_half, source, offset);
- return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat};
- }
-
- Expression HPack2(Operation operation) {
- const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0])));
- const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1])));
- return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat};
- }
-
- Expression LogicalAddCarry(Operation operation) {
- const Id op_a = AsUint(Visit(operation[0]));
- const Id op_b = AsUint(Visit(operation[1]));
-
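- // OpIAddCarry yields a {result, carry} struct; member 1 is the carry-out
- // (0 or 1), turned into a bool by comparing against zero.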
- const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
- const Id carry = OpCompositeExtract(t_uint, result, 1);
- return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
- }
-
- Expression LogicalAssign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- Id target{};
- if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
- ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
-
- const auto index = pred->GetIndex();
- switch (index) {
- case Tegra::Shader::Pred::NeverExecute:
- case Tegra::Shader::Pred::UnusedIndex:
- // Writing to these predicates is a no-op
- return {};
- }
- target = predicates.at(index);
-
- } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
- target = internal_flags.at(static_cast<u32>(flag->GetFlag()));
- }
-
- OpStore(target, AsBool(Visit(src)));
- return {};
- }
-
- Expression LogicalFOrdered(Operation operation) {
- // Emulate SPIR-V's OpOrdered (it needs the Kernel capability): a value is
- // ordered iff it equals itself, i.e. it is not NaN.
- const Id op_a = AsFloat(Visit(operation[0]));
- const Id op_b = AsFloat(Visit(operation[1]));
- const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a);
- const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b);
- return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool};
- }
-
- Expression LogicalFUnordered(Operation operation) {
- // Emulate SPIR-V's OpUnordered (it needs the Kernel capability): the pair
- // is unordered iff either operand is NaN.
- const Id op_a = AsFloat(Visit(operation[0]));
- const Id op_b = AsFloat(Visit(operation[1]));
- const Id is_nan_a = OpIsNan(t_bool, op_a);
- const Id is_nan_b = OpIsNan(t_bool, op_b);
- return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool};
- }
-
- Id GetTextureSampler(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- ASSERT(!meta.sampler.is_buffer);
-
- const auto& entry = sampled_images.at(meta.sampler.index);
- Id sampler = entry.variable;
- if (meta.sampler.is_indexed) {
- const Id index = AsInt(Visit(meta.index));
- sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index);
- }
- return OpLoad(entry.sampler_type, sampler);
- }
-
- Id GetTextureImage(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 index = meta.sampler.index;
- if (meta.sampler.is_buffer) {
- const auto& entry = uniform_texels.at(index);
- return OpLoad(entry.image_type, entry.image);
- } else {
- const auto& entry = sampled_images.at(index);
- return OpImage(entry.image_type, GetTextureSampler(operation));
- }
- }
-
- Id GetImage(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const auto entry = images.at(meta.image.index);
- return OpLoad(entry.image_type, entry.image);
- }
-
- Id AssembleVector(const std::vector<Id>& coords, Type type) {
- const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1);
- return coords.size() == 1 ? coords[0] : OpCompositeConstruct(coords_type, coords);
- }
-
- Id GetCoordinates(Operation operation, Type type) {
- std::vector<Id> coords;
- for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) {
- coords.push_back(As(Visit(operation[i]), type));
- }
- if (const auto meta = std::get_if<MetaTexture>(&operation.GetMeta())) {
- // Add array coordinate for textures
- if (meta->sampler.is_array) {
- Id array = AsInt(Visit(meta->array));
- if (type == Type::Float) {
- array = OpConvertSToF(t_float, array);
- }
- coords.push_back(array);
- }
- }
- return AssembleVector(coords, type);
- }
-
- Id GetOffsetCoordinates(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- std::vector<Id> coords;
- coords.reserve(meta.aoffi.size());
- for (const auto& coord : meta.aoffi) {
- coords.push_back(AsInt(Visit(coord)));
- }
- return AssembleVector(coords, Type::Int);
- }
-
- std::pair<Id, Id> GetDerivatives(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const auto& derivatives = meta.derivates;
- ASSERT(derivatives.size() % 2 == 0);
-
- const std::size_t components = derivatives.size() / 2;
- std::vector<Id> dx, dy;
- dx.reserve(components);
- dy.reserve(components);
- for (std::size_t index = 0; index < components; ++index) {
- dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0))));
- dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1))));
- }
- return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)};
- }
-
- Expression GetTextureElement(Operation operation, Id sample_value, Type type) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const auto type_def = GetTypeDefinition(type);
- return {OpCompositeExtract(type_def, sample_value, meta.element), type};
- }
-
- Expression Texture(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
-
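- // Implicit LOD needs derivatives, which SPIR-V only allows in fragment
- // shaders; every other stage samples with an explicit LOD of zero below.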
- const bool can_implicit = stage == ShaderType::Fragment;
- const Id sampler = GetTextureSampler(operation);
- const Id coords = GetCoordinates(operation, Type::Float);
-
- std::vector<Id> operands;
- spv::ImageOperandsMask mask{};
- if (meta.bias) {
- mask = mask | spv::ImageOperandsMask::Bias;
- operands.push_back(AsFloat(Visit(meta.bias)));
- }
-
- if (!can_implicit) {
- mask = mask | spv::ImageOperandsMask::Lod;
- operands.push_back(v_float_zero);
- }
-
- if (!meta.aoffi.empty()) {
- mask = mask | spv::ImageOperandsMask::Offset;
- operands.push_back(GetOffsetCoordinates(operation));
- }
-
- if (meta.depth_compare) {
- // Depth sampling
- UNIMPLEMENTED_IF(meta.bias);
- const Id dref = AsFloat(Visit(meta.depth_compare));
- if (can_implicit) {
- return {
- OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands),
- Type::Float};
- } else {
- return {
- OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
- Type::Float};
- }
- }
-
- Id texture;
- if (can_implicit) {
- texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands);
- } else {
- texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
- }
- return GetTextureElement(operation, texture, Type::Float);
- }
-
- Expression TextureLod(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
-
- const Id sampler = GetTextureSampler(operation);
- const Id coords = GetCoordinates(operation, Type::Float);
- const Id lod = AsFloat(Visit(meta.lod));
-
- spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod;
- std::vector<Id> operands{lod};
-
- if (!meta.aoffi.empty()) {
- mask = mask | spv::ImageOperandsMask::Offset;
- operands.push_back(GetOffsetCoordinates(operation));
- }
-
- if (meta.sampler.is_shadow) {
- const Id dref = AsFloat(Visit(meta.depth_compare));
- return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
- Type::Float};
- }
- const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
- return GetTextureElement(operation, texture, Type::Float);
- }
-
- Expression TextureGather(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
-
- const Id coords = GetCoordinates(operation, Type::Float);
-
- spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
- std::vector<Id> operands;
- Id texture{};
-
- if (!meta.aoffi.empty()) {
- mask = mask | spv::ImageOperandsMask::Offset;
- operands.push_back(GetOffsetCoordinates(operation));
- }
-
- if (meta.sampler.is_shadow) {
- texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords,
- AsFloat(Visit(meta.depth_compare)), mask, operands);
- } else {
- u32 component_value = 0;
- if (meta.component) {
- const auto component = std::get_if<ImmediateNode>(&*meta.component);
- ASSERT_MSG(component, "Component is not an immediate value");
- component_value = component->GetValue();
- }
- texture = OpImageGather(t_float4, GetTextureSampler(operation), coords,
- Constant(t_uint, component_value), mask, operands);
- }
- return GetTextureElement(operation, texture, Type::Float);
- }
-
- Expression TextureQueryDimensions(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(!meta.aoffi.empty());
- UNIMPLEMENTED_IF(meta.depth_compare);
-
- const auto image_id = GetTextureImage(operation);
- if (meta.element == 3) {
- return {OpImageQueryLevels(t_int, image_id), Type::Int};
- }
-
- const Id lod = AsUint(Visit(operation[0]));
- const std::size_t coords_count = [&meta] {
- switch (const auto type = meta.sampler.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return 1;
- case Tegra::Shader::TextureType::Texture2D:
- case Tegra::Shader::TextureType::TextureCube:
- return 2;
- case Tegra::Shader::TextureType::Texture3D:
- return 3;
- default:
- UNREACHABLE_MSG("Invalid texture type={}", type);
- return 2;
- }
- }();
-
- if (meta.element >= coords_count) {
- return {v_float_zero, Type::Float};
- }
-
- const std::array<Id, 3> types = {t_int, t_int2, t_int3};
- const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod);
- const Id size = OpCompositeExtract(t_int, sizes, meta.element);
- return {size, Type::Int};
- }
-
- Expression TextureQueryLod(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(!meta.aoffi.empty());
- UNIMPLEMENTED_IF(meta.depth_compare);
-
- if (meta.element >= 2) {
- UNREACHABLE_MSG("Invalid element");
- return {v_float_zero, Type::Float};
- }
- const auto sampler_id = GetTextureSampler(operation);
-
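- // The hardware returns fixed-point LODs with 8 fractional bits, hence the scale by 256.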
- const Id multiplier = Constant(t_float, 256.0f);
- const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier);
-
- const Id coords = GetCoordinates(operation, Type::Float);
- Id size = OpImageQueryLod(t_float2, sampler_id, coords);
- size = OpFMul(t_float2, size, multipliers);
- size = OpConvertFToS(t_int2, size);
- return GetTextureElement(operation, size, Type::Int);
- }
-
- Expression TexelFetch(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(meta.depth_compare);
-
- const Id image = GetTextureImage(operation);
- const Id coords = GetCoordinates(operation, Type::Int);
-
- spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
- std::vector<Id> operands;
- Id fetch;
-
- if (meta.lod && !meta.sampler.is_buffer) {
- mask = mask | spv::ImageOperandsMask::Lod;
- operands.push_back(AsInt(Visit(meta.lod)));
- }
-
- if (!meta.aoffi.empty()) {
- mask = mask | spv::ImageOperandsMask::Offset;
- operands.push_back(GetOffsetCoordinates(operation));
- }
-
- fetch = OpImageFetch(t_float4, image, coords, mask, operands);
- return GetTextureElement(operation, fetch, Type::Float);
- }
-
- Expression TextureGradient(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(!meta.aoffi.empty());
-
- const Id sampler = GetTextureSampler(operation);
- const Id coords = GetCoordinates(operation, Type::Float);
- const auto [dx, dy] = GetDerivatives(operation);
- const std::vector grad = {dx, dy};
-
- static constexpr auto mask = spv::ImageOperandsMask::Grad;
- const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad);
- return GetTextureElement(operation, texture, Type::Float);
- }
-
- Expression ImageLoad(Operation operation) {
- if (!device.IsFormatlessImageLoadSupported()) {
- return {v_float_zero, Type::Float};
- }
-
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
-
- const Id coords = GetCoordinates(operation, Type::Int);
- const Id texel = OpImageRead(t_uint4, GetImage(operation), coords);
-
- return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint};
- }
-
- Expression ImageStore(Operation operation) {
- const auto meta{std::get<MetaImage>(operation.GetMeta())};
- std::vector<Id> colors;
- for (const auto& value : meta.values) {
- colors.push_back(AsUint(Visit(value)));
- }
-
- const Id coords = GetCoordinates(operation, Type::Int);
- const Id texel = OpCompositeConstruct(t_uint4, colors);
-
- OpImageWrite(GetImage(operation), coords, texel, {});
- return {};
- }
-
- template <Id (Module::*func)(Id, Id, Id, Id, Id)>
- Expression AtomicImage(Operation operation) {
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- ASSERT(meta.values.size() == 1);
-
- const Id coordinate = GetCoordinates(operation, Type::Int);
- const Id image = images.at(meta.image.index).image;
- const Id sample = v_uint_zero;
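- // Image atomics need a texel pointer first; the generic OpAtomic* then operates on it.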
- const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
-
- const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
- const Id semantics = v_uint_zero;
- const Id value = AsUint(Visit(meta.values[0]));
- return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
- }
-
- template <Id (Module::*func)(Id, Id, Id, Id, Id)>
- Expression Atomic(Operation operation) {
- Id pointer;
- if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
- pointer = GetSharedMemoryPointer(*smem);
- } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
- pointer = GetGlobalMemoryPointer(*gmem);
- } else {
- UNREACHABLE();
- return {v_float_zero, Type::Float};
- }
- const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
- const Id semantics = v_uint_zero;
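- // A semantics mask of zero requests relaxed ordering at device scope.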
- const Id value = AsUint(Visit(operation[1]));
-
- return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
- }
-
- template <Id (Module::*func)(Id, Id, Id, Id, Id)>
- Expression Reduce(Operation operation) {
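- // A reduction is an atomic operation whose result is discarded.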
- Atomic<func>(operation);
- return {};
- }
-
- Expression Branch(Operation operation) {
- const auto& target = std::get<ImmediateNode>(*operation[0]);
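- // Record the target PC and jump back to the dispatch loop, which continues from jmp_to.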
- OpStore(jmp_to, Constant(t_uint, target.GetValue()));
- OpBranch(continue_label);
- inside_branch = true;
- if (!conditional_branch_set) {
- AddLabel();
- }
- return {};
- }
-
- Expression BranchIndirect(Operation operation) {
- const Id op_a = AsUint(Visit(operation[0]));
-
- OpStore(jmp_to, op_a);
- OpBranch(continue_label);
- inside_branch = true;
- if (!conditional_branch_set) {
- AddLabel();
- }
- return {};
- }
-
- Expression PushFlowStack(Operation operation) {
- const auto& target = std::get<ImmediateNode>(*operation[0]);
- const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
- const Id current = OpLoad(t_uint, flow_stack_top);
- const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1));
- const Id access = OpAccessChain(t_func_uint, flow_stack, current);
-
- OpStore(access, Constant(t_uint, target.GetValue()));
- OpStore(flow_stack_top, next);
- return {};
- }
-
- Expression PopFlowStack(Operation operation) {
- const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
- const Id current = OpLoad(t_uint, flow_stack_top);
- const Id previous = OpISub(t_uint, current, Constant(t_uint, 1));
- const Id access = OpAccessChain(t_func_uint, flow_stack, previous);
- const Id target = OpLoad(t_uint, access);
-
- OpStore(flow_stack_top, previous);
- OpStore(jmp_to, target);
- OpBranch(continue_label);
- inside_branch = true;
- if (!conditional_branch_set) {
- AddLabel();
- }
- return {};
- }
-
- Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) {
- using Compare = Maxwell::ComparisonOp;
- switch (compare_op) {
- case Compare::NeverOld:
- return v_false; // Never let the test pass
- case Compare::LessOld:
- return OpFOrdLessThan(t_bool, operand_1, operand_2);
- case Compare::EqualOld:
- return OpFOrdEqual(t_bool, operand_1, operand_2);
- case Compare::LessEqualOld:
- return OpFOrdLessThanEqual(t_bool, operand_1, operand_2);
- case Compare::GreaterOld:
- return OpFOrdGreaterThan(t_bool, operand_1, operand_2);
- case Compare::NotEqualOld:
- return OpFOrdNotEqual(t_bool, operand_1, operand_2);
- case Compare::GreaterEqualOld:
- return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2);
- default:
- UNREACHABLE();
- return v_true;
- }
- }
-
- void AlphaTest(Id pointer) {
- if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) {
- return;
- }
- const Id true_label = OpLabel();
- const Id discard_label = OpLabel();
- const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref);
- const Id alpha_value = OpLoad(t_float, pointer);
- const Id condition =
- MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference);
-
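- // Keep the fragment when the comparison passes; otherwise kill it.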
- OpBranchConditional(condition, true_label, discard_label);
- AddLabel(discard_label);
- OpKill();
- AddLabel(true_label);
- }
-
- void PreExit() {
- if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) {
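- // Remap depth from the guest's [-w, w] convention to Vulkan's [0, w]: z' = (z + w) * 0.5.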
- const u32 position_index = out_indices.position.value();
- const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U);
- const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U);
- Id depth = OpLoad(t_float, z_pointer);
- depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer));
- depth = OpFMul(t_float, depth, Constant(t_float, 0.5f));
- OpStore(z_pointer, depth);
- }
- if (stage == ShaderType::Fragment) {
- const auto SafeGetRegister = [this](u32 reg) {
- if (const auto it = registers.find(reg); it != registers.end()) {
- return OpLoad(t_float, it->second);
- }
- return v_float_zero;
- };
-
- UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0,
- "Sample mask write is unimplemented");
-
- // Write the color outputs using the data in the shader registers; disabled
- // render targets/components are skipped in the register assignment.
- u32 current_reg = 0;
- for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
- // TODO(Subv): Figure out how dual-source blending is configured on the Switch.
- for (u32 component = 0; component < 4; ++component) {
- if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
- continue;
- }
- const Id pointer = AccessElement(t_out_float, frag_colors[rt], component);
- OpStore(pointer, SafeGetRegister(current_reg));
- if (rt == 0 && component == 3) {
- AlphaTest(pointer);
- }
- ++current_reg;
- }
- }
- if (header.ps.omap.depth) {
- // The depth output is always 2 registers after the last color output, and
- // current_reg already contains one past the last color register.
- OpStore(frag_depth, SafeGetRegister(current_reg + 1));
- }
- }
- }
-
- Expression Exit(Operation operation) {
- PreExit();
- inside_branch = true;
- if (conditional_branch_set) {
- OpReturn();
- } else {
- const Id dummy = OpLabel();
- OpBranch(dummy);
- AddLabel(dummy);
- OpReturn();
- AddLabel();
- }
- return {};
- }
-
- Expression Discard(Operation operation) {
- inside_branch = true;
- if (conditional_branch_set) {
- OpKill();
- } else {
- const Id dummy = OpLabel();
- OpBranch(dummy);
- AddLabel(dummy);
- OpKill();
- AddLabel();
- }
- return {};
- }
-
- Expression EmitVertex(Operation) {
- OpEmitVertex();
- return {};
- }
-
- Expression EndPrimitive(Operation operation) {
- OpEndPrimitive();
- return {};
- }
-
- Expression InvocationId(Operation) {
- return {OpLoad(t_int, invocation_id), Type::Int};
- }
-
- Expression YNegate(Operation) {
- LOG_WARNING(Render_Vulkan, "(STUBBED)");
- return {Constant(t_float, 1.0f), Type::Float};
- }
-
- template <u32 element>
- Expression LocalInvocationId(Operation) {
- const Id id = OpLoad(t_uint3, local_invocation_id);
- return {OpCompositeExtract(t_uint, id, element), Type::Uint};
- }
-
- template <u32 element>
- Expression WorkGroupId(Operation operation) {
- const Id id = OpLoad(t_uint3, workgroup_id);
- return {OpCompositeExtract(t_uint, id, element), Type::Uint};
- }
-
- Expression BallotThread(Operation operation) {
- const Id predicate = AsBool(Visit(operation[0]));
- const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate);
-
- if (!device.IsWarpSizePotentiallyBiggerThanGuest()) {
- // Guest-like devices can just return the first index.
- return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint};
- }
-
- // The others have to return the uint that is local to the current thread.
- // For instance, a device with a warp size of 64 will return the upper uint when the
- // current thread index is 38.
- const Id tid = OpLoad(t_uint, thread_id);
- const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5));
- return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint};
- }
-
- template <Id (Module::*func)(Id, Id)>
- Expression Vote(Operation operation) {
- // TODO(Rodrigo): Handle devices with different warp sizes
- const Id predicate = AsBool(Visit(operation[0]));
- return {(this->*func)(t_bool, predicate), Type::Bool};
- }
-
- Expression ThreadId(Operation) {
- return {OpLoad(t_uint, thread_id), Type::Uint};
- }
-
- template <std::size_t index>
- Expression ThreadMask(Operation) {
- // TODO(Rodrigo): Handle devices with different warp sizes
- const Id mask = thread_masks[index];
- return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
- }
-
- Expression ShuffleIndexed(Operation operation) {
- const Id value = AsFloat(Visit(operation[0]));
- const Id index = AsUint(Visit(operation[1]));
- return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
- }
-
- Expression Barrier(Operation) {
- if (!ir.IsDecompiled()) {
- LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
- return {};
- }
-
- const auto scope = spv::Scope::Workgroup;
- const auto memory = spv::Scope::Workgroup;
- const auto semantics =
- spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease;
- OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)),
- Constant(t_uint, static_cast<u32>(memory)),
- Constant(t_uint, static_cast<u32>(semantics)));
- return {};
- }
-
- template <spv::Scope scope>
- Expression MemoryBarrier(Operation) {
- const auto semantics =
- spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
- spv::MemorySemanticsMask::WorkgroupMemory |
- spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory;
-
- OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)),
- Constant(t_uint, static_cast<u32>(semantics)));
- return {};
- }
-
- Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) {
- const Id id = OpVariable(type, storage);
- Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
- AddGlobalVariable(Name(id, std::move(name)));
- interfaces.push_back(id);
- return id;
- }
-
- Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) {
- return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name));
- }
-
- template <typename... Args>
- Id AccessElement(Id pointer_type, Id composite, Args... elements_) {
- std::vector<Id> members;
- auto elements = {elements_...};
- for (const auto element : elements) {
- members.push_back(Constant(t_uint, element));
- }
-
- return OpAccessChain(pointer_type, composite, members);
- }
-
- Id As(Expression expr, Type wanted_type) {
- switch (wanted_type) {
- case Type::Bool:
- return AsBool(expr);
- case Type::Bool2:
- return AsBool2(expr);
- case Type::Float:
- return AsFloat(expr);
- case Type::Int:
- return AsInt(expr);
- case Type::Uint:
- return AsUint(expr);
- case Type::HalfFloat:
- return AsHalfFloat(expr);
- default:
- UNREACHABLE();
- return expr.id;
- }
- }
-
- Id AsBool(Expression expr) {
- ASSERT(expr.type == Type::Bool);
- return expr.id;
- }
-
- Id AsBool2(Expression expr) {
- ASSERT(expr.type == Type::Bool2);
- return expr.id;
- }
-
- Id AsFloat(Expression expr) {
- switch (expr.type) {
- case Type::Float:
- return expr.id;
- case Type::Int:
- case Type::Uint:
- return OpBitcast(t_float, expr.id);
- case Type::HalfFloat:
- if (device.IsFloat16Supported()) {
- return OpBitcast(t_float, expr.id);
- }
- return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id));
- default:
- UNREACHABLE();
- return expr.id;
- }
- }
-
- Id AsInt(Expression expr) {
- switch (expr.type) {
- case Type::Int:
- return expr.id;
- case Type::Float:
- case Type::Uint:
- return OpBitcast(t_int, expr.id);
- case Type::HalfFloat:
- if (device.IsFloat16Supported()) {
- return OpBitcast(t_int, expr.id);
- }
- return OpPackHalf2x16(t_int, expr.id);
- default:
- UNREACHABLE();
- return expr.id;
- }
- }
-
- Id AsUint(Expression expr) {
- switch (expr.type) {
- case Type::Uint:
- return expr.id;
- case Type::Float:
- case Type::Int:
- return OpBitcast(t_uint, expr.id);
- case Type::HalfFloat:
- if (device.IsFloat16Supported()) {
- return OpBitcast(t_uint, expr.id);
- }
- return OpPackHalf2x16(t_uint, expr.id);
- default:
- UNREACHABLE();
- return expr.id;
- }
- }
-
- Id AsHalfFloat(Expression expr) {
- switch (expr.type) {
- case Type::HalfFloat:
- return expr.id;
- case Type::Float:
- case Type::Int:
- case Type::Uint:
- if (device.IsFloat16Supported()) {
- return OpBitcast(t_half, expr.id);
- }
- return OpUnpackHalf2x16(t_half, AsUint(expr));
- default:
- UNREACHABLE();
- return expr.id;
- }
- }
-
- Id GetHalfScalarFromFloat(Id value) {
- if (device.IsFloat16Supported()) {
- return OpFConvert(t_scalar_half, value);
- }
- return value;
- }
-
- Id GetFloatFromHalfScalar(Id value) {
- if (device.IsFloat16Supported()) {
- return OpFConvert(t_float, value);
- }
- return value;
- }
-
- AttributeType GetAttributeType(u32 location) const {
- if (stage != ShaderType::Vertex) {
- return {Type::Float, t_in_float, t_in_float4};
- }
- switch (specialization.attribute_types.at(location)) {
- case Maxwell::VertexAttribute::Type::SignedNorm:
- case Maxwell::VertexAttribute::Type::UnsignedNorm:
- case Maxwell::VertexAttribute::Type::UnsignedScaled:
- case Maxwell::VertexAttribute::Type::SignedScaled:
- case Maxwell::VertexAttribute::Type::Float:
- return {Type::Float, t_in_float, t_in_float4};
- case Maxwell::VertexAttribute::Type::SignedInt:
- return {Type::Int, t_in_int, t_in_int4};
- case Maxwell::VertexAttribute::Type::UnsignedInt:
- return {Type::Uint, t_in_uint, t_in_uint4};
- default:
- UNREACHABLE();
- return {Type::Float, t_in_float, t_in_float4};
- }
- }
-
- Id GetTypeDefinition(Type type) const {
- switch (type) {
- case Type::Bool:
- return t_bool;
- case Type::Bool2:
- return t_bool2;
- case Type::Float:
- return t_float;
- case Type::Int:
- return t_int;
- case Type::Uint:
- return t_uint;
- case Type::HalfFloat:
- return t_half;
- default:
- UNREACHABLE();
- return {};
- }
- }
-
- std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
- switch (type) {
- case Type::Float:
- return {t_float, t_float2, t_float3, t_float4};
- case Type::Int:
- return {t_int, t_int2, t_int3, t_int4};
- case Type::Uint:
- return {t_uint, t_uint2, t_uint3, t_uint4};
- default:
- UNIMPLEMENTED();
- return {};
- }
- }
-
- std::tuple<Id, Id> CreateFlowStack() {
- // TODO(Rodrigo): Figure out the actual depth of the flow stack; for now it seems unlikely
- // that shaders will use 20 nested SSYs and PBKs.
- constexpr u32 FLOW_STACK_SIZE = 20;
- constexpr auto storage_class = spv::StorageClass::Function;
-
- const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
- const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class,
- ConstantNull(flow_stack_type));
- const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0));
- AddLocalVariable(stack);
- AddLocalVariable(top);
- return std::tie(stack, top);
- }
-
- std::pair<Id, Id> GetFlowStack(Operation operation) {
- const auto stack_class = std::get<MetaStackClass>(operation.GetMeta());
- switch (stack_class) {
- case MetaStackClass::Ssy:
- return {ssy_flow_stack, ssy_flow_stack_top};
- case MetaStackClass::Pbk:
- return {pbk_flow_stack, pbk_flow_stack_top};
- }
- UNREACHABLE();
- return {};
- }
-
- Id GetGlobalMemoryPointer(const GmemNode& gmem) {
- const Id real = AsUint(Visit(gmem.GetRealAddress()));
- const Id base = AsUint(Visit(gmem.GetBaseAddress()));
- const Id diff = OpISub(t_uint, real, base);
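- // The storage buffer is an array of uints, so convert the byte offset to a word index.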
- const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
- const Id buffer = global_buffers.at(gmem.GetDescriptor());
- return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
- }
-
- Id GetSharedMemoryPointer(const SmemNode& smem) {
- ASSERT(stage == ShaderType::Compute);
- Id address = AsUint(Visit(smem.GetAddress()));
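- // Shared memory is likewise addressed in 32-bit words.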
- address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
- return OpAccessChain(t_smem_uint, shared_memory, address);
- }
-
- static constexpr std::array operation_decompilers = {
- &SPIRVDecompiler::Assign,
-
- &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
- Type::Float>,
-
- &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>,
- &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
- &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
- &SPIRVDecompiler::FCastHalf<0>,
- &SPIRVDecompiler::FCastHalf<1>,
- &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
- &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
- &SPIRVDecompiler::FSwizzleAdd,
-
- &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>,
- &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>,
- &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>,
-
- &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>,
- &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>,
- &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>,
- &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>,
- &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>,
- &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>,
-
- &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>,
- &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
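- // Arithmetic right shift of unsigned data is equivalent to a logical shift.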
- &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>,
- &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>,
- &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>,
- &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>,
- &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>,
- &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>,
-
- &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>,
- &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>,
- &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
- &SPIRVDecompiler::HNegate,
- &SPIRVDecompiler::HClamp,
- &SPIRVDecompiler::HCastFloat,
- &SPIRVDecompiler::HUnpack,
- &SPIRVDecompiler::HMergeF32,
- &SPIRVDecompiler::HMergeHN<0>,
- &SPIRVDecompiler::HMergeHN<1>,
- &SPIRVDecompiler::HPack2,
-
- &SPIRVDecompiler::LogicalAssign,
- &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>,
- &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>,
- &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
- &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
- &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2,
- Type::Uint>,
- &SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>,
-
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::LogicalFOrdered,
- &SPIRVDecompiler::LogicalFUnordered,
- &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>,
-
- &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>,
- &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>,
-
- &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>,
- &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>,
-
- &SPIRVDecompiler::LogicalAddCarry,
-
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>,
- // TODO(Rodrigo): Should these use the OpFUnord* variants?
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>,
- &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>,
-
- &SPIRVDecompiler::Texture,
- &SPIRVDecompiler::TextureLod,
- &SPIRVDecompiler::TextureGather,
- &SPIRVDecompiler::TextureQueryDimensions,
- &SPIRVDecompiler::TextureQueryLod,
- &SPIRVDecompiler::TexelFetch,
- &SPIRVDecompiler::TextureGradient,
-
- &SPIRVDecompiler::ImageLoad,
- &SPIRVDecompiler::ImageStore,
- &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
- &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
- &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
- &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
- &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
-
- &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
-
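- // Signed atomics: only min/max need signed opcodes; the rest are sign-agnostic.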
- &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
- &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
-
- &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
-
- &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
- &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
-
- &SPIRVDecompiler::Branch,
- &SPIRVDecompiler::BranchIndirect,
- &SPIRVDecompiler::PushFlowStack,
- &SPIRVDecompiler::PopFlowStack,
- &SPIRVDecompiler::Exit,
- &SPIRVDecompiler::Discard,
-
- &SPIRVDecompiler::EmitVertex,
- &SPIRVDecompiler::EndPrimitive,
-
- &SPIRVDecompiler::InvocationId,
- &SPIRVDecompiler::YNegate,
- &SPIRVDecompiler::LocalInvocationId<0>,
- &SPIRVDecompiler::LocalInvocationId<1>,
- &SPIRVDecompiler::LocalInvocationId<2>,
- &SPIRVDecompiler::WorkGroupId<0>,
- &SPIRVDecompiler::WorkGroupId<1>,
- &SPIRVDecompiler::WorkGroupId<2>,
-
- &SPIRVDecompiler::BallotThread,
- &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>,
- &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>,
- &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,
-
- &SPIRVDecompiler::ThreadId,
- &SPIRVDecompiler::ThreadMask<0>, // Eq
- &SPIRVDecompiler::ThreadMask<1>, // Ge
- &SPIRVDecompiler::ThreadMask<2>, // Gt
- &SPIRVDecompiler::ThreadMask<3>, // Le
- &SPIRVDecompiler::ThreadMask<4>, // Lt
- &SPIRVDecompiler::ShuffleIndexed,
-
- &SPIRVDecompiler::Barrier,
- &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
- &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
- };
- static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
-
- const Device& device;
- const ShaderIR& ir;
- const ShaderType stage;
- const Tegra::Shader::Header header;
- const Registry& registry;
- const Specialization& specialization;
- std::unordered_map<u8, VaryingTFB> transform_feedback;
-
- const Id t_void = Name(TypeVoid(), "void");
-
- const Id t_bool = Name(TypeBool(), "bool");
- const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2");
-
- const Id t_int = Name(TypeInt(32, true), "int");
- const Id t_int2 = Name(TypeVector(t_int, 2), "int2");
- const Id t_int3 = Name(TypeVector(t_int, 3), "int3");
- const Id t_int4 = Name(TypeVector(t_int, 4), "int4");
-
- const Id t_uint = Name(TypeInt(32, false), "uint");
- const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2");
- const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3");
- const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4");
-
- const Id t_float = Name(TypeFloat(32), "float");
- const Id t_float2 = Name(TypeVector(t_float, 2), "float2");
- const Id t_float3 = Name(TypeVector(t_float, 3), "float3");
- const Id t_float4 = Name(TypeVector(t_float, 4), "float4");
-
- const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool");
- const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float");
-
- const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint");
-
- const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool");
- const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int");
- const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4");
- const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint");
- const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3");
- const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4");
- const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float");
- const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2");
- const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3");
- const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4");
-
- const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), "out_int");
-
- const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float");
- const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
-
- const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
- const Id t_cbuf_std140 = Decorate(
- Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"),
- spv::Decoration::ArrayStride, 16U);
- const Id t_cbuf_scalar = Decorate(
- Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"),
- spv::Decoration::ArrayStride, 4U);
- const Id t_cbuf_std140_struct = MemberDecorate(
- Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
- const Id t_cbuf_scalar_struct = MemberDecorate(
- Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
- const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct);
- const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct);
-
- Id t_smem_uint{};
-
- const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
- const Id t_gmem_array =
- Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
- const Id t_gmem_struct = MemberDecorate(
- Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
- const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
-
- const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
-
- const Id v_float_zero = Constant(t_float, 0.0f);
- const Id v_float_one = Constant(t_float, 1.0f);
- const Id v_uint_zero = Constant(t_uint, 0);
-
- // Nvidia uses these defaults for varyings (e.g. position and generic attributes)
- const Id v_varying_default =
- ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one);
-
- const Id v_true = ConstantTrue(t_bool);
- const Id v_false = ConstantFalse(t_bool);
-
- Id t_scalar_half{};
- Id t_half{};
-
- Id out_vertex{};
- Id in_vertex{};
- std::map<u32, Id> registers;
- std::map<u32, Id> custom_variables;
- std::map<Tegra::Shader::Pred, Id> predicates;
- std::map<u32, Id> flow_variables;
- Id local_memory{};
- Id shared_memory{};
- std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
- std::map<Attribute::Index, Id> input_attributes;
- std::unordered_map<u8, GenericVaryingDescription> output_attributes;
- std::map<u32, Id> constant_buffers;
- std::map<GlobalMemoryBase, Id> global_buffers;
- std::map<u32, TexelBuffer> uniform_texels;
- std::map<u32, SampledImage> sampled_images;
- std::map<u32, StorageImage> images;
-
- std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
- Id instance_index{};
- Id vertex_index{};
- Id base_instance{};
- Id base_vertex{};
- Id frag_depth{};
- Id frag_coord{};
- Id front_facing{};
- Id point_coord{};
- Id tess_level_outer{};
- Id tess_level_inner{};
- Id tess_coord{};
- Id invocation_id{};
- Id workgroup_id{};
- Id local_invocation_id{};
- Id thread_id{};
- std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt
-
- VertexIndices in_indices;
- VertexIndices out_indices;
-
- std::vector<Id> interfaces;
-
- Id jmp_to{};
- Id ssy_flow_stack_top{};
- Id pbk_flow_stack_top{};
- Id ssy_flow_stack{};
- Id pbk_flow_stack{};
- Id continue_label{};
- std::map<u32, Id> labels;
-
- bool conditional_branch_set{};
- bool inside_branch{};
-};
-
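-// Lowers the IR's boolean control-flow expressions (predicates, condition codes, flow
-// variables) into SPIR-V boolean values.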
-class ExprDecompiler {
-public:
- explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
-
- Id operator()(const ExprAnd& expr) {
- const Id type_def = decomp.GetTypeDefinition(Type::Bool);
- const Id op1 = Visit(expr.operand1);
- const Id op2 = Visit(expr.operand2);
- return decomp.OpLogicalAnd(type_def, op1, op2);
- }
-
- Id operator()(const ExprOr& expr) {
- const Id type_def = decomp.GetTypeDefinition(Type::Bool);
- const Id op1 = Visit(expr.operand1);
- const Id op2 = Visit(expr.operand2);
- return decomp.OpLogicalOr(type_def, op1, op2);
- }
-
- Id operator()(const ExprNot& expr) {
- const Id type_def = decomp.GetTypeDefinition(Type::Bool);
- const Id op1 = Visit(expr.operand1);
- return decomp.OpLogicalNot(type_def, op1);
- }
-
- Id operator()(const ExprPredicate& expr) {
- const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
- return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred));
- }
-
- Id operator()(const ExprCondCode& expr) {
- return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc)));
- }
-
- Id operator()(const ExprVar& expr) {
- return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index));
- }
-
- Id operator()(const ExprBoolean& expr) {
- return expr.value ? decomp.v_true : decomp.v_false;
- }
-
- Id operator()(const ExprGprEqual& expr) {
- const Id target = decomp.Constant(decomp.t_uint, expr.value);
- Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr));
- gpr = decomp.OpBitcast(decomp.t_uint, gpr);
- return decomp.OpIEqual(decomp.t_bool, gpr, target);
- }
-
- Id Visit(const Expr& node) {
- return std::visit(*this, *node);
- }
-
-private:
- SPIRVDecompiler& decomp;
-};
-
-class ASTDecompiler {
-public:
- explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
-
- void operator()(const ASTProgram& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(const ASTIfThen& ast) {
- ExprDecompiler expr_parser{decomp};
- const Id condition = expr_parser.Visit(ast.condition);
- const Id then_label = decomp.OpLabel();
- const Id endif_label = decomp.OpLabel();
- decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
- decomp.OpBranchConditional(condition, then_label, endif_label);
- decomp.AddLabel(then_label);
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.OpBranch(endif_label);
- decomp.AddLabel(endif_label);
- }
-
- void operator()([[maybe_unused]] const ASTIfElse& ast) {
- UNREACHABLE();
- }
-
- void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
- UNREACHABLE();
- }
-
- void operator()(const ASTBlockDecoded& ast) {
- decomp.VisitBasicBlock(ast.nodes);
- }
-
- void operator()(const ASTVarSet& ast) {
- ExprDecompiler expr_parser{decomp};
- const Id condition = expr_parser.Visit(ast.condition);
- decomp.OpStore(decomp.flow_variables.at(ast.index), condition);
- }
-
- void operator()([[maybe_unused]] const ASTLabel& ast) {
- // Do nothing
- }
-
- void operator()([[maybe_unused]] const ASTGoto& ast) {
- UNREACHABLE();
- }
-
- void operator()(const ASTDoWhile& ast) {
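- // Emit a structured loop: OpLoopMerge in the header, then the body, then the
- // condition in the continue block.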
- const Id loop_label = decomp.OpLabel();
- const Id endloop_label = decomp.OpLabel();
- const Id loop_start_block = decomp.OpLabel();
- const Id loop_continue_block = decomp.OpLabel();
- current_loop_exit = endloop_label;
- decomp.OpBranch(loop_label);
- decomp.AddLabel(loop_label);
- decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone);
- decomp.OpBranch(loop_start_block);
- decomp.AddLabel(loop_start_block);
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.OpBranch(loop_continue_block);
- decomp.AddLabel(loop_continue_block);
- ExprDecompiler expr_parser{decomp};
- const Id condition = expr_parser.Visit(ast.condition);
- decomp.OpBranchConditional(condition, loop_label, endloop_label);
- decomp.AddLabel(endloop_label);
- }
-
- void operator()(const ASTReturn& ast) {
- if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
- ExprDecompiler expr_parser{decomp};
- const Id condition = expr_parser.Visit(ast.condition);
- const Id then_label = decomp.OpLabel();
- const Id endif_label = decomp.OpLabel();
- decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
- decomp.OpBranchConditional(condition, then_label, endif_label);
- decomp.AddLabel(then_label);
- if (ast.kills) {
- decomp.OpKill();
- } else {
- decomp.PreExit();
- decomp.OpReturn();
- }
- decomp.AddLabel(endif_label);
- } else {
- const Id next_block = decomp.OpLabel();
- decomp.OpBranch(next_block);
- decomp.AddLabel(next_block);
- if (ast.kills) {
- decomp.OpKill();
- } else {
- decomp.PreExit();
- decomp.OpReturn();
- }
- decomp.AddLabel(decomp.OpLabel());
- }
- }
-
- void operator()(const ASTBreak& ast) {
- if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
- ExprDecompiler expr_parser{decomp};
- const Id condition = expr_parser.Visit(ast.condition);
- const Id then_label = decomp.OpLabel();
- const Id endif_label = decomp.OpLabel();
- decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
- decomp.OpBranchConditional(condition, then_label, endif_label);
- decomp.AddLabel(then_label);
- decomp.OpBranch(current_loop_exit);
- decomp.AddLabel(endif_label);
- } else {
- const Id next_block = decomp.OpLabel();
- decomp.OpBranch(next_block);
- decomp.AddLabel(next_block);
- decomp.OpBranch(current_loop_exit);
- decomp.AddLabel(decomp.OpLabel());
- }
- }
-
- void Visit(const ASTNode& node) {
- std::visit(*this, *node->GetInnerData());
- }
-
-private:
- SPIRVDecompiler& decomp;
- Id current_loop_exit{};
-};
-
-void SPIRVDecompiler::DecompileAST() {
- const u32 num_flow_variables = ir.GetASTNumVariables();
- for (u32 i = 0; i < num_flow_variables; i++) {
- const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
- Name(id, fmt::format("flow_var_{}", i));
- flow_variables.emplace(i, AddGlobalVariable(id));
- }
-
- DefinePrologue();
-
- const ASTNode program = ir.GetASTProgram();
- ASTDecompiler decompiler{*this};
- decompiler.Visit(program);
-
- const Id next_block = OpLabel();
- OpBranch(next_block);
- AddLabel(next_block);
-}
-
-} // Anonymous namespace
-
-ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
- ShaderEntries entries;
- for (const auto& cbuf : ir.GetConstantBuffers()) {
- entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
- }
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- entries.global_buffers.emplace_back(GlobalBufferEntry{
- .cbuf_index = base.cbuf_index,
- .cbuf_offset = base.cbuf_offset,
- .is_written = usage.is_written,
- });
- }
- for (const auto& sampler : ir.GetSamplers()) {
- if (sampler.is_buffer) {
- entries.uniform_texels.emplace_back(sampler);
- } else {
- entries.samplers.emplace_back(sampler);
- }
- }
- for (const auto& image : ir.GetImages()) {
- if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
- entries.storage_texels.emplace_back(image);
- } else {
- entries.images.emplace_back(image);
- }
- }
- for (const auto& attribute : ir.GetInputAttributes()) {
- if (IsGenericAttribute(attribute)) {
- entries.attributes.insert(GetGenericAttributeLocation(attribute));
- }
- }
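- // Record which constant buffer slots are used as a bitmask.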
- for (const auto& buffer : entries.const_buffers) {
- entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
- }
- entries.clip_distances = ir.GetClipDistances();
- entries.shader_length = ir.GetLength();
- entries.uses_warps = ir.UsesWarps();
- return entries;
-}
-
-std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- ShaderType stage, const VideoCommon::Shader::Registry& registry,
- const Specialization& specialization) {
- return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
-}
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
deleted file mode 100644
index 5d94132a5..000000000
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <set>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace Vulkan {
-
-class Device;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
-using SamplerEntry = VideoCommon::Shader::SamplerEntry;
-using StorageTexelEntry = VideoCommon::Shader::ImageEntry;
-using ImageEntry = VideoCommon::Shader::ImageEntry;
-
-constexpr u32 DESCRIPTOR_SET = 0;
-
-class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
-public:
- explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_)
- : ConstBuffer{entry_}, index{index_} {}
-
- constexpr u32 GetIndex() const {
- return index;
- }
-
-private:
- u32 index{};
-};
-
-struct GlobalBufferEntry {
- u32 cbuf_index{};
- u32 cbuf_offset{};
- bool is_written{};
-};
-
-struct ShaderEntries {
- u32 NumBindings() const {
- return static_cast<u32>(const_buffers.size() + global_buffers.size() +
- uniform_texels.size() + samplers.size() + storage_texels.size() +
- images.size());
- }
-
- std::vector<ConstBufferEntry> const_buffers;
- std::vector<GlobalBufferEntry> global_buffers;
- std::vector<UniformTexelEntry> uniform_texels;
- std::vector<SamplerEntry> samplers;
- std::vector<StorageTexelEntry> storage_texels;
- std::vector<ImageEntry> images;
- std::set<u32> attributes;
- std::array<bool, Maxwell::NumClipDistances> clip_distances{};
- std::size_t shader_length{};
- u32 enabled_uniform_buffers{};
- bool uses_warps{};
-};
-
-struct Specialization final {
- u32 base_binding{};
-
- // Compute specific
- std::array<u32, 3> workgroup_size{};
- u32 shared_memory_size{};
-
- // Graphics specific
- std::optional<float> point_size;
- std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
- std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
- bool ndc_minus_one_to_one{};
- bool early_fragment_tests{};
- float alpha_test_ref{};
- Maxwell::ComparisonOp alpha_test_func{};
-};
-// Old gcc versions don't consider this trivially copyable.
-// static_assert(std::is_trivially_copyable_v<Specialization>);
-
-struct SPIRVShader {
- std::vector<u32> code;
- ShaderEntries entries;
-};
-
-ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
-
-std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- Tegra::Engines::ShaderType stage,
- const VideoCommon::Shader::Registry& registry,
- const Specialization& specialization);
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 0412b5234..555b12ed7 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.flags = 0,
.size = STREAM_BUFFER_SIZE,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
- VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 956f86845..e3b7dd61c 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
static constexpr int INVALIDATION_FLAGS[]{
- Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
- StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
- DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
+ Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
+ StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable,
+ DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
+ VertexBuffers, VertexInput,
};
Flags flags{};
for (const int flag : INVALIDATION_FLAGS) {
@@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() {
for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
flags[index] = true;
}
+ for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) {
+ flags[index] = true;
+ }
+ for (int index = VertexBinding0; index <= VertexBinding31; ++index) {
+ flags[index] = true;
+ }
return flags;
}
@@ -79,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) {
table[OFF(stencil_back_func_mask)] = StencilProperties;
}
+void SetupDirtyLineWidth(Tables& tables) {
+ tables[0][OFF(line_width_smooth)] = LineWidth;
+ tables[0][OFF(line_width_aliased)] = LineWidth;
+}
+
void SetupDirtyCullMode(Tables& tables) {
auto& table = tables[0];
table[OFF(cull_face)] = CullMode;
@@ -134,31 +146,38 @@ void SetupDirtyBlending(Tables& tables) {
FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending);
}
-void SetupDirtyInstanceDivisors(Tables& tables) {
- static constexpr size_t divisor_offset = 3;
- for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
- tables[0][OFF(instanced_arrays) + index] = InstanceDivisors;
- tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] =
- InstanceDivisors;
+void SetupDirtyViewportSwizzles(Tables& tables) {
+ static constexpr size_t swizzle_offset = 6;
+ for (size_t index = 0; index < Regs::NumViewports; ++index) {
+ tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
+ ViewportSwizzles;
}
}
void SetupDirtyVertexAttributes(Tables& tables) {
- FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes);
+ for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
+ const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
+ FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i);
+ }
+ FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput);
}
-void SetupDirtyViewportSwizzles(Tables& tables) {
- static constexpr size_t swizzle_offset = 6;
- for (size_t index = 0; index < Regs::NumViewports; ++index) {
- tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
- ViewportSwizzles;
+void SetupDirtyVertexBindings(Tables& tables) {
+ // Do NOT include stride here; it's implicit in VertexBuffer.
+ static constexpr size_t divisor_offset = 3;
+ for (size_t i = 0; i < Regs::NumVertexArrays; ++i) {
+ const u8 flag = static_cast<u8>(VertexBinding0 + i);
+ tables[0][OFF(instanced_arrays) + i] = VertexInput;
+ tables[1][OFF(instanced_arrays) + i] = flag;
+ tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput;
+ tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag;
}
}
} // Anonymous namespace
StateTracker::StateTracker(Tegra::GPU& gpu)
: flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
- auto& tables = gpu.Maxwell3D().dirty.tables;
+ auto& tables{gpu.Maxwell3D().dirty.tables};
SetupDirtyFlags(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
@@ -166,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
SetupDirtyBlendConstants(tables);
SetupDirtyDepthBounds(tables);
SetupDirtyStencilProperties(tables);
+ SetupDirtyLineWidth(tables);
SetupDirtyCullMode(tables);
SetupDirtyDepthBoundsEnable(tables);
SetupDirtyDepthTestEnable(tables);
@@ -175,9 +195,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
SetupDirtyStencilOp(tables);
SetupDirtyStencilTestEnable(tables);
SetupDirtyBlending(tables);
- SetupDirtyInstanceDivisors(tables);
- SetupDirtyVertexAttributes(tables);
SetupDirtyViewportSwizzles(tables);
+ SetupDirtyVertexAttributes(tables);
+ SetupDirtyVertexBindings(tables);
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 84e918a71..5f78f6950 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -19,12 +19,19 @@ namespace Dirty {
enum : u8 {
First = VideoCommon::Dirty::LastCommonEntry,
+ VertexInput,
+ VertexAttribute0,
+ VertexAttribute31 = VertexAttribute0 + 31,
+ VertexBinding0,
+ VertexBinding31 = VertexBinding0 + 31,
+
Viewports,
Scissors,
DepthBias,
BlendConstants,
DepthBounds,
StencilProperties,
+ LineWidth,
CullMode,
DepthBoundsEnable,
@@ -36,11 +43,9 @@ enum : u8 {
StencilTestEnable,
Blending,
- InstanceDivisors,
- VertexAttributes,
ViewportSwizzles,
- Last
+ Last,
};
static_assert(Last <= std::numeric_limits<u8>::max());
@@ -89,6 +94,10 @@ public:
return Exchange(Dirty::StencilProperties, false);
}
+ bool TouchLineWidth() const {
+ return Exchange(Dirty::LineWidth, false);
+ }
+
bool TouchCullMode() {
return Exchange(Dirty::CullMode, false);
}
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index dfd5c65ba..d990eefba 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -65,6 +65,9 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul
VKSwapchain::~VKSwapchain() = default;
void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
+ is_outdated = false;
+ is_suboptimal = false;
+
const auto physical_device = device.GetPhysical();
const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
@@ -82,21 +85,31 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
resource_ticks.resize(image_count);
}
-bool VKSwapchain::AcquireNextImage() {
- const VkResult result =
- device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
- *present_semaphores[frame_index], {}, &image_index);
-
+void VKSwapchain::AcquireNextImage() {
+ const VkResult result = device.GetLogical().AcquireNextImageKHR(
+ *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
+ VK_NULL_HANDLE, &image_index);
+ switch (result) {
+ case VK_SUCCESS:
+ break;
+ case VK_SUBOPTIMAL_KHR:
+ is_suboptimal = true;
+ break;
+ case VK_ERROR_OUT_OF_DATE_KHR:
+ is_outdated = true;
+ break;
+ default:
+ LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
+ break;
+ }
scheduler.Wait(resource_ticks[image_index]);
- return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR;
+ resource_ticks[image_index] = scheduler.CurrentTick();
}
-bool VKSwapchain::Present(VkSemaphore render_semaphore) {
+void VKSwapchain::Present(VkSemaphore render_semaphore) {
const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
const auto present_queue{device.GetPresentQueue()};
- bool recreated = false;
-
const VkPresentInfoKHR present_info{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pNext = nullptr,
@@ -107,7 +120,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) {
.pImageIndices = &image_index,
.pResults = nullptr,
};
-
switch (const VkResult result = present_queue.Present(present_info)) {
case VK_SUCCESS:
break;
@@ -115,24 +127,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) {
LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
break;
case VK_ERROR_OUT_OF_DATE_KHR:
- if (current_width > 0 && current_height > 0) {
- Create(current_width, current_height, current_srgb);
- recreated = true;
- }
+ is_outdated = true;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result));
break;
}
-
- resource_ticks[image_index] = scheduler.CurrentTick();
- frame_index = (frame_index + 1) % static_cast<u32>(image_count);
- return recreated;
-}
-
-bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const {
- // TODO(Rodrigo): Handle framebuffer pixel format changes
- return framebuffer.width != current_width || framebuffer.height != current_height;
+ ++frame_index;
+ if (frame_index >= image_count) {
+ frame_index = 0;
+ }
}
void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width,
@@ -148,7 +152,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
requested_image_count = capabilities.maxImageCount;
}
-
VkSwapchainCreateInfoKHR swapchain_ci{
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
.pNext = nullptr,
@@ -169,7 +172,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
.clipped = VK_FALSE,
.oldSwapchain = nullptr,
};
-
const u32 graphics_family{device.GetGraphicsFamily()};
const u32 present_family{device.GetPresentFamily()};
const std::array<u32, 2> queue_indices{graphics_family, present_family};
@@ -178,7 +180,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
}
-
// Request the size again to reduce the possibility of a TOCTOU race condition.
const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
@@ -186,8 +187,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci);
extent = swapchain_ci.imageExtent;
- current_width = extent.width;
- current_height = extent.height;
current_srgb = srgb;
images = swapchain.GetImages();
@@ -197,8 +196,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
void VKSwapchain::CreateSemaphores() {
present_semaphores.resize(image_count);
- std::generate(present_semaphores.begin(), present_semaphores.end(),
- [this] { return device.GetLogical().CreateSemaphore(); });
+ std::ranges::generate(present_semaphores,
+ [this] { return device.GetLogical().CreateSemaphore(); });
}
void VKSwapchain::CreateImageViews() {
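AcquireNextImage() now latches VK_ERROR_OUT_OF_DATE_KHR and VK_SUBOPTIMAL_KHR into flags instead of folding them into a return value, leaving the recreation policy to the caller. The relocated tick bookkeeping is what keeps the CPU from recycling an image the GPU still owns:

    // Block until the GPU retires the last frame that used this image, then
    // stamp the image with the tick of the frame about to be recorded.
    scheduler.Wait(resource_ticks[image_index]);
    resource_ticks[image_index] = scheduler.CurrentTick();

The CurrentTick() stamp used to happen in Present(); moving it next to the wait keeps both halves of the image's lifetime tracking in one place.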
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index adc8d27cf..35c2cdc14 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -28,14 +28,25 @@ public:
void Create(u32 width, u32 height, bool srgb);
/// Acquires the next image in the swapchain, waits as needed.
- bool AcquireNextImage();
+ void AcquireNextImage();
- /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be
- /// recreated. Takes responsibility for the ownership of the fence.
- bool Present(VkSemaphore render_semaphore);
+ /// Presents the rendered image to the swapchain.
+ void Present(VkSemaphore render_semaphore);
- /// Returns true when the framebuffer layout has changed.
- bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
+ /// Returns true when the color space has changed.
+ bool HasColorSpaceChanged(bool is_srgb) const {
+ return current_srgb != is_srgb;
+ }
+
+ /// Returns true when the swapchain is outdated.
+ bool IsOutDated() const {
+ return is_outdated;
+ }
+
+ /// Returns true when the swapchain is suboptimal.
+ bool IsSubOptimal() const {
+ return is_suboptimal;
+ }
VkExtent2D GetSize() const {
return extent;
@@ -61,10 +72,6 @@ public:
return image_format;
}
- bool GetSrgbState() const {
- return current_srgb;
- }
-
private:
void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
bool srgb);
@@ -92,9 +99,9 @@ private:
VkFormat image_format{};
VkExtent2D extent{};
- u32 current_width{};
- u32 current_height{};
bool current_srgb{};
+ bool is_outdated{};
+ bool is_suboptimal{};
};
} // namespace Vulkan
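With both entry points returning void, the caller drives recreation from the new getters. A hedged sketch of the expected frame loop (the function and variable names here are illustrative, not part of this diff):

    void RendererVulkan::DrawFrame(u32 width, u32 height, bool srgb) {
        if (swapchain.IsOutDated() || swapchain.IsSubOptimal() ||
            swapchain.HasColorSpaceChanged(srgb)) {
            // Create() clears is_outdated/is_suboptimal before rebuilding.
            swapchain.Create(width, height, srgb);
        }
        swapchain.AcquireNextImage();
        // ... record and submit this frame's command buffers ...
        swapchain.Present(render_semaphore);
    }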
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index a2ab4d1ee..8e029bcb3 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -15,6 +15,7 @@
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
@@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange;
using VideoCore::Surface::IsPixelFormatASTC;
namespace {
-
-constexpr std::array ATTACHMENT_REFERENCES{
- VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
- VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
-};
-
constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
if (color == std::array<float, 4>{0, 0, 0, 0}) {
return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
@@ -174,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
}
-[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) {
- if (info.type != ImageType::Buffer) {
- return vk::Buffer{};
- }
- const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format);
- return device.GetLogical().CreateBuffer(VkBufferCreateInfo{
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .size = info.size.width * bytes_per_block,
- .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
- VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
- VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = 0,
- .pQueueFamilyIndices = nullptr,
- });
-}
-
[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
switch (VideoCore::Surface::GetFormatType(format)) {
case VideoCore::Surface::SurfaceType::ColorTexture:
@@ -226,23 +195,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
}
}
-[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device,
- const ImageView* image_view) {
- using MaxwellToVK::SurfaceFormat;
- const PixelFormat pixel_format = image_view->format;
- return VkAttachmentDescription{
- .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
- .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format,
- .samples = image_view->Samples(),
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- };
-}
-
[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
switch (swizzle) {
case SwizzleSource::Zero:
@@ -263,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
return VK_COMPONENT_SWIZZLE_ZERO;
}
+[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) {
+ switch (type) {
+ case Shader::TextureType::Color1D:
+ return VK_IMAGE_VIEW_TYPE_1D;
+ case Shader::TextureType::Color2D:
+ return VK_IMAGE_VIEW_TYPE_2D;
+ case Shader::TextureType::ColorCube:
+ return VK_IMAGE_VIEW_TYPE_CUBE;
+ case Shader::TextureType::Color3D:
+ return VK_IMAGE_VIEW_TYPE_3D;
+ case Shader::TextureType::ColorArray1D:
+ return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
+ case Shader::TextureType::ColorArray2D:
+ return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
+ case Shader::TextureType::ColorArrayCube:
+ return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
+ case Shader::TextureType::Buffer:
+ UNREACHABLE_MSG("Texture buffers can't be image views");
+ return VK_IMAGE_VIEW_TYPE_1D;
+ }
+ UNREACHABLE_MSG("Invalid image view type={}", type);
+ return VK_IMAGE_VIEW_TYPE_2D;
+}
+
[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) {
switch (type) {
case VideoCommon::ImageViewType::e1D:
@@ -280,7 +256,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
case VideoCommon::ImageViewType::CubeArray:
return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
case VideoCommon::ImageViewType::Rect:
- LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported");
+ UNIMPLEMENTED_MSG("Rect image view");
return VK_IMAGE_VIEW_TYPE_2D;
case VideoCommon::ImageViewType::Buffer:
UNREACHABLE_MSG("Texture buffers can't be image views");
@@ -327,7 +303,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
-[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
+[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
std::vector<VkBufferCopy> result(copies.size());
std::ranges::transform(
@@ -587,6 +563,28 @@ struct RangedBarrierRange {
}
};
+[[nodiscard]] VkFormat Format(Shader::ImageFormat format) {
+ switch (format) {
+ case Shader::ImageFormat::Typeless:
+ break;
+ case Shader::ImageFormat::R8_SINT:
+ return VK_FORMAT_R8_SINT;
+ case Shader::ImageFormat::R8_UINT:
+ return VK_FORMAT_R8_UINT;
+ case Shader::ImageFormat::R16_UINT:
+ return VK_FORMAT_R16_UINT;
+ case Shader::ImageFormat::R16_SINT:
+ return VK_FORMAT_R16_SINT;
+ case Shader::ImageFormat::R32_UINT:
+ return VK_FORMAT_R32_UINT;
+ case Shader::ImageFormat::R32G32_UINT:
+ return VK_FORMAT_R32G32_UINT;
+ case Shader::ImageFormat::R32G32B32A32_UINT:
+ return VK_FORMAT_R32G32B32A32_UINT;
+ }
+ UNREACHABLE_MSG("Invalid image format={}", format);
+ return VK_FORMAT_R32_UINT;
+}
} // Anonymous namespace
void TextureCacheRuntime::Finish() {
@@ -608,7 +606,10 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format);
const bool is_dst_msaa = dst.Samples() != VK_SAMPLE_COUNT_1_BIT;
const bool is_src_msaa = src.Samples() != VK_SAMPLE_COUNT_1_BIT;
- ASSERT(aspect_mask == ImageAspectMask(dst.format));
+ if (aspect_mask != ImageAspectMask(dst.format)) {
+ UNIMPLEMENTED_MSG("Incompatible blit from format {} to {}", src.format, dst.format);
+ return;
+ }
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter,
operation);
@@ -622,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
return;
}
}
- ASSERT(src.ImageFormat() == dst.ImageFormat());
+ ASSERT(src.format == dst.format);
ASSERT(!(is_dst_msaa && !is_src_msaa));
ASSERT(operation == Fermi2D::Operation::SrcCopy);
@@ -762,7 +763,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
dst_range.AddLayers(copy.dstSubresource);
src_range.AddLayers(copy.srcSubresource);
}
- const std::array read_barriers{
+ const std::array pre_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -771,7 +772,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
- .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
@@ -792,29 +793,43 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
.subresourceRange = dst_range.SubresourceRange(aspect_mask),
},
};
- const VkImageMemoryBarrier write_barrier{
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .pNext = nullptr,
- .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
- .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- .newLayout = VK_IMAGE_LAYOUT_GENERAL,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = dst_image,
- .subresourceRange = dst_range.SubresourceRange(aspect_mask),
+ const std::array post_barriers{
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = src_image,
+ .subresourceRange = src_range.SubresourceRange(aspect_mask),
+ },
+ VkImageMemoryBarrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+ VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+ .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = dst_image,
+ .subresourceRange = dst_range.SubresourceRange(aspect_mask),
+ },
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
- 0, {}, {}, read_barriers);
- cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
+ 0, {}, {}, pre_barriers);
+ cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
- 0, write_barrier);
+ 0, {}, {}, post_barriers);
});
}
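The hunk above tightens CopyImage's layout usage: rather than copying GENERAL-to-GENERAL, the source moves to TRANSFER_SRC_OPTIMAL (and the destination to TRANSFER_DST_OPTIMAL) for the duration of the copy, then both images return to GENERAL. Optimal transfer layouts let the driver pick faster copy paths than GENERAL can guarantee. The skeleton of the barrier sandwich, reduced from the code above:

    // 1. pre_barriers: GENERAL -> TRANSFER_{SRC,DST}_OPTIMAL.
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                           VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, {}, pre_barriers);
    // 2. Copy with the layouts the barriers promised.
    cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
                     VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies);
    // 3. post_barriers: back to GENERAL for later shader/attachment access.
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
                           VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {}, post_barriers);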
@@ -825,13 +840,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
- image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)),
+ image(MakeImage(runtime.device, info)),
+ commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)),
aspect_mask(ImageAspectMask(info.format)) {
- if (image) {
- commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
- } else {
- commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
- }
if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
if (Settings::values.accelerate_astc.GetValue()) {
flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
@@ -840,11 +851,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
}
}
if (runtime.device.HasDebuggingToolAttached()) {
- if (image) {
- image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
- } else {
- buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
- }
+ image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
}
static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
@@ -896,21 +903,9 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag
});
}
-void Image::UploadMemory(const StagingBufferRef& map,
- std::span<const VideoCommon::BufferCopy> copies) {
- // TODO: Move this to another API
- scheduler->RequestOutsideRenderPassOperationContext();
- std::vector vk_copies = TransformBufferCopies(copies, map.offset);
- const VkBuffer src_buffer = map.buffer;
- const VkBuffer dst_buffer = *buffer;
- scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
- // TODO: Barriers
- cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
- });
-}
-
void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
+ scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
vk_copies](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
@@ -966,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
ImageId image_id_, Image& image)
: VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
- image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount(
- image.info.num_samples)} {
+ image_handle{image.Handle()}, samples{ConvertSampleCount(image.info.num_samples)} {
+ using Shader::TextureType;
+
const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
std::array<SwizzleSource, 4> swizzle{
SwizzleSource::R,
@@ -1005,57 +1001,54 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
},
.subresourceRange = MakeSubresourceRange(aspect_mask, info.range),
};
- const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) {
+ const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) {
VkImageViewCreateInfo ci{create_info};
- ci.viewType = ImageViewType(view_type);
+ ci.viewType = ImageViewType(tex_type);
if (num_layers) {
ci.subresourceRange.layerCount = *num_layers;
}
vk::ImageView handle = device->GetLogical().CreateImageView(ci);
if (device->HasDebuggingToolAttached()) {
- handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str());
+ handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
}
- image_views[static_cast<size_t>(view_type)] = std::move(handle);
+ image_views[static_cast<size_t>(tex_type)] = std::move(handle);
};
switch (info.type) {
case VideoCommon::ImageViewType::e1D:
case VideoCommon::ImageViewType::e1DArray:
- create(VideoCommon::ImageViewType::e1D, 1);
- create(VideoCommon::ImageViewType::e1DArray, std::nullopt);
- render_target = Handle(VideoCommon::ImageViewType::e1DArray);
+ create(TextureType::Color1D, 1);
+ create(TextureType::ColorArray1D, std::nullopt);
+ render_target = Handle(TextureType::ColorArray1D);
break;
case VideoCommon::ImageViewType::e2D:
case VideoCommon::ImageViewType::e2DArray:
- create(VideoCommon::ImageViewType::e2D, 1);
- create(VideoCommon::ImageViewType::e2DArray, std::nullopt);
- render_target = Handle(VideoCommon::ImageViewType::e2DArray);
+ create(TextureType::Color2D, 1);
+ create(TextureType::ColorArray2D, std::nullopt);
+ render_target = Handle(Shader::TextureType::ColorArray2D);
break;
case VideoCommon::ImageViewType::e3D:
- create(VideoCommon::ImageViewType::e3D, std::nullopt);
- render_target = Handle(VideoCommon::ImageViewType::e3D);
+ create(TextureType::Color3D, std::nullopt);
+ render_target = Handle(Shader::TextureType::Color3D);
break;
case VideoCommon::ImageViewType::Cube:
case VideoCommon::ImageViewType::CubeArray:
- create(VideoCommon::ImageViewType::Cube, 6);
- create(VideoCommon::ImageViewType::CubeArray, std::nullopt);
+ create(TextureType::ColorCube, 6);
+ create(TextureType::ColorArrayCube, std::nullopt);
break;
case VideoCommon::ImageViewType::Rect:
UNIMPLEMENTED();
break;
case VideoCommon::ImageViewType::Buffer:
- buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .buffer = image.Buffer(),
- .format = format_info.format,
- .offset = 0, // TODO: Redesign buffer cache to support this
- .range = image.guest_size_bytes,
- });
+ UNREACHABLE();
break;
}
}
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
+ const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
+ : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
+ buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
+
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
: VideoCommon::ImageViewBase{params} {}
@@ -1063,7 +1056,8 @@ VkImageView ImageView::DepthView() {
if (depth_view) {
return *depth_view;
}
- depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT);
+ const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
+ depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT);
return *depth_view;
}
@@ -1071,18 +1065,38 @@ VkImageView ImageView::StencilView() {
if (stencil_view) {
return *stencil_view;
}
- stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT);
+ const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
+ stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT);
return *stencil_view;
}
-vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) {
+VkImageView ImageView::StorageView(Shader::TextureType texture_type,
+ Shader::ImageFormat image_format) {
+ if (image_format == Shader::ImageFormat::Typeless) {
+ return Handle(texture_type);
+ }
+ const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
+ image_format == Shader::ImageFormat::R16_SINT};
+ if (!storage_views) {
+ storage_views = std::make_unique<StorageViews>();
+ }
+ auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds};
+ auto& view{views[static_cast<size_t>(texture_type)]};
+ if (view) {
+ return *view;
+ }
+ view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT);
+ return *view;
+}
+
+vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) {
return device->GetLogical().CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = image_handle,
.viewType = ImageViewType(type),
- .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format,
+ .format = vk_format,
.components{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -1146,7 +1160,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
- std::vector<VkAttachmentDescription> descriptions;
std::vector<VkImageView> attachments;
RenderPassKey renderpass_key{};
s32 num_layers = 1;
@@ -1157,7 +1170,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
renderpass_key.color_formats[index] = PixelFormat::Invalid;
continue;
}
- descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
attachments.push_back(color_buffer->RenderTarget());
renderpass_key.color_formats[index] = color_buffer->format;
num_layers = std::max(num_layers, color_buffer->range.extent.layers);
@@ -1167,10 +1179,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
++num_images;
}
const size_t num_colors = attachments.size();
- const VkAttachmentReference* depth_attachment =
- depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr;
if (depth_buffer) {
- descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
attachments.push_back(depth_buffer->RenderTarget());
renderpass_key.depth_format = depth_buffer->format;
num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
@@ -1183,40 +1192,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
}
renderpass_key.samples = samples;
- const auto& device = runtime.device.GetLogical();
- const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key);
- if (is_new) {
- const VkSubpassDescription subpass{
- .flags = 0,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .pInputAttachments = nullptr,
- .colorAttachmentCount = static_cast<u32>(num_colors),
- .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
- .pResolveAttachments = nullptr,
- .pDepthStencilAttachment = depth_attachment,
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = nullptr,
- };
- cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- .attachmentCount = static_cast<u32>(descriptions.size()),
- .pAttachments = descriptions.data(),
- .subpassCount = 1,
- .pSubpasses = &subpass,
- .dependencyCount = 0,
- .pDependencies = nullptr,
- });
- }
- renderpass = *cache_pair->second;
+ renderpass = runtime.render_pass_cache.Get(renderpass_key);
+
render_area = VkExtent2D{
.width = key.size.width,
.height = key.size.height,
};
num_color_buffers = static_cast<u32>(num_colors);
- framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{
+ framebuffer = runtime.device.GetLogical().CreateFramebuffer({
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 172bcdf98..0b73d55f8 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,6 +7,7 @@
#include <compare>
#include <span>
+#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
@@ -26,35 +27,10 @@ class Device;
class Image;
class ImageView;
class Framebuffer;
+class RenderPassCache;
class StagingBufferPool;
class VKScheduler;
-struct RenderPassKey {
- constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
-
- std::array<PixelFormat, NUM_RT> color_formats;
- PixelFormat depth_format;
- VkSampleCountFlagBits samples;
-};
-
-} // namespace Vulkan
-
-namespace std {
-template <>
-struct hash<Vulkan::RenderPassKey> {
- [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
- size_t value = static_cast<size_t>(key.depth_format) << 48;
- value ^= static_cast<size_t>(key.samples) << 52;
- for (size_t i = 0; i < key.color_formats.size(); ++i) {
- value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
- }
- return value;
- }
-};
-} // namespace std
-
-namespace Vulkan {
-
struct TextureCacheRuntime {
const Device& device;
VKScheduler& scheduler;
@@ -62,13 +38,13 @@ struct TextureCacheRuntime {
StagingBufferPool& staging_buffer_pool;
BlitImageHelper& blit_image_helper;
ASTCDecoderPass& astc_decoder_pass;
- std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{};
+ RenderPassCache& render_pass_cache;
void Finish();
- [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
+ StagingBufferRef UploadStagingBuffer(size_t size);
- [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
+ StagingBufferRef DownloadStagingBuffer(size_t size);
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
@@ -79,7 +55,7 @@ struct TextureCacheRuntime {
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
- [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept {
+ bool CanAccelerateImageUpload(Image&) const noexcept {
return false;
}
@@ -117,8 +93,6 @@ public:
void UploadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
- void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
-
void DownloadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);
@@ -126,10 +100,6 @@ public:
return *image;
}
- [[nodiscard]] VkBuffer Buffer() const noexcept {
- return *buffer;
- }
-
[[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
return aspect_mask;
}
@@ -146,7 +116,6 @@ public:
private:
VKScheduler* scheduler;
vk::Image image;
- vk::Buffer buffer;
MemoryCommit commit;
vk::ImageView image_view;
std::vector<vk::ImageView> storage_image_views;
@@ -157,18 +126,19 @@ private:
class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
+ const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
[[nodiscard]] VkImageView DepthView();
[[nodiscard]] VkImageView StencilView();
- [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept {
- return *image_views[static_cast<size_t>(query_type)];
- }
+ [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
+ Shader::ImageFormat image_format);
- [[nodiscard]] VkBufferView BufferView() const noexcept {
- return *buffer_view;
+ [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
+ return *image_views[static_cast<size_t>(texture_type)];
}
[[nodiscard]] VkImage ImageHandle() const noexcept {
@@ -179,26 +149,36 @@ public:
return render_target;
}
- [[nodiscard]] PixelFormat ImageFormat() const noexcept {
- return image_format;
- }
-
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
return samples;
}
+ [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
+ return gpu_addr;
+ }
+
+ [[nodiscard]] u32 BufferSize() const noexcept {
+ return buffer_size;
+ }
+
private:
- [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask);
+ struct StorageViews {
+ std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds;
+ std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds;
+ };
+
+ [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
const Device* device = nullptr;
- std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views;
+ std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
+ std::unique_ptr<StorageViews> storage_views;
vk::ImageView depth_view;
vk::ImageView stencil_view;
- vk::BufferView buffer_view;
VkImage image_handle = VK_NULL_HANDLE;
VkImageView render_target = VK_NULL_HANDLE;
- PixelFormat image_format = PixelFormat::Invalid;
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
+ GPUVAddr gpu_addr = 0;
+ u32 buffer_size = 0;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
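The new StorageView() reinterprets the image through a typed format, allocating the StorageViews table lazily and caching one view per (signedness, texture type) pair, so image views that never feed storage descriptors pay nothing. A short usage sketch under those assumptions (the call site is illustrative):

    // Typeless bindings fall through to the plain per-type view; typed
    // bindings get, and cache, a reinterpreting color-aspect view.
    const VkImageView view = image_view.StorageView(
        Shader::TextureType::Color2D, Shader::ImageFormat::R32_UINT);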
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index dc45fdcb1..0df3a7fe9 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -15,7 +15,9 @@
namespace Vulkan {
VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_)
- : device{device_}, scheduler{scheduler_} {}
+ : device{device_}, scheduler{scheduler_} {
+ payload_cursor = payload.data();
+}
VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
@@ -36,13 +38,4 @@ void VKUpdateDescriptorQueue::Acquire() {
upload_start = payload_cursor;
}
-void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
- VkDescriptorSet set) {
- const void* const data = upload_start;
- const vk::Device* const logical = &device.GetLogical();
- scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
- logical->UpdateDescriptorSet(set, update_template, data);
- });
-}
-
} // namespace Vulkan
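With Send() deleted, pipeline code is expected to pull the staged payload through UpdateData() and record the descriptor update itself. A sketch of the replacement call site, mirroring the body Send() used to have (the surrounding names are illustrative):

    const void* const data = update_descriptor_queue.UpdateData();
    const vk::Device* const logical = &device.GetLogical();
    scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
        logical->UpdateDescriptorSet(set, update_template, data);
    });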
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index d35e77c44..d7de4c490 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -39,7 +39,9 @@ public:
void Acquire();
- void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
+ const DescriptorUpdateEntry* UpdateData() const noexcept {
+ return upload_start;
+ }
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
*(payload_cursor++) = VkDescriptorImageInfo{
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
deleted file mode 100644
index db11144c7..000000000
--- a/src/video_core/shader/ast.cpp
+++ /dev/null
@@ -1,752 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <string>
-#include <string_view>
-
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/expr.h"
-
-namespace VideoCommon::Shader {
-
-ASTZipper::ASTZipper() = default;
-
-void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) {
- ASSERT(new_first->manager == nullptr);
- first = new_first;
- last = new_first;
-
- ASTNode current = first;
- while (current) {
- current->manager = this;
- current->parent = parent;
- last = current;
- current = current->next;
- }
-}
-
-void ASTZipper::PushBack(const ASTNode new_node) {
- ASSERT(new_node->manager == nullptr);
- new_node->previous = last;
- if (last) {
- last->next = new_node;
- }
- new_node->next.reset();
- last = new_node;
- if (!first) {
- first = new_node;
- }
- new_node->manager = this;
-}
-
-void ASTZipper::PushFront(const ASTNode new_node) {
- ASSERT(new_node->manager == nullptr);
- new_node->previous.reset();
- new_node->next = first;
- if (first) {
- first->previous = new_node;
- }
- if (last == first) {
- last = new_node;
- }
- first = new_node;
- new_node->manager = this;
-}
-
-void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) {
- ASSERT(new_node->manager == nullptr);
- if (!at_node) {
- PushFront(new_node);
- return;
- }
- const ASTNode next = at_node->next;
- if (next) {
- next->previous = new_node;
- }
- new_node->previous = at_node;
- if (at_node == last) {
- last = new_node;
- }
- new_node->next = next;
- at_node->next = new_node;
- new_node->manager = this;
-}
-
-void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) {
- ASSERT(new_node->manager == nullptr);
- if (!at_node) {
- PushBack(new_node);
- return;
- }
- const ASTNode previous = at_node->previous;
- if (previous) {
- previous->next = new_node;
- }
- new_node->next = at_node;
- if (at_node == first) {
- first = new_node;
- }
- new_node->previous = previous;
- at_node->previous = new_node;
- new_node->manager = this;
-}
-
-void ASTZipper::DetachTail(ASTNode node) {
- ASSERT(node->manager == this);
- if (node == first) {
- first.reset();
- last.reset();
- return;
- }
-
- last = node->previous;
- last->next.reset();
- node->previous.reset();
-
- ASTNode current = std::move(node);
- while (current) {
- current->manager = nullptr;
- current->parent.reset();
- current = current->next;
- }
-}
-
-void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) {
- ASSERT(start->manager == this && end->manager == this);
- if (start == end) {
- DetachSingle(start);
- return;
- }
- const ASTNode prev = start->previous;
- const ASTNode post = end->next;
- if (!prev) {
- first = post;
- } else {
- prev->next = post;
- }
- if (!post) {
- last = prev;
- } else {
- post->previous = prev;
- }
- start->previous.reset();
- end->next.reset();
- ASTNode current = start;
- bool found = false;
- while (current) {
- current->manager = nullptr;
- current->parent.reset();
- found |= current == end;
- current = current->next;
- }
- ASSERT(found);
-}
-
-void ASTZipper::DetachSingle(const ASTNode node) {
- ASSERT(node->manager == this);
- const ASTNode prev = node->previous;
- const ASTNode post = node->next;
- node->previous.reset();
- node->next.reset();
- if (!prev) {
- first = post;
- } else {
- prev->next = post;
- }
- if (!post) {
- last = prev;
- } else {
- post->previous = prev;
- }
-
- node->manager = nullptr;
- node->parent.reset();
-}
-
-void ASTZipper::Remove(const ASTNode node) {
- ASSERT(node->manager == this);
- const ASTNode next = node->next;
- const ASTNode previous = node->previous;
- if (previous) {
- previous->next = next;
- }
- if (next) {
- next->previous = previous;
- }
- node->parent.reset();
- node->manager = nullptr;
- if (node == last) {
- last = previous;
- }
- if (node == first) {
- first = next;
- }
-}
-
-class ExprPrinter final {
-public:
- void operator()(const ExprAnd& expr) {
- inner += "( ";
- std::visit(*this, *expr.operand1);
- inner += " && ";
- std::visit(*this, *expr.operand2);
- inner += ')';
- }
-
- void operator()(const ExprOr& expr) {
- inner += "( ";
- std::visit(*this, *expr.operand1);
- inner += " || ";
- std::visit(*this, *expr.operand2);
- inner += ')';
- }
-
- void operator()(const ExprNot& expr) {
- inner += "!";
- std::visit(*this, *expr.operand1);
- }
-
- void operator()(const ExprPredicate& expr) {
- inner += fmt::format("P{}", expr.predicate);
- }
-
- void operator()(const ExprCondCode& expr) {
- inner += fmt::format("CC{}", expr.cc);
- }
-
- void operator()(const ExprVar& expr) {
- inner += fmt::format("V{}", expr.var_index);
- }
-
- void operator()(const ExprBoolean& expr) {
- inner += expr.value ? "true" : "false";
- }
-
- void operator()(const ExprGprEqual& expr) {
- inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value);
- }
-
- const std::string& GetResult() const {
- return inner;
- }
-
-private:
- std::string inner;
-};
-
-class ASTPrinter {
-public:
- void operator()(const ASTProgram& ast) {
- scope++;
- inner += "program {\n";
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- inner += "}\n";
- scope--;
- }
-
- void operator()(const ASTIfThen& ast) {
- ExprPrinter expr_parser{};
- std::visit(expr_parser, *ast.condition);
- inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult());
- scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- scope--;
- inner += fmt::format("{}}}\n", Indent());
- }
-
- void operator()(const ASTIfElse& ast) {
- inner += Indent();
- inner += "else {\n";
-
- scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- scope--;
-
- inner += Indent();
- inner += "}\n";
- }
-
- void operator()(const ASTBlockEncoded& ast) {
- inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
- }
-
- void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
- inner += Indent();
- inner += "Block;\n";
- }
-
- void operator()(const ASTVarSet& ast) {
- ExprPrinter expr_parser{};
- std::visit(expr_parser, *ast.condition);
- inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult());
- }
-
- void operator()(const ASTLabel& ast) {
- inner += fmt::format("Label_{}:\n", ast.index);
- }
-
- void operator()(const ASTGoto& ast) {
- ExprPrinter expr_parser{};
- std::visit(expr_parser, *ast.condition);
- inner +=
- fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label);
- }
-
- void operator()(const ASTDoWhile& ast) {
- ExprPrinter expr_parser{};
- std::visit(expr_parser, *ast.condition);
- inner += fmt::format("{}do {{\n", Indent());
- scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- scope--;
- inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult());
- }
-
- void operator()(const ASTReturn& ast) {
- ExprPrinter expr_parser{};
- std::visit(expr_parser, *ast.condition);
- inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(),
- ast.kills ? "discard" : "exit");
- }
-
- void operator()(const ASTBreak& ast) {
- ExprPrinter expr_parser{};
- std::visit(expr_parser, *ast.condition);
- inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult());
- }
-
- void Visit(const ASTNode& node) {
- std::visit(*this, *node->GetInnerData());
- }
-
- const std::string& GetResult() const {
- return inner;
- }
-
-private:
- std::string_view Indent() {
- if (space_segment_scope == scope) {
- return space_segment;
- }
-
- // Ensure that we don't exceed our view.
- ASSERT(scope * 2 < spaces.size());
-
- space_segment = spaces.substr(0, scope * 2);
- space_segment_scope = scope;
- return space_segment;
- }
-
- std::string inner{};
- std::string_view space_segment;
-
- u32 scope{};
- u32 space_segment_scope{};
-
- static constexpr std::string_view spaces{" "};
-};
-
-std::string ASTManager::Print() const {
- ASTPrinter printer{};
- printer.Visit(main_node);
- return printer.GetResult();
-}
-
-ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_)
- : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {}
-
-ASTManager::~ASTManager() {
- Clear();
-}
-
-void ASTManager::Init() {
- main_node = ASTBase::Make<ASTProgram>(ASTNode{});
- program = std::get_if<ASTProgram>(main_node->GetInnerData());
- false_condition = MakeExpr<ExprBoolean>(false);
-}
-
-void ASTManager::DeclareLabel(u32 address) {
- const auto pair = labels_map.emplace(address, labels_count);
- if (pair.second) {
- labels_count++;
- labels.resize(labels_count);
- }
-}
-
-void ASTManager::InsertLabel(u32 address) {
- const u32 index = labels_map[address];
- const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index);
- labels[index] = label;
- program->nodes.PushBack(label);
-}
-
-void ASTManager::InsertGoto(Expr condition, u32 address) {
- const u32 index = labels_map[address];
- const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index);
- gotos.push_back(goto_node);
- program->nodes.PushBack(goto_node);
-}
-
-void ASTManager::InsertBlock(u32 start_address, u32 end_address) {
- ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address);
- program->nodes.PushBack(std::move(block));
-}
-
-void ASTManager::InsertReturn(Expr condition, bool kills) {
- ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills);
- program->nodes.PushBack(std::move(node));
-}
-
-// The decompile algorithm is based on
-// "Taming control flow: A structured approach to eliminating goto statements"
-// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be
-// on the same structured level as the label which they jump to. This is done
-// through outward/inward movements and lifting. Once they are at the same
-// level, you can enclose them in an "if" structure or a "do-while" structure.
-void ASTManager::Decompile() {
- auto it = gotos.begin();
- while (it != gotos.end()) {
- const ASTNode goto_node = *it;
- const auto label_index = goto_node->GetGotoLabel();
- if (!label_index) {
- return;
- }
- const ASTNode label = labels[*label_index];
- if (!full_decompile) {
- // We only decompile backward jumps
- if (!IsBackwardsJump(goto_node, label)) {
- it++;
- continue;
- }
- }
- if (IndirectlyRelated(goto_node, label)) {
- while (!DirectlyRelated(goto_node, label)) {
- MoveOutward(goto_node);
- }
- }
- if (DirectlyRelated(goto_node, label)) {
- u32 goto_level = goto_node->GetLevel();
- const u32 label_level = label->GetLevel();
- while (label_level < goto_level) {
- MoveOutward(goto_node);
- goto_level--;
- }
- // TODO(Blinkhawk): Implement Lifting and Inward Movements
- }
- if (label->GetParent() == goto_node->GetParent()) {
- bool is_loop = false;
- ASTNode current = goto_node->GetPrevious();
- while (current) {
- if (current == label) {
- is_loop = true;
- break;
- }
- current = current->GetPrevious();
- }
-
- if (is_loop) {
- EncloseDoWhile(goto_node, label);
- } else {
- EncloseIfThen(goto_node, label);
- }
- it = gotos.erase(it);
- continue;
- }
- it++;
- }
- if (full_decompile) {
- for (const ASTNode& label : labels) {
- auto& manager = label->GetManager();
- manager.Remove(label);
- }
- labels.clear();
- } else {
- auto label_it = labels.begin();
- while (label_it != labels.end()) {
- bool can_remove = true;
- ASTNode label = *label_it;
- for (const ASTNode& goto_node : gotos) {
- const auto label_index = goto_node->GetGotoLabel();
- if (!label_index) {
- return;
- }
- ASTNode& glabel = labels[*label_index];
- if (glabel == label) {
- can_remove = false;
- break;
- }
- }
- if (can_remove) {
- label->MarkLabelUnused();
- }
- ++label_it;
- }
- }
-}
-
-bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const {
- u32 goto_level = goto_node->GetLevel();
- u32 label_level = label_node->GetLevel();
- while (goto_level > label_level) {
- goto_level--;
- goto_node = goto_node->GetParent();
- }
- while (label_level > goto_level) {
- label_level--;
- label_node = label_node->GetParent();
- }
- while (goto_node->GetParent() != label_node->GetParent()) {
- goto_node = goto_node->GetParent();
- label_node = label_node->GetParent();
- }
- ASTNode current = goto_node->GetPrevious();
- while (current) {
- if (current == label_node) {
- return true;
- }
- current = current->GetPrevious();
- }
- return false;
-}
-
-bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const {
- return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second));
-}
-
-bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const {
- if (first->GetParent() == second->GetParent()) {
- return false;
- }
- const u32 first_level = first->GetLevel();
- const u32 second_level = second->GetLevel();
- u32 min_level;
- u32 max_level;
- ASTNode max;
- ASTNode min;
- if (first_level > second_level) {
- min_level = second_level;
- min = second;
- max_level = first_level;
- max = first;
- } else {
- min_level = first_level;
- min = first;
- max_level = second_level;
- max = second;
- }
-
- while (max_level > min_level) {
- max_level--;
- max = max->GetParent();
- }
-
- return min->GetParent() == max->GetParent();
-}
-
-void ASTManager::ShowCurrentState(std::string_view state) const {
- LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print());
- SanityCheck();
-}
-
-void ASTManager::SanityCheck() const {
- for (const auto& label : labels) {
- if (!label->GetParent()) {
- LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
- }
- }
-}
-
-void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) {
- ASTZipper& zipper = goto_node->GetManager();
- const ASTNode loop_start = label->GetNext();
- if (loop_start == goto_node) {
- zipper.Remove(goto_node);
- return;
- }
- const ASTNode parent = label->GetParent();
- const Expr condition = goto_node->GetGotoCondition();
- zipper.DetachSegment(loop_start, goto_node);
- const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition);
- ASTZipper* sub_zipper = do_while_node->GetSubNodes();
- sub_zipper->Init(loop_start, do_while_node);
- zipper.InsertAfter(do_while_node, label);
- sub_zipper->Remove(goto_node);
-}
-
-void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) {
- ASTZipper& zipper = goto_node->GetManager();
- const ASTNode if_end = label->GetPrevious();
- if (if_end == goto_node) {
- zipper.Remove(goto_node);
- return;
- }
- const ASTNode prev = goto_node->GetPrevious();
- const Expr condition = goto_node->GetGotoCondition();
- bool do_else = false;
- if (!disable_else_derivation && prev->IsIfThen()) {
- const Expr if_condition = prev->GetIfCondition();
- do_else = ExprAreEqual(if_condition, condition);
- }
- const ASTNode parent = label->GetParent();
- zipper.DetachSegment(goto_node, if_end);
- ASTNode if_node;
- if (do_else) {
- if_node = ASTBase::Make<ASTIfElse>(parent);
- } else {
- Expr neg_condition = MakeExprNot(condition);
- if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition);
- }
- ASTZipper* sub_zipper = if_node->GetSubNodes();
- sub_zipper->Init(goto_node, if_node);
- zipper.InsertAfter(if_node, prev);
- sub_zipper->Remove(goto_node);
-}
-
-void ASTManager::MoveOutward(ASTNode goto_node) {
- ASTZipper& zipper = goto_node->GetManager();
- const ASTNode parent = goto_node->GetParent();
- ASTZipper& zipper2 = parent->GetManager();
- const ASTNode grandpa = parent->GetParent();
- const bool is_loop = parent->IsLoop();
- const bool is_else = parent->IsIfElse();
- const bool is_if = parent->IsIfThen();
-
- const ASTNode prev = goto_node->GetPrevious();
- const ASTNode post = goto_node->GetNext();
-
- const Expr condition = goto_node->GetGotoCondition();
- zipper.DetachSingle(goto_node);
- if (is_loop) {
- const u32 var_index = NewVariable();
- const Expr var_condition = MakeExpr<ExprVar>(var_index);
- const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
- const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
- zipper2.InsertBefore(var_node_init, parent);
- zipper.InsertAfter(var_node, prev);
- goto_node->SetGotoCondition(var_condition);
- const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition);
- zipper.InsertAfter(break_node, var_node);
- } else if (is_if || is_else) {
- const u32 var_index = NewVariable();
- const Expr var_condition = MakeExpr<ExprVar>(var_index);
- const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
- const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
- if (is_if) {
- zipper2.InsertBefore(var_node_init, parent);
- } else {
- zipper2.InsertBefore(var_node_init, parent->GetPrevious());
- }
- zipper.InsertAfter(var_node, prev);
- goto_node->SetGotoCondition(var_condition);
- if (post) {
- zipper.DetachTail(post);
- const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition));
- ASTZipper* sub_zipper = if_node->GetSubNodes();
- sub_zipper->Init(post, if_node);
- zipper.InsertAfter(if_node, var_node);
- }
- } else {
- UNREACHABLE();
- }
- const ASTNode next = parent->GetNext();
- if (is_if && next && next->IsIfElse()) {
- zipper2.InsertAfter(goto_node, next);
- goto_node->SetParent(grandpa);
- return;
- }
- zipper2.InsertAfter(goto_node, parent);
- goto_node->SetParent(grandpa);
-}
-
-class ASTClearer {
-public:
- ASTClearer() = default;
-
- void operator()(const ASTProgram& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(const ASTIfThen& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(const ASTIfElse& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {}
-
- void operator()(ASTBlockDecoded& ast) {
- ast.nodes.clear();
- }
-
- void operator()([[maybe_unused]] const ASTVarSet& ast) {}
-
- void operator()([[maybe_unused]] const ASTLabel& ast) {}
-
- void operator()([[maybe_unused]] const ASTGoto& ast) {}
-
- void operator()(const ASTDoWhile& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()([[maybe_unused]] const ASTReturn& ast) {}
-
- void operator()([[maybe_unused]] const ASTBreak& ast) {}
-
- void Visit(const ASTNode& node) {
- std::visit(*this, *node->GetInnerData());
- node->Clear();
- }
-};
-
-void ASTManager::Clear() {
- if (!main_node) {
- return;
- }
- ASTClearer clearer{};
- clearer.Visit(main_node);
- main_node.reset();
- program = nullptr;
- labels_map.clear();
- labels.clear();
- gotos.clear();
-}
-
-} // namespace VideoCommon::Shader
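For reference, the structuring this deleted pass performed, per the Erosa/Hendren comment in Decompile(): once a goto and its label sit at the same nesting level, a backward jump becomes a do-while and a forward jump becomes an if on the negated condition. A tiny worked example of the backward case, written as equivalent C++ control flow:

    // Before: a conditional backward goto.
    Label_0:
        block_a();
        if (cond) goto Label_0;
        block_b();

    // After EncloseDoWhile(): the span from the label's successor through the
    // goto is wrapped in a loop guarded by the goto's own condition.
    do {
        block_a();
    } while (cond);
    block_b();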
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
deleted file mode 100644
index dc49b369e..000000000
--- a/src/video_core/shader/ast.h
+++ /dev/null
@@ -1,398 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <functional>
-#include <list>
-#include <memory>
-#include <optional>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-#include "video_core/shader/expr.h"
-#include "video_core/shader/node.h"
-
-namespace VideoCommon::Shader {
-
-class ASTBase;
-class ASTBlockDecoded;
-class ASTBlockEncoded;
-class ASTBreak;
-class ASTDoWhile;
-class ASTGoto;
-class ASTIfElse;
-class ASTIfThen;
-class ASTLabel;
-class ASTProgram;
-class ASTReturn;
-class ASTVarSet;
-
-using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded,
- ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>;
-
-using ASTNode = std::shared_ptr<ASTBase>;
-
-enum class ASTZipperType : u32 {
- Program,
- IfThen,
- IfElse,
- Loop,
-};
-
-class ASTZipper final {
-public:
- explicit ASTZipper();
-
- void Init(ASTNode first, ASTNode parent);
-
- ASTNode GetFirst() const {
- return first;
- }
-
- ASTNode GetLast() const {
- return last;
- }
-
- void PushBack(ASTNode new_node);
- void PushFront(ASTNode new_node);
- void InsertAfter(ASTNode new_node, ASTNode at_node);
- void InsertBefore(ASTNode new_node, ASTNode at_node);
- void DetachTail(ASTNode node);
- void DetachSingle(ASTNode node);
- void DetachSegment(ASTNode start, ASTNode end);
- void Remove(ASTNode node);
-
- ASTNode first;
- ASTNode last;
-};
-
-class ASTProgram {
-public:
- ASTZipper nodes{};
-};
-
-class ASTIfThen {
-public:
- explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {}
- Expr condition;
- ASTZipper nodes{};
-};
-
-class ASTIfElse {
-public:
- ASTZipper nodes{};
-};
-
-class ASTBlockEncoded {
-public:
- explicit ASTBlockEncoded(u32 start_, u32 end_) : start{start_}, end{end_} {}
- u32 start;
- u32 end;
-};
-
-class ASTBlockDecoded {
-public:
- explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {}
- NodeBlock nodes;
-};
-
-class ASTVarSet {
-public:
- explicit ASTVarSet(u32 index_, Expr condition_)
- : index{index_}, condition{std::move(condition_)} {}
-
- u32 index;
- Expr condition;
-};
-
-class ASTLabel {
-public:
- explicit ASTLabel(u32 index_) : index{index_} {}
- u32 index;
- bool unused{};
-};
-
-class ASTGoto {
-public:
- explicit ASTGoto(Expr condition_, u32 label_)
- : condition{std::move(condition_)}, label{label_} {}
-
- Expr condition;
- u32 label;
-};
-
-class ASTDoWhile {
-public:
- explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {}
- Expr condition;
- ASTZipper nodes{};
-};
-
-class ASTReturn {
-public:
- explicit ASTReturn(Expr condition_, bool kills_)
- : condition{std::move(condition_)}, kills{kills_} {}
-
- Expr condition;
- bool kills;
-};
-
-class ASTBreak {
-public:
- explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {}
- Expr condition;
-};
-
-class ASTBase {
-public:
- explicit ASTBase(ASTNode parent_, ASTData data_)
- : data{std::move(data_)}, parent{std::move(parent_)} {}
-
- template <class U, class... Args>
- static ASTNode Make(ASTNode parent, Args&&... args) {
- return std::make_shared<ASTBase>(std::move(parent),
- ASTData(U(std::forward<Args>(args)...)));
- }
-
- void SetParent(ASTNode new_parent) {
- parent = std::move(new_parent);
- }
-
- ASTNode& GetParent() {
- return parent;
- }
-
- const ASTNode& GetParent() const {
- return parent;
- }
-
- u32 GetLevel() const {
- u32 level = 0;
- auto next_parent = parent;
- while (next_parent) {
- next_parent = next_parent->GetParent();
- level++;
- }
- return level;
- }
-
- ASTData* GetInnerData() {
- return &data;
- }
-
- const ASTData* GetInnerData() const {
- return &data;
- }
-
- ASTNode GetNext() const {
- return next;
- }
-
- ASTNode GetPrevious() const {
- return previous;
- }
-
- ASTZipper& GetManager() {
- return *manager;
- }
-
- const ASTZipper& GetManager() const {
- return *manager;
- }
-
- std::optional<u32> GetGotoLabel() const {
- if (const auto* inner = std::get_if<ASTGoto>(&data)) {
- return {inner->label};
- }
- return std::nullopt;
- }
-
- Expr GetGotoCondition() const {
- if (const auto* inner = std::get_if<ASTGoto>(&data)) {
- return inner->condition;
- }
- return nullptr;
- }
-
- void MarkLabelUnused() {
- if (auto* inner = std::get_if<ASTLabel>(&data)) {
- inner->unused = true;
- }
- }
-
- bool IsLabelUnused() const {
- if (const auto* inner = std::get_if<ASTLabel>(&data)) {
- return inner->unused;
- }
- return true;
- }
-
- std::optional<u32> GetLabelIndex() const {
- if (const auto* inner = std::get_if<ASTLabel>(&data)) {
- return {inner->index};
- }
- return std::nullopt;
- }
-
- Expr GetIfCondition() const {
- if (const auto* inner = std::get_if<ASTIfThen>(&data)) {
- return inner->condition;
- }
- return nullptr;
- }
-
- void SetGotoCondition(Expr new_condition) {
- if (auto* inner = std::get_if<ASTGoto>(&data)) {
- inner->condition = std::move(new_condition);
- }
- }
-
- bool IsIfThen() const {
- return std::holds_alternative<ASTIfThen>(data);
- }
-
- bool IsIfElse() const {
- return std::holds_alternative<ASTIfElse>(data);
- }
-
- bool IsBlockEncoded() const {
- return std::holds_alternative<ASTBlockEncoded>(data);
- }
-
- void TransformBlockEncoded(NodeBlock&& nodes) {
- data = ASTBlockDecoded(std::move(nodes));
- }
-
- bool IsLoop() const {
- return std::holds_alternative<ASTDoWhile>(data);
- }
-
- ASTZipper* GetSubNodes() {
- if (std::holds_alternative<ASTProgram>(data)) {
- return &std::get_if<ASTProgram>(&data)->nodes;
- }
- if (std::holds_alternative<ASTIfThen>(data)) {
- return &std::get_if<ASTIfThen>(&data)->nodes;
- }
- if (std::holds_alternative<ASTIfElse>(data)) {
- return &std::get_if<ASTIfElse>(&data)->nodes;
- }
- if (std::holds_alternative<ASTDoWhile>(data)) {
- return &std::get_if<ASTDoWhile>(&data)->nodes;
- }
- return nullptr;
- }
-
- void Clear() {
- next.reset();
- previous.reset();
- parent.reset();
- manager = nullptr;
- }
-
-private:
- friend class ASTZipper;
-
- ASTData data;
- ASTNode parent;
- ASTNode next;
- ASTNode previous;
- ASTZipper* manager{};
-};
-
-class ASTManager final {
-public:
- explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_);
- ~ASTManager();
-
- ASTManager(const ASTManager&) = delete;
- ASTManager& operator=(const ASTManager&) = delete;
-
- ASTManager(ASTManager&& other) noexcept = default;
- ASTManager& operator=(ASTManager&& other) noexcept = default;
-
- void Init();
-
- void DeclareLabel(u32 address);
-
- void InsertLabel(u32 address);
-
- void InsertGoto(Expr condition, u32 address);
-
- void InsertBlock(u32 start_address, u32 end_address);
-
- void InsertReturn(Expr condition, bool kills);
-
- std::string Print() const;
-
- void Decompile();
-
- void ShowCurrentState(std::string_view state) const;
-
- void SanityCheck() const;
-
- void Clear();
-
- bool IsFullyDecompiled() const {
- if (full_decompile) {
- return gotos.empty();
- }
-
- for (ASTNode goto_node : gotos) {
- auto label_index = goto_node->GetGotoLabel();
- if (!label_index) {
- return false;
- }
- ASTNode glabel = labels[*label_index];
- if (IsBackwardsJump(goto_node, glabel)) {
- return false;
- }
- }
- return true;
- }
-
- ASTNode GetProgram() const {
- return main_node;
- }
-
- u32 GetVariables() const {
- return variables;
- }
-
- const std::vector<ASTNode>& GetLabels() const {
- return labels;
- }
-
-private:
- bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const;
-
- bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const;
-
- bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const;
-
- void EncloseDoWhile(ASTNode goto_node, ASTNode label);
-
- void EncloseIfThen(ASTNode goto_node, ASTNode label);
-
- void MoveOutward(ASTNode goto_node);
-
- u32 NewVariable() {
- return variables++;
- }
-
- bool full_decompile{};
- bool disable_else_derivation{};
- std::unordered_map<u32, u32> labels_map{};
- u32 labels_count{};
- std::vector<ASTNode> labels{};
- std::list<ASTNode> gotos{};
- u32 variables{};
- ASTProgram* program{};
- ASTNode main_node{};
- Expr false_condition{};
-};
-
-} // namespace VideoCommon::Shader
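
The ASTData variant above is dispatched by visitor structs such as the ASTClearer earlier in ast.cpp and the ASTDecoder in decode.cpp further down: std::visit selects the operator() overload matching the alternative a node currently holds. A self-contained sketch of that pattern, using placeholder node types:

#include <cstdio>
#include <variant>

struct IfThen {};
struct Return {};
using Data = std::variant<IfThen, Return>;

struct Visitor {
    void operator()(const IfThen&) { std::puts("if-then"); }
    void operator()(const Return&) { std::puts("return"); }
};

int main() {
    Data node = IfThen{};
    std::visit(Visitor{}, node); // calls the overload for the held alternative
}
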
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
deleted file mode 100644
index 02adcf9c7..000000000
--- a/src/video_core/shader/async_shaders.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <condition_variable>
-#include <mutex>
-#include <thread>
-#include <vector>
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_base.h"
-#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/shader/async_shaders.h"
-
-namespace VideoCommon::Shader {
-
-AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {}
-
-AsyncShaders::~AsyncShaders() {
- KillWorkers();
-}
-
-void AsyncShaders::AllocateWorkers() {
- // Use at least one thread
- u32 num_workers = 1;
-
- // Deduce how many more threads we can use
- const u32 thread_count = std::thread::hardware_concurrency();
- if (thread_count >= 8) {
- // At 8 hardware threads add a second worker, then one more for every 2 threads beyond 8
- num_workers += 1 + (thread_count - 8) / 2;
- }
-
- // If the worker count already matches, there is nothing to do
- if (num_workers == worker_threads.size()) {
- return;
- }
-
- // If workers already exist, clear them
- if (!worker_threads.empty()) {
- FreeWorkers();
- }
-
- // Create workers
- for (std::size_t i = 0; i < num_workers; i++) {
- context_list.push_back(emu_window.CreateSharedContext());
- worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
- context_list[i].get());
- }
-}
-
-void AsyncShaders::FreeWorkers() {
- // Mark all threads to quit
- is_thread_exiting.store(true);
- cv.notify_all();
- for (auto& thread : worker_threads) {
- thread.join();
- }
- // Clear our shared contexts
- context_list.clear();
-
- // Clear our worker threads
- worker_threads.clear();
-}
-
-void AsyncShaders::KillWorkers() {
- is_thread_exiting.store(true);
- cv.notify_all();
- for (auto& thread : worker_threads) {
- thread.detach();
- }
- // Clear our shared contexts
- context_list.clear();
-
- // Clear our worker threads
- worker_threads.clear();
-}
-
-bool AsyncShaders::HasWorkQueued() const {
- return !pending_queue.empty();
-}
-
-bool AsyncShaders::HasCompletedWork() const {
- std::shared_lock lock{completed_mutex};
- return !finished_work.empty();
-}
-
-bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
- const auto& regs = gpu.Maxwell3D().regs;
-
- // If depth is in use, we can assume the game is not rendering anything that will be
- // used only one time.
- if (regs.zeta_enable) {
- return true;
- }
-
- // If the game is using a small index count, we can assume these are full-screen quads.
- // Usually these shaders are only executed once to build a texture, so we assume they
- // cannot be built asynchronously.
- if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) {
- return false;
- }
-
- return true;
-}
-
-std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
- std::vector<Result> results;
- {
- std::unique_lock lock{completed_mutex};
- results = std::move(finished_work);
- finished_work.clear();
- }
- return results;
-}
-
-void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
- Tegra::Engines::ShaderType shader_type, u64 uid,
- std::vector<u64> code, std::vector<u64> code_b,
- u32 main_offset, CompilerSettings compiler_settings,
- const Registry& registry, VAddr cpu_addr) {
- std::unique_lock lock(queue_mutex);
- pending_queue.push({
- .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
- .device = &device,
- .shader_type = shader_type,
- .uid = uid,
- .code = std::move(code),
- .code_b = std::move(code_b),
- .main_offset = main_offset,
- .compiler_settings = compiler_settings,
- .registry = registry,
- .cpu_address = cpu_addr,
- .pp_cache = nullptr,
- .vk_device = nullptr,
- .scheduler = nullptr,
- .descriptor_pool = nullptr,
- .update_descriptor_queue = nullptr,
- .bindings{},
- .program{},
- .key{},
- .num_color_buffers = 0,
- });
- cv.notify_one();
-}
-
-void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
- const Vulkan::Device& device, Vulkan::VKScheduler& scheduler,
- Vulkan::VKDescriptorPool& descriptor_pool,
- Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
- std::vector<VkDescriptorSetLayoutBinding> bindings,
- Vulkan::SPIRVProgram program,
- Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {
- std::unique_lock lock(queue_mutex);
- pending_queue.push({
- .backend = Backend::Vulkan,
- .device = nullptr,
- .shader_type{},
- .uid = 0,
- .code{},
- .code_b{},
- .main_offset = 0,
- .compiler_settings{},
- .registry{},
- .cpu_address = 0,
- .pp_cache = pp_cache,
- .vk_device = &device,
- .scheduler = &scheduler,
- .descriptor_pool = &descriptor_pool,
- .update_descriptor_queue = &update_descriptor_queue,
- .bindings = std::move(bindings),
- .program = std::move(program),
- .key = key,
- .num_color_buffers = num_color_buffers,
- });
- cv.notify_one();
-}
-
-void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
- while (!is_thread_exiting.load(std::memory_order_relaxed)) {
- std::unique_lock lock{queue_mutex};
- cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
- if (is_thread_exiting) {
- return;
- }
-
- // Re-check under the lock: the wakeup may have been spurious
- if (!HasWorkQueued()) {
- continue;
- }
- // Another thread may have drained the queue first; go back to waiting
- if (pending_queue.empty()) {
- continue;
- }
-
- // Pull work from queue
- WorkerParams work = std::move(pending_queue.front());
- pending_queue.pop();
- lock.unlock();
-
- if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
- const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
- const auto scope = context->Acquire();
- auto program =
- OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
- Result result{};
- result.backend = work.backend;
- result.cpu_address = work.cpu_address;
- result.uid = work.uid;
- result.code = std::move(work.code);
- result.code_b = std::move(work.code_b);
- result.shader_type = work.shader_type;
-
- if (work.backend == Backend::OpenGL) {
- result.program.opengl = std::move(program->source_program);
- } else if (work.backend == Backend::GLASM) {
- result.program.glasm = std::move(program->assembly_program);
- }
-
- {
- std::unique_lock complete_lock(completed_mutex);
- finished_work.push_back(std::move(result));
- }
- } else if (work.backend == Backend::Vulkan) {
- auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
- *work.vk_device, *work.scheduler, *work.descriptor_pool,
- *work.update_descriptor_queue, work.key, work.bindings, work.program,
- work.num_color_buffers);
-
- work.pp_cache->EmplacePipeline(std::move(pipeline));
- }
- }
-}
-
-} // namespace VideoCommon::Shader
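
As a worked example of the AllocateWorkers heuristic above, here is the same arithmetic restated as a standalone function, with the worker counts it yields for a few hardware thread counts noted in comments:

#include <cstdio>

// One worker as a baseline; from 8 hardware threads up, a second worker
// plus one more for every 2 additional threads.
unsigned NumWorkers(unsigned thread_count) {
    unsigned num_workers = 1;
    if (thread_count >= 8) {
        num_workers += 1 + (thread_count - 8) / 2;
    }
    return num_workers;
}

int main() {
    std::printf("%u\n", NumWorkers(4));  // 1
    std::printf("%u\n", NumWorkers(8));  // 2
    std::printf("%u\n", NumWorkers(16)); // 6
}
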
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
deleted file mode 100644
index 7fdff6e56..000000000
--- a/src/video_core/shader/async_shaders.h
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <condition_variable>
-#include <memory>
-#include <shared_mutex>
-#include <thread>
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
-#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/vulkan_common/vulkan_device.h"
-
-namespace Core::Frontend {
-class EmuWindow;
-class GraphicsContext;
-} // namespace Core::Frontend
-
-namespace Tegra {
-class GPU;
-}
-
-namespace Vulkan {
-class VKPipelineCache;
-}
-
-namespace VideoCommon::Shader {
-
-class AsyncShaders {
-public:
- enum class Backend {
- OpenGL,
- GLASM,
- Vulkan,
- };
-
- struct ResultPrograms {
- OpenGL::OGLProgram opengl;
- OpenGL::OGLAssemblyProgram glasm;
- };
-
- struct Result {
- u64 uid;
- VAddr cpu_address;
- Backend backend;
- ResultPrograms program;
- std::vector<u64> code;
- std::vector<u64> code_b;
- Tegra::Engines::ShaderType shader_type;
- };
-
- explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_);
- ~AsyncShaders();
-
- /// Start up shader worker threads
- void AllocateWorkers();
-
- /// Clear the shader queue and kill all worker threads
- void FreeWorkers();
-
- /// Force-end all worker threads (detaches rather than joins them)
- void KillWorkers();
-
- /// Check to see if any shaders have actually been compiled
- [[nodiscard]] bool HasCompletedWork() const;
-
- /// Deduce if a shader can be built on another thread or MUST be built synchronously. We
- /// cannot build every shader async, as some shaders are only built and executed once. We
- /// try to guess which shaders will be used only once.
- [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
-
- /// Pulls completed compiled shaders
- [[nodiscard]] std::vector<Result> GetCompletedWork();
-
- void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
- u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
- CompilerSettings compiler_settings, const Registry& registry,
- VAddr cpu_addr);
-
- void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device,
- Vulkan::VKScheduler& scheduler,
- Vulkan::VKDescriptorPool& descriptor_pool,
- Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
- std::vector<VkDescriptorSetLayoutBinding> bindings,
- Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key,
- u32 num_color_buffers);
-
-private:
- void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
-
- /// Check our worker queue to see if we have any work queued already
- [[nodiscard]] bool HasWorkQueued() const;
-
- struct WorkerParams {
- Backend backend;
- // For OGL
- const OpenGL::Device* device;
- Tegra::Engines::ShaderType shader_type;
- u64 uid;
- std::vector<u64> code;
- std::vector<u64> code_b;
- u32 main_offset;
- CompilerSettings compiler_settings;
- std::optional<Registry> registry;
- VAddr cpu_address;
-
- // For Vulkan
- Vulkan::VKPipelineCache* pp_cache;
- const Vulkan::Device* vk_device;
- Vulkan::VKScheduler* scheduler;
- Vulkan::VKDescriptorPool* descriptor_pool;
- Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
- std::vector<VkDescriptorSetLayoutBinding> bindings;
- Vulkan::SPIRVProgram program;
- Vulkan::GraphicsPipelineCacheKey key;
- u32 num_color_buffers;
- };
-
- std::condition_variable cv;
- mutable std::mutex queue_mutex;
- mutable std::shared_mutex completed_mutex;
- std::atomic<bool> is_thread_exiting{};
- std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
- std::vector<std::thread> worker_threads;
- std::queue<WorkerParams> pending_queue;
- std::vector<Result> finished_work;
- Core::Frontend::EmuWindow& emu_window;
-};
-
-} // namespace VideoCommon::Shader
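
The ShaderCompilerThread loop declared above cooperates with KillWorkers and FreeWorkers through a condition variable whose wait predicate checks both for pending work and for the exit flag, so sleeping workers can be woken for shutdown. A reduced sketch of that shutdown-aware consumer loop, with a plain bool under the mutex standing in for the atomic flag:

#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>

std::mutex queue_mutex;
std::condition_variable cv;
std::queue<int> pending;
bool exiting = false;

void Worker() {
    while (true) {
        std::unique_lock lock{queue_mutex};
        // Wake for either new work or shutdown; the predicate guards spurious wakeups.
        cv.wait(lock, [] { return exiting || !pending.empty(); });
        if (exiting) {
            return;
        }
        const int work = pending.front();
        pending.pop();
        lock.unlock();
        (void)work; // real code would compile the shader outside the lock
    }
}

int main() {
    std::thread worker{Worker};
    {
        std::lock_guard lock{queue_mutex};
        pending.push(1);
    }
    cv.notify_one();
    {
        std::lock_guard lock{queue_mutex};
        exiting = true;
    }
    cv.notify_all();
    worker.join();
}
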
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp
deleted file mode 100644
index cddcbd4f0..000000000
--- a/src/video_core/shader/compiler_settings.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "video_core/shader/compiler_settings.h"
-
-namespace VideoCommon::Shader {
-
-std::string CompileDepthAsString(const CompileDepth cd) {
- switch (cd) {
- case CompileDepth::BruteForce:
- return "Brute Force Compile";
- case CompileDepth::FlowStack:
- return "Simple Flow Stack Mode";
- case CompileDepth::NoFlowStack:
- return "Remove Flow Stack";
- case CompileDepth::DecompileBackwards:
- return "Decompile Backward Jumps";
- case CompileDepth::FullDecompile:
- return "Full Decompilation";
- default:
- return "Unknown Compiler Process";
- }
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h
deleted file mode 100644
index 916018c01..000000000
--- a/src/video_core/shader/compiler_settings.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "video_core/engines/shader_bytecode.h"
-
-namespace VideoCommon::Shader {
-
-enum class CompileDepth : u32 {
- BruteForce = 0,
- FlowStack = 1,
- NoFlowStack = 2,
- DecompileBackwards = 3,
- FullDecompile = 4,
-};
-
-std::string CompileDepthAsString(CompileDepth cd);
-
-struct CompilerSettings {
- CompileDepth depth{CompileDepth::NoFlowStack};
- bool disable_else_derivation{true};
-};
-
-} // namespace VideoCommon::Shader
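
The depth values form an ordered escalation ladder: ScanFlow in control_flow.cpp below attempts the requested depth and falls back to a shallower one when decompilation fails, and ShaderIR::Decode later warns about the downgrade. A small sketch of that comparison, reusing only the enum defined above:

#include <cstdio>

enum class CompileDepth : unsigned {
    BruteForce = 0,
    FlowStack = 1,
    NoFlowStack = 2,
    DecompileBackwards = 3,
    FullDecompile = 4,
};

int main() {
    const CompileDepth requested = CompileDepth::FullDecompile;
    const CompileDepth achieved = CompileDepth::NoFlowStack; // e.g. goto removal failed
    if (achieved != requested) {
        std::printf("downgraded from depth %u to depth %u\n",
                    static_cast<unsigned>(requested), static_cast<unsigned>(achieved));
    }
}
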
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
deleted file mode 100644
index 43d965f2f..000000000
--- a/src/video_core/shader/control_flow.cpp
+++ /dev/null
@@ -1,751 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <list>
-#include <map>
-#include <set>
-#include <stack>
-#include <unordered_map>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/control_flow.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-namespace {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-constexpr s32 unassigned_branch = -2;
-
-struct Query {
- u32 address{};
- std::stack<u32> ssy_stack{};
- std::stack<u32> pbk_stack{};
-};
-
-struct BlockStack {
- BlockStack() = default;
- explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
- std::stack<u32> ssy_stack{};
- std::stack<u32> pbk_stack{};
-};
-
-template <typename T, typename... Args>
-BlockBranchInfo MakeBranchInfo(Args&&... args) {
- static_assert(std::is_convertible_v<T, BranchData>);
- return std::make_shared<BranchData>(T(std::forward<Args>(args)...));
-}
-
-bool BlockBranchIsIgnored(BlockBranchInfo first) {
- bool ignore = false;
- if (std::holds_alternative<SingleBranch>(*first)) {
- const auto branch = std::get_if<SingleBranch>(first.get());
- ignore = branch->ignore;
- }
- return ignore;
-}
-
-struct BlockInfo {
- u32 start{};
- u32 end{};
- bool visited{};
- BlockBranchInfo branch{};
-
- bool IsInside(const u32 address) const {
- return start <= address && address <= end;
- }
-};
-
-struct CFGRebuildState {
- explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_)
- : program_code{program_code_}, registry{registry_}, start{start_} {}
-
- const ProgramCode& program_code;
- Registry& registry;
- u32 start{};
- std::vector<BlockInfo> block_info;
- std::list<u32> inspect_queries;
- std::list<Query> queries;
- std::unordered_map<u32, u32> registered;
- std::set<u32> labels;
- std::map<u32, u32> ssy_labels;
- std::map<u32, u32> pbk_labels;
- std::unordered_map<u32, BlockStack> stacks;
- ASTManager* manager{};
-};
-
-enum class BlockCollision : u32 { None, Found, Inside };
-
-std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) {
- const auto& blocks = state.block_info;
- for (u32 index = 0; index < blocks.size(); index++) {
- if (blocks[index].start == address) {
- return {BlockCollision::Found, index};
- }
- if (blocks[index].IsInside(address)) {
- return {BlockCollision::Inside, index};
- }
- }
- return {BlockCollision::None, 0xFFFFFFFF};
-}
-
-struct ParseInfo {
- BlockBranchInfo branch_info{};
- u32 end_address{};
-};
-
-BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
- auto& it = state.block_info.emplace_back();
- it.start = start;
- it.end = end;
- const u32 index = static_cast<u32>(state.block_info.size() - 1);
- state.registered.insert({start, index});
- return it;
-}
-
-Pred GetPredicate(u32 index, bool negated) {
- return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
-}
-
-enum class ParseResult : u32 {
- ControlCaught,
- BlockEnd,
- AbnormalFlow,
-};
-
-struct BranchIndirectInfo {
- u32 buffer{};
- u32 offset{};
- u32 entries{};
- s32 relative_position{};
-};
-
-struct BufferInfo {
- u32 index;
- u32 offset;
-};
-
-std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
- const Instruction instr = state.program_code[pos];
- const auto opcode = OpCode::Decode(instr);
- if (opcode->get().GetId() != OpCode::Id::BRX) {
- return std::nullopt;
- }
- if (instr.brx.constant_buffer != 0) {
- return std::nullopt;
- }
- --pos;
- return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
-}
-
-template <typename Result, typename TestCallable, typename PackCallable>
-// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
-// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
-std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
- PackCallable pack) {
- for (; pos >= state.start; --pos) {
- if (IsSchedInstruction(pos, state.start)) {
- continue;
- }
- const Instruction instr = state.program_code[pos];
- const auto opcode = OpCode::Decode(instr);
- if (!opcode) {
- continue;
- }
- if (test(instr, opcode->get())) {
- --pos;
- return std::make_optional(pack(instr, opcode->get()));
- }
- }
- return std::nullopt;
-}
-
-std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
- u64 brx_tracked_register) {
- return TrackInstruction<std::pair<BufferInfo, u64>>(
- state, pos,
- [brx_tracked_register](auto instr, const auto& opcode) {
- return opcode.GetId() == OpCode::Id::LD_C &&
- instr.gpr0.Value() == brx_tracked_register &&
- instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
- },
- [](auto instr, const auto& opcode) {
- const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()),
- static_cast<u32>(instr.cbuf36.GetOffset())};
- return std::make_pair(info, instr.gpr8.Value());
- });
-}
-
-std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
- u64 ldc_tracked_register) {
- return TrackInstruction<u64>(
- state, pos,
- [ldc_tracked_register](auto instr, const auto& opcode) {
- return opcode.GetId() == OpCode::Id::SHL_IMM &&
- instr.gpr0.Value() == ldc_tracked_register;
- },
- [](auto instr, const auto&) { return instr.gpr8.Value(); });
-}
-
-std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
- u64 shl_tracked_register) {
- return TrackInstruction<u32>(
- state, pos,
- [shl_tracked_register](auto instr, const auto& opcode) {
- return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
- instr.gpr0.Value() == shl_tracked_register;
- },
- [](auto instr, const auto&) {
- return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
- });
-}
-
-std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
- const auto brx_info = GetBRXInfo(state, pos);
- if (!brx_info) {
- return std::nullopt;
- }
- const auto [relative_position, brx_tracked_register] = *brx_info;
-
- const auto ldc_info = TrackLDC(state, pos, brx_tracked_register);
- if (!ldc_info) {
- return std::nullopt;
- }
- const auto [buffer_info, ldc_tracked_register] = *ldc_info;
-
- const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register);
- if (!shl_tracked_register) {
- return std::nullopt;
- }
-
- const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register);
- if (!entries) {
- return std::nullopt;
- }
-
- return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position};
-}
-
-std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
- u32 offset = static_cast<u32>(address);
- const u32 end_address = static_cast<u32>(state.program_code.size());
- ParseInfo parse_info{};
- SingleBranch single_branch{};
-
- const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) {
- const auto pair = rebuild_state.labels.emplace(label_address);
- if (pair.second) {
- rebuild_state.inspect_queries.push_back(label_address);
- }
- };
-
- while (true) {
- if (offset >= end_address) {
- // ASSERT_OR_EXECUTE can't be used, as it ignores the break
- ASSERT_MSG(false, "Shader passed the current limit!");
-
- single_branch.address = exit_branch;
- single_branch.ignore = false;
- break;
- }
- if (state.registered.contains(offset)) {
- single_branch.address = offset;
- single_branch.ignore = true;
- break;
- }
- if (IsSchedInstruction(offset, state.start)) {
- offset++;
- continue;
- }
- const Instruction instr = {state.program_code[offset]};
- const auto opcode = OpCode::Decode(instr);
- if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
- offset++;
- continue;
- }
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::EXIT: {
- const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
- if (single_branch.condition.predicate == Pred::NeverExecute) {
- offset++;
- continue;
- }
- const ConditionCode cc = instr.flow_condition_code;
- single_branch.condition.cc = cc;
- if (cc == ConditionCode::F) {
- offset++;
- continue;
- }
- single_branch.address = exit_branch;
- single_branch.kill = false;
- single_branch.is_sync = false;
- single_branch.is_brk = false;
- single_branch.ignore = false;
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<SingleBranch>(
- single_branch.condition, single_branch.address, single_branch.kill,
- single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
-
- return {ParseResult::ControlCaught, parse_info};
- }
- case OpCode::Id::BRA: {
- if (instr.bra.constant_buffer != 0) {
- return {ParseResult::AbnormalFlow, parse_info};
- }
- const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
- if (single_branch.condition.predicate == Pred::NeverExecute) {
- offset++;
- continue;
- }
- const ConditionCode cc = instr.flow_condition_code;
- single_branch.condition.cc = cc;
- if (cc == ConditionCode::F) {
- offset++;
- continue;
- }
- const u32 branch_offset = offset + instr.bra.GetBranchTarget();
- if (branch_offset == 0) {
- single_branch.address = exit_branch;
- } else {
- single_branch.address = branch_offset;
- }
- insert_label(state, branch_offset);
- single_branch.kill = false;
- single_branch.is_sync = false;
- single_branch.is_brk = false;
- single_branch.ignore = false;
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<SingleBranch>(
- single_branch.condition, single_branch.address, single_branch.kill,
- single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
-
- return {ParseResult::ControlCaught, parse_info};
- }
- case OpCode::Id::SYNC: {
- const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
- if (single_branch.condition.predicate == Pred::NeverExecute) {
- offset++;
- continue;
- }
- const ConditionCode cc = instr.flow_condition_code;
- single_branch.condition.cc = cc;
- if (cc == ConditionCode::F) {
- offset++;
- continue;
- }
- single_branch.address = unassigned_branch;
- single_branch.kill = false;
- single_branch.is_sync = true;
- single_branch.is_brk = false;
- single_branch.ignore = false;
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<SingleBranch>(
- single_branch.condition, single_branch.address, single_branch.kill,
- single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
-
- return {ParseResult::ControlCaught, parse_info};
- }
- case OpCode::Id::BRK: {
- const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
- if (single_branch.condition.predicate == Pred::NeverExecute) {
- offset++;
- continue;
- }
- const ConditionCode cc = instr.flow_condition_code;
- single_branch.condition.cc = cc;
- if (cc == ConditionCode::F) {
- offset++;
- continue;
- }
- single_branch.address = unassigned_branch;
- single_branch.kill = false;
- single_branch.is_sync = false;
- single_branch.is_brk = true;
- single_branch.ignore = false;
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<SingleBranch>(
- single_branch.condition, single_branch.address, single_branch.kill,
- single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
-
- return {ParseResult::ControlCaught, parse_info};
- }
- case OpCode::Id::KIL: {
- const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
- if (single_branch.condition.predicate == Pred::NeverExecute) {
- offset++;
- continue;
- }
- const ConditionCode cc = instr.flow_condition_code;
- single_branch.condition.cc = cc;
- if (cc == ConditionCode::F) {
- offset++;
- continue;
- }
- single_branch.address = exit_branch;
- single_branch.kill = true;
- single_branch.is_sync = false;
- single_branch.is_brk = false;
- single_branch.ignore = false;
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<SingleBranch>(
- single_branch.condition, single_branch.address, single_branch.kill,
- single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
-
- return {ParseResult::ControlCaught, parse_info};
- }
- case OpCode::Id::SSY: {
- const u32 target = offset + instr.bra.GetBranchTarget();
- insert_label(state, target);
- state.ssy_labels.emplace(offset, target);
- break;
- }
- case OpCode::Id::PBK: {
- const u32 target = offset + instr.bra.GetBranchTarget();
- insert_label(state, target);
- state.pbk_labels.emplace(offset, target);
- break;
- }
- case OpCode::Id::BRX: {
- const auto tmp = TrackBranchIndirectInfo(state, offset);
- if (!tmp) {
- LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
- return {ParseResult::AbnormalFlow, parse_info};
- }
-
- const auto result = *tmp;
- const s32 pc_target = offset + result.relative_position;
- std::vector<CaseBranch> branches;
- for (u32 i = 0; i < result.entries; i++) {
- auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
- if (!key) {
- return {ParseResult::AbnormalFlow, parse_info};
- }
- u32 value = *key;
- u32 target = static_cast<u32>((value >> 3) + pc_target);
- insert_label(state, target);
- branches.emplace_back(value, target);
- }
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<MultiBranch>(
- static_cast<u32>(instr.gpr8.Value()), std::move(branches));
-
- return {ParseResult::ControlCaught, parse_info};
- }
- default:
- break;
- }
-
- offset++;
- }
- single_branch.kill = false;
- single_branch.is_sync = false;
- single_branch.is_brk = false;
- parse_info.end_address = offset - 1;
- parse_info.branch_info = MakeBranchInfo<SingleBranch>(
- single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
- single_branch.is_brk, single_branch.ignore);
- return {ParseResult::BlockEnd, parse_info};
-}
-
-bool TryInspectAddress(CFGRebuildState& state) {
- if (state.inspect_queries.empty()) {
- return false;
- }
-
- const u32 address = state.inspect_queries.front();
- state.inspect_queries.pop_front();
- const auto [result, block_index] = TryGetBlock(state, address);
- switch (result) {
- case BlockCollision::Found: {
- return true;
- }
- case BlockCollision::Inside: {
- // This case is the tricky one:
- // We need to split the block into 2 separate blocks
- const u32 end = state.block_info[block_index].end;
- BlockInfo& new_block = CreateBlockInfo(state, address, end);
- BlockInfo& current_block = state.block_info[block_index];
- current_block.end = address - 1;
- new_block.branch = std::move(current_block.branch);
- BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
- const auto branch = std::get_if<SingleBranch>(forward_branch.get());
- branch->address = address;
- branch->ignore = true;
- current_block.branch = std::move(forward_branch);
- return true;
- }
- default:
- break;
- }
- const auto [parse_result, parse_info] = ParseCode(state, address);
- if (parse_result == ParseResult::AbnormalFlow) {
- // On abnormal flow, return false to abort the CFG reconstruction
- return false;
- }
-
- BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
- block_info.branch = parse_info.branch_info;
- if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
- const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
- if (branch->condition.IsUnconditional()) {
- return true;
- }
- const u32 fallthrough_address = parse_info.end_address + 1;
- state.inspect_queries.push_front(fallthrough_address);
- return true;
- }
- return true;
-}
-
-bool TryQuery(CFGRebuildState& state) {
- const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
- BlockInfo& block) {
- auto gather_start = labels.lower_bound(block.start);
- const auto gather_end = labels.upper_bound(block.end);
- while (gather_start != gather_end) {
- cc.push(gather_start->second);
- ++gather_start;
- }
- };
- if (state.queries.empty()) {
- return false;
- }
-
- Query& q = state.queries.front();
- const u32 block_index = state.registered[q.address];
- BlockInfo& block = state.block_info[block_index];
- // If the block was already visited, check that the stacks match. Otherwise, gather the
- // ssy/pbk labels into the current stacks, check whether the branch at the end of the
- // block consumes a label, and schedule new queries accordingly.
- if (block.visited) {
- BlockStack& stack = state.stacks[q.address];
- const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
- (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
- state.queries.pop_front();
- return all_okay;
- }
- block.visited = true;
- state.stacks.insert_or_assign(q.address, BlockStack{q});
-
- Query q2(q);
- state.queries.pop_front();
- gather_labels(q2.ssy_stack, state.ssy_labels, block);
- gather_labels(q2.pbk_stack, state.pbk_labels, block);
- if (std::holds_alternative<SingleBranch>(*block.branch)) {
- auto* branch = std::get_if<SingleBranch>(block.branch.get());
- if (!branch->condition.IsUnconditional()) {
- q2.address = block.end + 1;
- state.queries.push_back(q2);
- }
-
- auto& conditional_query = state.queries.emplace_back(q2);
- if (branch->is_sync) {
- if (branch->address == unassigned_branch) {
- branch->address = conditional_query.ssy_stack.top();
- }
- conditional_query.ssy_stack.pop();
- }
- if (branch->is_brk) {
- if (branch->address == unassigned_branch) {
- branch->address = conditional_query.pbk_stack.top();
- }
- conditional_query.pbk_stack.pop();
- }
- conditional_query.address = branch->address;
- return true;
- }
-
- const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get());
- for (const auto& branch_case : multi_branch->branches) {
- auto& conditional_query = state.queries.emplace_back(q2);
- conditional_query.address = branch_case.address;
- }
-
- return true;
-}
-
-void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
- const auto get_expr = [](const Condition& cond) -> Expr {
- Expr result;
- if (cond.cc != ConditionCode::T) {
- result = MakeExpr<ExprCondCode>(cond.cc);
- }
- if (cond.predicate != Pred::UnusedIndex) {
- u32 pred = static_cast<u32>(cond.predicate);
- bool negate = false;
- if (pred > 7) {
- negate = true;
- pred -= 8;
- }
- Expr extra = MakeExpr<ExprPredicate>(pred);
- if (negate) {
- extra = MakeExpr<ExprNot>(std::move(extra));
- }
- if (result) {
- return MakeExpr<ExprAnd>(std::move(extra), std::move(result));
- }
- return extra;
- }
- if (result) {
- return result;
- }
- return MakeExpr<ExprBoolean>(true);
- };
-
- if (std::holds_alternative<SingleBranch>(*branch_info)) {
- const auto* branch = std::get_if<SingleBranch>(branch_info.get());
- if (branch->address < 0) {
- if (branch->kill) {
- mm.InsertReturn(get_expr(branch->condition), true);
- return;
- }
- mm.InsertReturn(get_expr(branch->condition), false);
- return;
- }
- mm.InsertGoto(get_expr(branch->condition), branch->address);
- return;
- }
- const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get());
- for (const auto& branch_case : multi_branch->branches) {
- mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
- branch_case.address);
- }
-}
-
-void DecompileShader(CFGRebuildState& state) {
- state.manager->Init();
- for (auto label : state.labels) {
- state.manager->DeclareLabel(label);
- }
- for (const auto& block : state.block_info) {
- if (state.labels.contains(block.start)) {
- state.manager->InsertLabel(block.start);
- }
- const bool ignore = BlockBranchIsIgnored(block.branch);
- const u32 end = ignore ? block.end + 1 : block.end;
- state.manager->InsertBlock(block.start, end);
- if (!ignore) {
- InsertBranch(*state.manager, block.branch);
- }
- }
- state.manager->Decompile();
-}
-
-} // Anonymous namespace
-
-std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
- const CompilerSettings& settings,
- Registry& registry) {
- auto result_out = std::make_unique<ShaderCharacteristics>();
- if (settings.depth == CompileDepth::BruteForce) {
- result_out->settings.depth = CompileDepth::BruteForce;
- return result_out;
- }
-
- CFGRebuildState state{program_code, start_address, registry};
- // Inspect Code and generate blocks
- state.labels.clear();
- state.labels.emplace(start_address);
- state.inspect_queries.push_back(state.start);
- while (!state.inspect_queries.empty()) {
- if (!TryInspectAddress(state)) {
- result_out->settings.depth = CompileDepth::BruteForce;
- return result_out;
- }
- }
-
- bool use_flow_stack = true;
-
- bool decompiled = false;
-
- if (settings.depth != CompileDepth::FlowStack) {
- // Decompile Stacks
- state.queries.push_back(Query{state.start, {}, {}});
- decompiled = true;
- while (!state.queries.empty()) {
- if (!TryQuery(state)) {
- decompiled = false;
- break;
- }
- }
- }
-
- use_flow_stack = !decompiled;
-
- // Sort and organize results
- std::sort(state.block_info.begin(), state.block_info.end(),
- [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
- if (decompiled && settings.depth != CompileDepth::NoFlowStack) {
- ASTManager manager{settings.depth != CompileDepth::DecompileBackwards,
- settings.disable_else_derivation};
- state.manager = &manager;
- DecompileShader(state);
- decompiled = state.manager->IsFullyDecompiled();
- if (!decompiled) {
- if (settings.depth == CompileDepth::FullDecompile) {
- LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:");
- } else {
- LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:");
- }
- state.manager->ShowCurrentState("Of Shader");
- state.manager->Clear();
- } else {
- auto characteristics = std::make_unique<ShaderCharacteristics>();
- characteristics->start = start_address;
- characteristics->settings.depth = settings.depth;
- characteristics->manager = std::move(manager);
- characteristics->end = state.block_info.back().end + 1;
- return characteristics;
- }
- }
-
- result_out->start = start_address;
- result_out->settings.depth =
- use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
- result_out->blocks.clear();
- for (auto& block : state.block_info) {
- ShaderBlock new_block{};
- new_block.start = block.start;
- new_block.end = block.end;
- new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
- if (!new_block.ignore_branch) {
- new_block.branch = block.branch;
- }
- result_out->end = std::max(result_out->end, block.end);
- result_out->blocks.push_back(new_block);
- }
- if (!use_flow_stack) {
- result_out->labels = std::move(state.labels);
- return result_out;
- }
-
- auto back = result_out->blocks.begin();
- auto next = std::next(back);
- while (next != result_out->blocks.end()) {
- if (!state.labels.contains(next->start) && next->start == back->end + 1) {
- back->end = next->end;
- next = result_out->blocks.erase(next);
- continue;
- }
- back = next;
- ++next;
- }
-
- return result_out;
-}
-} // namespace VideoCommon::Shader
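
TrackBranchIndirectInfo above resolves BRX targets by scanning backwards from the branch for the LD_C, SHL_IMM, and IMNMX_IMM instructions that produced the index register, each step expressed as a test/pack callable pair handed to TrackInstruction. The backward scan in isolation, over a simplified instruction type:

#include <cstddef>
#include <cstdio>
#include <optional>
#include <vector>

// Walk from `pos` down to `start`; return the first element accepted by
// `test`, transformed by `pack`. Mirrors the shape of TrackInstruction.
template <typename Result, typename Test, typename Pack>
std::optional<Result> TrackBackwards(const std::vector<int>& code, std::size_t start,
                                     std::size_t pos, Test test, Pack pack) {
    while (true) {
        if (test(code[pos])) {
            return pack(code[pos]);
        }
        if (pos == start) {
            return std::nullopt;
        }
        --pos;
    }
}

int main() {
    const std::vector<int> code{10, 42, 7};
    const auto found = TrackBackwards<int>(
        code, 0, 2, [](int v) { return v == 42; }, [](int v) { return v * 2; });
    if (found) {
        std::printf("%d\n", *found); // 84
    }
}
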
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
deleted file mode 100644
index 37bf96492..000000000
--- a/src/video_core/shader/control_flow.h
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <list>
-#include <optional>
-#include <set>
-#include <variant>
-
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::ConditionCode;
-using Tegra::Shader::Pred;
-
-constexpr s32 exit_branch = -1;
-
-struct Condition {
- Pred predicate{Pred::UnusedIndex};
- ConditionCode cc{ConditionCode::T};
-
- bool IsUnconditional() const {
- return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
- }
-
- bool operator==(const Condition& other) const {
- return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
- }
-
- bool operator!=(const Condition& other) const {
- return !operator==(other);
- }
-};
-
-class SingleBranch {
-public:
- SingleBranch() = default;
- explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_,
- bool is_brk_, bool ignore_)
- : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_},
- ignore{ignore_} {}
-
- bool operator==(const SingleBranch& b) const {
- return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
- std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore);
- }
-
- bool operator!=(const SingleBranch& b) const {
- return !operator==(b);
- }
-
- Condition condition{};
- s32 address{exit_branch};
- bool kill{};
- bool is_sync{};
- bool is_brk{};
- bool ignore{};
-};
-
-struct CaseBranch {
- explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {}
- u32 cmp_value;
- u32 address;
-};
-
-class MultiBranch {
-public:
- explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_)
- : gpr{gpr_}, branches{std::move(branches_)} {}
-
- u32 gpr{};
- std::vector<CaseBranch> branches{};
-};
-
-using BranchData = std::variant<SingleBranch, MultiBranch>;
-using BlockBranchInfo = std::shared_ptr<BranchData>;
-
-bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
-
-struct ShaderBlock {
- u32 start{};
- u32 end{};
- bool ignore_branch{};
- BlockBranchInfo branch{};
-
- bool operator==(const ShaderBlock& sb) const {
- return std::tie(start, end, ignore_branch) ==
- std::tie(sb.start, sb.end, sb.ignore_branch) &&
- BlockBranchInfoAreEqual(branch, sb.branch);
- }
-
- bool operator!=(const ShaderBlock& sb) const {
- return !operator==(sb);
- }
-};
-
-struct ShaderCharacteristics {
- std::list<ShaderBlock> blocks{};
- std::set<u32> labels{};
- u32 start{};
- u32 end{};
- ASTManager manager{true, true};
- CompilerSettings settings{};
-};
-
-std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
- const CompilerSettings& settings,
- Registry& registry);
-
-} // namespace VideoCommon::Shader
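
The ssy_stack and pbk_stack members used throughout control_flow.cpp model the hardware convention where SSY pushes a reconvergence address that a later SYNC consumes, and PBK/BRK do the same on a separate break stack. A minimal illustration of that push/pop pairing, assuming a single linear pass:

#include <cstdio>
#include <stack>

int main() {
    std::stack<unsigned> ssy_stack;
    ssy_stack.push(0x80); // SSY 0x80: record the reconvergence point
    // ... divergent control flow would execute here ...
    const unsigned sync_target = ssy_stack.top(); // SYNC: consume the label
    ssy_stack.pop();
    std::printf("sync jumps to %#x\n", sync_target); // prints 0x80
}
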
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
deleted file mode 100644
index 6576d1208..000000000
--- a/src/video_core/shader/decode.cpp
+++ /dev/null
@@ -1,368 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-#include <limits>
-#include <set>
-
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_header.h"
-#include "video_core/shader/control_flow.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-namespace {
-
-void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
- const std::list<SamplerEntry>& used_samplers) {
- if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
- return;
- }
- u32 count{};
- std::vector<u32> bound_offsets;
- for (const auto& sampler : used_samplers) {
- if (sampler.is_bindless) {
- continue;
- }
- ++count;
- bound_offsets.emplace_back(sampler.offset);
- }
- if (count > 1) {
- gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
- }
-}
-
-std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
- VideoCore::GuestDriverProfile& gpu_driver,
- const std::list<SamplerEntry>& used_samplers) {
- const u32 base_offset = sampler_to_deduce.offset;
- u32 max_offset{std::numeric_limits<u32>::max()};
- for (const auto& sampler : used_samplers) {
- if (sampler.is_bindless) {
- continue;
- }
- if (sampler.offset > base_offset) {
- max_offset = std::min(sampler.offset, max_offset);
- }
- }
- if (max_offset == std::numeric_limits<u32>::max()) {
- return std::nullopt;
- }
- return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
-}
-
-} // Anonymous namespace
-
-class ASTDecoder {
-public:
- explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}
-
- void operator()(ASTProgram& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(ASTIfThen& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(ASTIfElse& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(ASTBlockEncoded& ast) {}
-
- void operator()(ASTBlockDecoded& ast) {}
-
- void operator()(ASTVarSet& ast) {}
-
- void operator()(ASTLabel& ast) {}
-
- void operator()(ASTGoto& ast) {}
-
- void operator()(ASTDoWhile& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(ASTReturn& ast) {}
-
- void operator()(ASTBreak& ast) {}
-
- void Visit(ASTNode& node) {
- std::visit(*this, *node->GetInnerData());
- if (node->IsBlockEncoded()) {
- auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData());
- NodeBlock bb = ir.DecodeRange(block->start, block->end);
- node->TransformBlockEncoded(std::move(bb));
- }
- }
-
-private:
- ShaderIR& ir;
-};
-
-void ShaderIR::Decode() {
- std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
-
- decompiled = false;
- auto info = ScanFlow(program_code, main_offset, settings, registry);
- auto& shader_info = *info;
- coverage_begin = shader_info.start;
- coverage_end = shader_info.end;
- switch (shader_info.settings.depth) {
- case CompileDepth::FlowStack: {
- for (const auto& block : shader_info.blocks) {
- basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
- }
- break;
- }
- case CompileDepth::NoFlowStack: {
- disable_flow_stack = true;
- const auto insert_block = [this](NodeBlock& nodes, u32 label) {
- if (label == static_cast<u32>(exit_branch)) {
- return;
- }
- basic_blocks.insert({label, nodes});
- };
- const auto& blocks = shader_info.blocks;
- NodeBlock current_block;
- u32 current_label = static_cast<u32>(exit_branch);
- for (const auto& block : blocks) {
- if (shader_info.labels.contains(block.start)) {
- insert_block(current_block, current_label);
- current_block.clear();
- current_label = block.start;
- }
- if (!block.ignore_branch) {
- DecodeRangeInner(current_block, block.start, block.end);
- InsertControlFlow(current_block, block);
- } else {
- DecodeRangeInner(current_block, block.start, block.end + 1);
- }
- }
- insert_block(current_block, current_label);
- break;
- }
- case CompileDepth::DecompileBackwards:
- case CompileDepth::FullDecompile: {
- program_manager = std::move(shader_info.manager);
- disable_flow_stack = true;
- decompiled = true;
- ASTDecoder decoder{*this};
- ASTNode program = GetASTProgram();
- decoder.Visit(program);
- break;
- }
- default:
- LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
- [[fallthrough]];
- case CompileDepth::BruteForce: {
- const auto shader_end = static_cast<u32>(program_code.size());
- coverage_begin = main_offset;
- coverage_end = shader_end;
- for (u32 label = main_offset; label < shader_end; ++label) {
- basic_blocks.insert({label, DecodeRange(label, label + 1)});
- }
- break;
- }
- }
- if (settings.depth != shader_info.settings.depth) {
- LOG_WARNING(
- HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
- CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
- }
-}
-
-NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
- NodeBlock basic_block;
- DecodeRangeInner(basic_block, begin, end);
- return basic_block;
-}
-
-void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
- for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
- pc = DecodeInstr(bb, pc);
- }
-}
-
-void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
- const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
- Node result = n;
- if (cond.cc != ConditionCode::T) {
- result = Conditional(GetConditionCode(cond.cc), {result});
- }
- if (cond.predicate != Pred::UnusedIndex) {
- u32 pred = static_cast<u32>(cond.predicate);
- const bool is_neg = pred > 7;
- if (is_neg) {
- pred -= 8;
- }
- result = Conditional(GetPredicate(pred, is_neg), {result});
- }
- return result;
- };
- if (std::holds_alternative<SingleBranch>(*block.branch)) {
- auto branch = std::get_if<SingleBranch>(block.branch.get());
- if (branch->address < 0) {
- if (branch->kill) {
- Node n = Operation(OperationCode::Discard);
- n = apply_conditions(branch->condition, n);
- bb.push_back(n);
- global_code.push_back(n);
- return;
- }
- Node n = Operation(OperationCode::Exit);
- n = apply_conditions(branch->condition, n);
- bb.push_back(n);
- global_code.push_back(n);
- return;
- }
- Node n = Operation(OperationCode::Branch, Immediate(branch->address));
- n = apply_conditions(branch->condition, n);
- bb.push_back(n);
- global_code.push_back(n);
- return;
- }
- auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
- Node op_a = GetRegister(multi_branch->gpr);
- for (auto& branch_case : multi_branch->branches) {
- Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
- Node op_b = Immediate(branch_case.cmp_value);
- Node condition =
- GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b);
- auto result = Conditional(condition, {n});
- bb.push_back(result);
- global_code.push_back(result);
- }
-}
-
-u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
- // Ignore sched instructions when generating code.
- if (IsSchedInstruction(pc, main_offset)) {
- return pc + 1;
- }
-
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
- const u32 nv_address = ConvertAddressToNvidiaSpace(pc);
-
- // Decoding failure
- if (!opcode) {
- UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
- bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
- nv_address, instr.value)));
- return pc + 1;
- }
-
- bb.push_back(Comment(
- fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));
-
- using Tegra::Shader::Pred;
- UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
- "NeverExecute predicate not implemented");
-
- static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
- {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
- {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
- {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
- {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
- {OpCode::Type::Shift, &ShaderIR::DecodeShift},
- {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
- {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
- {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
- {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
- {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
- {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
- {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
- {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
- {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
- {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
- {OpCode::Type::Image, &ShaderIR::DecodeImage},
- {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
- {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
- {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
- {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
- {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
- {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
- {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
- {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
- {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
- {OpCode::Type::Video, &ShaderIR::DecodeVideo},
- {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
- };
-
- std::vector<Node> tmp_block;
- if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
- pc = (this->*decoder->second)(tmp_block, pc);
- } else {
- pc = DecodeOther(tmp_block, pc);
- }
-
- // Some instructions (like SSY) don't have a predicate field; they are always executed
- // unconditionally.
- const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
- const auto pred_index = static_cast<u32>(instr.pred.pred_index);
-
- if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
- const Node conditional =
- Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
- global_code.push_back(conditional);
- bb.push_back(conditional);
- } else {
- for (auto& node : tmp_block) {
- global_code.push_back(node);
- bb.push_back(node);
- }
- }
-
- return pc + 1;
-}
-
-void ShaderIR::PostDecode() {
- // Deduce texture handler size if needed
- auto gpu_driver = registry.AccessGuestDriverProfile();
- DeduceTextureHandlerSize(gpu_driver, used_samplers);
- // Deduce the size of indexed samplers
- if (!uses_indexed_samplers) {
- return;
- }
- for (auto& sampler : used_samplers) {
- if (!sampler.is_indexed) {
- continue;
- }
- if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
- sampler.size = *size;
- } else {
- LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
- sampler.size = 1;
- }
- }
-}
-
-} // namespace VideoCommon::Shader
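
DecodeInstr above routes every opcode type through a static map of pointer-to-member decoders. The same dispatch mechanism in a self-contained form, with invented names:

#include <cstdio>
#include <map>

struct Decoder {
    unsigned DecodeArithmetic(unsigned pc) { return pc + 1; }
    unsigned DecodeMemory(unsigned pc) { return pc + 1; }
};

enum class Type { Arithmetic, Memory };

int main() {
    // Map each instruction type to the member function that decodes it.
    static const std::map<Type, unsigned (Decoder::*)(unsigned)> decoders = {
        {Type::Arithmetic, &Decoder::DecodeArithmetic},
        {Type::Memory, &Decoder::DecodeMemory},
    };
    Decoder decoder;
    unsigned pc = 0;
    if (const auto it = decoders.find(Type::Memory); it != decoders.end()) {
        pc = (decoder.*it->second)(pc); // invoke through the pointer-to-member
    }
    std::printf("%u\n", pc); // prints 1
}
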
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
deleted file mode 100644
index 15eb700e7..000000000
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::SubOp;
-
-u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- Node op_a = GetRegister(instr.gpr8);
-
- Node op_b = [&] {
- if (instr.is_b_imm) {
- return GetImmediate19(instr);
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::MOV_C:
- case OpCode::Id::MOV_R: {
-        // MOV has neither 'abs' nor 'neg' bits.
- SetRegister(bb, instr.gpr0, op_b);
- break;
- }
- case OpCode::Id::FMUL_C:
- case OpCode::Id::FMUL_R:
- case OpCode::Id::FMUL_IMM: {
- // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
- if (instr.fmul.tab5cb8_2 != 0) {
- LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
- instr.fmul.tab5cb8_2.Value());
- }
- if (instr.fmul.tab5c68_0 != 1) {
-            LOG_DEBUG(HW_GPU, "FMUL tab5c68_0({}) is not implemented",
- instr.fmul.tab5c68_0.Value());
- }
-
- op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
-
- static constexpr std::array FmulPostFactor = {
- 1.000f, // None
- 0.500f, // Divide 2
- 0.250f, // Divide 4
- 0.125f, // Divide 8
- 8.000f, // Mul 8
- 4.000f, // Mul 4
- 2.000f, // Mul 2
- };
-
- if (instr.fmul.postfactor != 0) {
- op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
- Immediate(FmulPostFactor[instr.fmul.postfactor]));
- }
-
- // TODO(Rodrigo): Should precise be used when there's a postfactor?
- Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
-
- value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::FADD_C:
- case OpCode::Id::FADD_R:
- case OpCode::Id::FADD_IMM: {
- op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
- op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
-
- Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
- value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::MUFU: {
- op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
-
- Node value = [&]() {
- switch (instr.sub_op) {
- case SubOp::Cos:
- return Operation(OperationCode::FCos, PRECISE, op_a);
- case SubOp::Sin:
- return Operation(OperationCode::FSin, PRECISE, op_a);
- case SubOp::Ex2:
- return Operation(OperationCode::FExp2, PRECISE, op_a);
- case SubOp::Lg2:
- return Operation(OperationCode::FLog2, PRECISE, op_a);
- case SubOp::Rcp:
- return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
- case SubOp::Rsq:
- return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
- case SubOp::Sqrt:
- return Operation(OperationCode::FSqrt, PRECISE, op_a);
- default:
- UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value());
- return Immediate(0);
- }
- }();
- value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::FMNMX_C:
- case OpCode::Id::FMNMX_R:
- case OpCode::Id::FMNMX_IMM: {
- op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
- op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
-
- const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
-
- const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
- const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
- const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
-
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::FCMP_RR:
- case OpCode::Id::FCMP_RC:
- case OpCode::Id::FCMP_IMMR: {
- UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
- Node op_c = GetRegister(instr.gpr39);
- Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
- SetRegister(
- bb, instr.gpr0,
- Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
- break;
- }
- case OpCode::Id::RRO_C:
- case OpCode::Id::RRO_R:
- case OpCode::Id::RRO_IMM: {
- LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
-
- // Currently RRO is only implemented as a register move.
- op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
- SetRegister(bb, instr.gpr0, op_b);
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
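A scalar sketch of the FMUL postfactor handling deleted above, assuming the same seven-entry scale table; the postfactor scales the first operand before the multiply. Function and parameter names are illustrative.

#include <array>

float FmulWithPostfactor(float a, float b, unsigned postfactor) {
    static constexpr std::array<float, 7> kPostFactor = {
        1.000f, 0.500f, 0.250f, 0.125f, 8.000f, 4.000f, 2.000f,
    };
    // A non-zero postfactor scales the first operand before the multiply,
    // e.g. postfactor == 1 computes (a * 0.5f) * b.
    if (postfactor != 0) {
        a *= kPostFactor[postfactor];
    }
    return a * b;
}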
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
deleted file mode 100644
index 88103fede..000000000
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::HalfType;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- bool negate_a = false;
- bool negate_b = false;
- bool absolute_a = false;
- bool absolute_b = false;
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::HADD2_R:
- if (instr.alu_half.ftz == 0) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
- }
- negate_a = ((instr.value >> 43) & 1) != 0;
- negate_b = ((instr.value >> 31) & 1) != 0;
- absolute_a = ((instr.value >> 44) & 1) != 0;
- absolute_b = ((instr.value >> 30) & 1) != 0;
- break;
- case OpCode::Id::HADD2_C:
- if (instr.alu_half.ftz == 0) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
- }
- negate_a = ((instr.value >> 43) & 1) != 0;
- negate_b = ((instr.value >> 56) & 1) != 0;
- absolute_a = ((instr.value >> 44) & 1) != 0;
- absolute_b = ((instr.value >> 54) & 1) != 0;
- break;
- case OpCode::Id::HMUL2_R:
- negate_a = ((instr.value >> 43) & 1) != 0;
- absolute_a = ((instr.value >> 44) & 1) != 0;
- absolute_b = ((instr.value >> 30) & 1) != 0;
- break;
- case OpCode::Id::HMUL2_C:
- negate_b = ((instr.value >> 31) & 1) != 0;
- absolute_a = ((instr.value >> 44) & 1) != 0;
- absolute_b = ((instr.value >> 54) & 1) != 0;
- break;
- default:
- UNREACHABLE();
- break;
- }
-
- Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
- op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a);
-
- auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::HADD2_C:
- case OpCode::Id::HMUL2_C:
- return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
- case OpCode::Id::HADD2_R:
- case OpCode::Id::HMUL2_R:
- return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
- default:
- UNREACHABLE();
- return {HalfType::F32, Immediate(0)};
- }
- }();
- op_b = UnpackHalfFloat(op_b, type_b);
- op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b);
-
- Node value = [this, opcode, op_a, op_b = op_b] {
- switch (opcode->get().GetId()) {
- case OpCode::Id::HADD2_C:
- case OpCode::Id::HADD2_R:
- return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
- case OpCode::Id::HMUL2_C:
- case OpCode::Id::HMUL2_R:
- return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
- default:
- UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
- return Immediate(0);
- }
- }();
- value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
- value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
-
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
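The modifier decoding deleted above probes raw bit positions of the 64-bit opcode word rather than named bitfields. A minimal sketch of that pattern (illustrative only):

#include <cstdint>

constexpr bool Bit(std::uint64_t insn, unsigned pos) {
    return ((insn >> pos) & 1) != 0;
}

// For HADD2_R, for example (positions taken from the switch above):
// negate_a = Bit(insn, 43), negate_b = Bit(insn, 31),
// absolute_a = Bit(insn, 44), absolute_b = Bit(insn, 30).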
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
deleted file mode 100644
index d179b9873..000000000
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
- if (instr.alu_half_imm.ftz == 0) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
- }
- } else {
- if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
- }
- }
-
- Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
- op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
-
- const Node op_b = UnpackHalfImmediate(instr, true);
-
- Node value = [&]() {
- switch (opcode->get().GetId()) {
- case OpCode::Id::HADD2_IMM:
- return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
- case OpCode::Id::HMUL2_IMM:
- return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate);
- value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
- SetRegister(bb, instr.gpr0, value);
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
deleted file mode 100644
index f1875967c..000000000
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::MOV32_IMM: {
- SetRegister(bb, instr.gpr0, GetImmediate32(instr));
- break;
- }
- case OpCode::Id::FMUL32_IMM: {
- Node value =
- Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
- value = GetSaturatedFloat(value, instr.fmul32.saturate);
-
- SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::FADD32I: {
- const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
- instr.fadd32i.negate_a);
- const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
- instr.fadd32i.negate_b);
-
- const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
- SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
- opcode->get().GetName());
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
deleted file mode 100644
index 7b5bb7003..000000000
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ /dev/null
@@ -1,375 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::IAdd3Height;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-using Tegra::Shader::Register;
-
-u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- Node op_a = GetRegister(instr.gpr8);
- Node op_b = [&]() {
- if (instr.is_b_imm) {
- return Immediate(instr.alu.GetSignedImm20_20());
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::IADD_C:
- case OpCode::Id::IADD_R:
- case OpCode::Id::IADD_IMM: {
- UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT");
- UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC");
-
- op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
- op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
-
- Node value = Operation(OperationCode::UAdd, op_a, op_b);
-
- if (instr.iadd.x) {
- Node carry = GetInternalFlag(InternalFlag::Carry);
- Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0));
- value = Operation(OperationCode::UAdd, std::move(value), std::move(x));
- }
-
- if (instr.generates_cc) {
- const Node i0 = Immediate(0);
-
- Node zero = Operation(OperationCode::LogicalIEqual, value, i0);
- Node sign = Operation(OperationCode::LogicalILessThan, value, i0);
- Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b);
-
- Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0);
- Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0);
- Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b));
- Node overflow = Operation(OperationCode::LogicalAnd, pos, sign);
-
- SetInternalFlag(bb, InternalFlag::Zero, std::move(zero));
- SetInternalFlag(bb, InternalFlag::Sign, std::move(sign));
- SetInternalFlag(bb, InternalFlag::Carry, std::move(carry));
- SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow));
- }
- SetRegister(bb, instr.gpr0, std::move(value));
- break;
- }
- case OpCode::Id::IADD3_C:
- case OpCode::Id::IADD3_R:
- case OpCode::Id::IADD3_IMM: {
- Node op_c = GetRegister(instr.gpr39);
-
- const auto ApplyHeight = [&](IAdd3Height height, Node value) {
- switch (height) {
- case IAdd3Height::None:
- return value;
- case IAdd3Height::LowerHalfWord:
- return BitfieldExtract(value, 0, 16);
- case IAdd3Height::UpperHalfWord:
- return BitfieldExtract(value, 16, 16);
- default:
- UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height);
- return Immediate(0);
- }
- };
-
- if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
- op_a = ApplyHeight(instr.iadd3.height_a, op_a);
- op_b = ApplyHeight(instr.iadd3.height_b, op_b);
- op_c = ApplyHeight(instr.iadd3.height_c, op_c);
- }
-
- op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
- op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
- op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
-
- const Node value = [&] {
- Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
- if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
- return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
- }
- const Node shifted = [&] {
- switch (instr.iadd3.mode) {
- case Tegra::Shader::IAdd3Mode::RightShift:
-                    // TODO(tech4me): According to
-                    // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
-                    // the addition between op_a and op_b should be done in uint33; more
-                    // investigation is required.
- return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
- Immediate(16));
- case Tegra::Shader::IAdd3Mode::LeftShift:
- return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
- Immediate(16));
- default:
- return add_ab;
- }
- }();
- return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
- }();
-
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::ISCADD_C:
- case OpCode::Id::ISCADD_R:
- case OpCode::Id::ISCADD_IMM: {
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition code generation in ISCADD is not implemented");
-
- op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
- op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
-
- const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
- const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
- const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
-
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::POPC_C:
- case OpCode::Id::POPC_R:
- case OpCode::Id::POPC_IMM: {
- if (instr.popc.invert) {
- op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
- }
- const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::FLO_R:
- case OpCode::Id::FLO_C:
- case OpCode::Id::FLO_IMM: {
- Node value;
- if (instr.flo.invert) {
- op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
- }
- if (instr.flo.is_signed) {
- value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
- } else {
- value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
- }
- if (instr.flo.sh) {
- value =
- Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
- }
- SetRegister(bb, instr.gpr0, std::move(value));
- break;
- }
- case OpCode::Id::SEL_C:
- case OpCode::Id::SEL_R:
- case OpCode::Id::SEL_IMM: {
- const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
- const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::ICMP_CR:
- case OpCode::Id::ICMP_R:
- case OpCode::Id::ICMP_RC:
- case OpCode::Id::ICMP_IMM: {
- const Node zero = Immediate(0);
-
- const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::ICMP_CR:
- return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
- GetRegister(instr.gpr39)};
- case OpCode::Id::ICMP_R:
- return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
- case OpCode::Id::ICMP_RC:
- return {GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
- case OpCode::Id::ICMP_IMM:
- return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
- default:
- UNREACHABLE();
- return {zero, zero};
- }
- }();
- const Node op_lhs = GetRegister(instr.gpr8);
- const Node comparison =
- GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
- SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
- break;
- }
- case OpCode::Id::LOP_C:
- case OpCode::Id::LOP_R:
- case OpCode::Id::LOP_IMM: {
- if (instr.alu.lop.invert_a)
- op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
- if (instr.alu.lop.invert_b)
- op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
-
- WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
- instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
- instr.generates_cc);
- break;
- }
- case OpCode::Id::LOP3_C:
- case OpCode::Id::LOP3_R:
- case OpCode::Id::LOP3_IMM: {
- const Node op_c = GetRegister(instr.gpr39);
- const Node lut = [&]() {
- if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
- return Immediate(instr.alu.lop3.GetImmLut28());
- } else {
- return Immediate(instr.alu.lop3.GetImmLut48());
- }
- }();
-
- WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
- break;
- }
- case OpCode::Id::IMNMX_C:
- case OpCode::Id::IMNMX_R:
- case OpCode::Id::IMNMX_IMM: {
- UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
-
- const bool is_signed = instr.imnmx.is_signed;
-
- const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
- const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
- const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
- const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
-
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::LEA_R2:
- case OpCode::Id::LEA_R1:
- case OpCode::Id::LEA_IMM:
- case OpCode::Id::LEA_RZ:
- case OpCode::Id::LEA_HI: {
- auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::LEA_R2: {
- return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
- Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
- }
- case OpCode::Id::LEA_R1: {
- const bool neg = instr.lea.r1.neg != 0;
- return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
- GetRegister(instr.gpr20),
- Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
- }
- case OpCode::Id::LEA_IMM: {
- const bool neg = instr.lea.imm.neg != 0;
- return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
- Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
- Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
- }
- case OpCode::Id::LEA_RZ: {
- const bool neg = instr.lea.rz.neg != 0;
- return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
- GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
- Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
- }
- case OpCode::Id::LEA_HI:
- default:
- UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
-
- return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
- Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
- }
- }();
-
- UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
- "Unhandled LEA Predicate");
-
- Node value =
- Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_));
- value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value));
- SetRegister(bb, instr.gpr0, std::move(value));
-
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
- Node imm_lut, bool sets_cc) {
- const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) {
- Node value = Immediate(0);
- const ImmediateNode imm = std::get<ImmediateNode>(*ttbl);
- if (imm.GetValue() & 0x01) {
- const Node a = Operation(OperationCode::IBitwiseNot, na);
- const Node b = Operation(OperationCode::IBitwiseNot, nb);
- const Node c = Operation(OperationCode::IBitwiseNot, nc);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x02) {
- const Node a = Operation(OperationCode::IBitwiseNot, na);
- const Node b = Operation(OperationCode::IBitwiseNot, nb);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x04) {
- const Node a = Operation(OperationCode::IBitwiseNot, na);
- const Node c = Operation(OperationCode::IBitwiseNot, nc);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x08) {
- const Node a = Operation(OperationCode::IBitwiseNot, na);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x10) {
- const Node b = Operation(OperationCode::IBitwiseNot, nb);
- const Node c = Operation(OperationCode::IBitwiseNot, nc);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x20) {
- const Node b = Operation(OperationCode::IBitwiseNot, nb);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x40) {
- const Node c = Operation(OperationCode::IBitwiseNot, nc);
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- if (imm.GetValue() & 0x80) {
- Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
- r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
- value = Operation(OperationCode::IBitwiseOr, value, r);
- }
- return value;
- }(op_a, op_b, op_c, imm_lut);
-
- SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc);
- SetRegister(bb, dest, lop3_fast);
-}
-
-} // namespace VideoCommon::Shader
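For reference, a standalone scalar sketch of the LOP3 lookup-table semantics that WriteLop3Instruction expands node-by-node above: bit i of the 8-bit LUT gives the output for inputs (a, b, c) whose bits spell i, with a as the most significant selector. Names here are illustrative, not part of the deleted code.

#include <cstdint>

std::uint32_t Lop3(std::uint32_t a, std::uint32_t b, std::uint32_t c, std::uint8_t lut) {
    std::uint32_t result = 0;
    for (int i = 0; i < 8; ++i) {
        if ((lut >> i) & 1) {
            // Entry i covers the bit positions where (a_bit << 2) | (b_bit << 1) | c_bit == i
            const std::uint32_t sa = (i & 4) ? a : ~a;
            const std::uint32_t sb = (i & 2) ? b : ~b;
            const std::uint32_t sc = (i & 1) ? c : ~c;
            result |= sa & sb & sc;
        }
    }
    return result;
}

For example, lut == 0x96 computes a ^ b ^ c and lut == 0xE8 computes the majority function.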
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
deleted file mode 100644
index 73580277a..000000000
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::LogicOperation;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-using Tegra::Shader::PredicateResultMode;
-using Tegra::Shader::Register;
-
-u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- Node op_a = GetRegister(instr.gpr8);
- Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32));
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::IADD32I: {
- UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
-
- op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true);
-
- Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b));
-
- SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0);
- SetRegister(bb, instr.gpr0, std::move(value));
- break;
- }
- case OpCode::Id::LOP32I: {
- if (instr.alu.lop32i.invert_a) {
- op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a));
- }
-
- if (instr.alu.lop32i.invert_b) {
- op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
- }
-
- WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a),
- std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex,
- instr.op_32.generates_cc != 0);
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
- opcode->get().GetName());
- }
-
- return pc;
-}
-
-void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
- Node op_b, PredicateResultMode predicate_mode, Pred predicate,
- bool sets_cc) {
- Node result = [&] {
- switch (logic_op) {
- case LogicOperation::And:
- return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
- case LogicOperation::Or:
- return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
- case LogicOperation::Xor:
- return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
- case LogicOperation::PassB:
- return op_b;
- default:
- UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op);
- return Immediate(0);
- }
- }();
-
- SetInternalFlagsFromInteger(bb, result, sets_cc);
- SetRegister(bb, dest, result);
-
- // Write the predicate value depending on the predicate mode.
- switch (predicate_mode) {
- case PredicateResultMode::None:
- // Do nothing.
- return;
- case PredicateResultMode::NotZero: {
- // Set the predicate to true if the result is not zero.
- Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
- SetPredicate(bb, static_cast<u64>(predicate), std::move(compare));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode);
- }
-}
-
-} // namespace VideoCommon::Shader
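A small sketch of the predicate-result handling in WriteLogicOperation above, reduced to scalars (names illustrative): in NotZero mode the destination predicate records whether the logic result was non-zero; in None mode nothing is written.

#include <cstdint>
#include <optional>

std::optional<bool> LopPredicate(std::uint32_t result, bool not_zero_mode) {
    if (!not_zero_mode) {
        return std::nullopt;  // PredicateResultMode::None writes no predicate
    }
    return result != 0;  // PredicateResultMode::NotZero
}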
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
deleted file mode 100644
index 8e3b46e8e..000000000
--- a/src/video_core/shader/decode/bfe.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- Node op_a = GetRegister(instr.gpr8);
- Node op_b = [&] {
- switch (opcode->get().GetId()) {
- case OpCode::Id::BFE_R:
- return GetRegister(instr.gpr20);
- case OpCode::Id::BFE_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- case OpCode::Id::BFE_IMM:
- return Immediate(instr.alu.GetSignedImm20_20());
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
-    UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE are not implemented");
-
- const bool is_signed = instr.bfe.is_signed;
-
-    // Uses the reverse parallel method from
-    // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
-    // Note for later: a faster method may be possible.
- if (instr.bfe.brev) {
- const auto swap = [&](u32 s, u32 mask) {
- Node v1 =
- SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
- if (mask != 0) {
- v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
- Immediate(mask));
- }
- Node v2 = op_a;
- if (mask != 0) {
- v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
- Immediate(mask));
- }
- v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
- Immediate(s));
- return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
- std::move(v2));
- };
- op_a = swap(1, 0x55555555U);
- op_a = swap(2, 0x33333333U);
- op_a = swap(4, 0x0F0F0F0FU);
- op_a = swap(8, 0x00FF00FFU);
- op_a = swap(16, 0);
- }
-
- const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
- Immediate(0), Immediate(8));
- const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
- Immediate(8), Immediate(8));
- auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
- SetRegister(bb, instr.gpr0, std::move(result));
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
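A scalar rendition of the BFE.BREV swap ladder deleted above, i.e. the "reverse parallel" method the comment cites (a sketch, not the emulator's Node IR):

#include <cstdint>

constexpr std::uint32_t ReverseBits(std::uint32_t v) {
    v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);  // swap odd/even bits
    v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);  // swap bit pairs
    v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4);  // swap nibbles
    v = ((v >> 8) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8);  // swap bytes
    v = (v >> 16) | (v << 16);                                // swap half-words
    return v;
}

static_assert(ReverseBits(0x80000000u) == 0x00000001u);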
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
deleted file mode 100644
index 70d1c055b..000000000
--- a/src/video_core/shader/decode/bfi.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::BFI_RC:
- return {GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
- case OpCode::Id::BFI_IMM_R:
- return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
- default:
- UNREACHABLE();
- return {Immediate(0), Immediate(0)};
- }
- }();
- const Node insert = GetRegister(instr.gpr8);
- const Node offset = BitfieldExtract(packed_shift, 0, 8);
- const Node bits = BitfieldExtract(packed_shift, 8, 8);
-
- const Node value =
- Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
-
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
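A scalar sketch of the operand packing BFI uses above: the shift operand carries the insertion offset in bits [0:8) and the field width in bits [8:16). The function name is illustrative.

#include <cstdint>

std::uint32_t BitfieldInsert(std::uint32_t base, std::uint32_t insert,
                             std::uint32_t packed_shift) {
    const std::uint32_t offset = packed_shift & 0xFF;
    const std::uint32_t bits = (packed_shift >> 8) & 0xFF;
    // Guard against shifting by 32 or more, which is undefined behavior
    const std::uint32_t mask = (bits >= 32 ? ~0u : (1u << bits) - 1) << offset;
    return (base & ~mask) | ((insert << offset) & mask);
}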
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
deleted file mode 100644
index fea7a54df..000000000
--- a/src/video_core/shader/decode/conversion.cpp
+++ /dev/null
@@ -1,321 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <limits>
-#include <optional>
-#include <utility>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Register;
-
-namespace {
-
-constexpr OperationCode GetFloatSelector(u64 selector) {
- return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
-}
-
-constexpr u32 SizeInBits(Register::Size size) {
- switch (size) {
- case Register::Size::Byte:
- return 8;
- case Register::Size::Short:
- return 16;
- case Register::Size::Word:
- return 32;
- case Register::Size::Long:
- return 64;
- }
- return 0;
-}
-
-constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
- Register::Size dst_size,
- bool src_signed,
- bool dst_signed) {
- const u32 dst_bits = SizeInBits(dst_size);
- if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
- if (src_signed == dst_signed) {
- return std::nullopt;
- }
- return std::make_pair(0, std::numeric_limits<s32>::max());
- }
- if (dst_signed) {
-        // Signed destination: clamp to [-128, 127] for a byte, for instance
- return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1);
- } else {
- // Unsigned destination
- if (dst_bits == 32) {
-            // Avoid shifting by 32; that is undefined behavior
- return std::make_pair(0, s32(std::numeric_limits<u32>::max()));
- }
- return std::make_pair(0, (1 << dst_bits) - 1);
- }
-}
-
-} // Anonymous namespace
-
-u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::I2I_R:
- case OpCode::Id::I2I_C:
- case OpCode::Id::I2I_IMM: {
- const bool src_signed = instr.conversion.is_input_signed;
- const bool dst_signed = instr.conversion.is_output_signed;
- const Register::Size src_size = instr.conversion.src_size;
- const Register::Size dst_size = instr.conversion.dst_size;
- const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
-
- Node value = [this, instr, opcode] {
- switch (opcode->get().GetId()) {
- case OpCode::Id::I2I_R:
- return GetRegister(instr.gpr20);
- case OpCode::Id::I2I_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- case OpCode::Id::I2I_IMM:
- return Immediate(instr.alu.GetSignedImm20_20());
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- // Ensure the source selector is valid
- switch (instr.conversion.src_size) {
- case Register::Size::Byte:
- break;
- case Register::Size::Short:
- ASSERT(selector == 0 || selector == 2);
- break;
- default:
- ASSERT(selector == 0);
- break;
- }
-
- if (src_size != Register::Size::Word || selector != 0) {
- value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
- Immediate(selector * 8), Immediate(SizeInBits(src_size)));
- }
-
- value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
- instr.conversion.negate_a, src_signed);
-
- if (instr.alu.saturate_d) {
- if (src_signed && !dst_signed) {
- Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
- Immediate(1 << (SizeInBits(src_size) - 1)));
- value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
- std::move(value));
-
-                // Simplify generated expressions; this can be removed without semantic impact
- SetTemporary(bb, 0, std::move(value));
- value = GetTemporary(0);
-
- if (dst_size != Register::Size::Word) {
- const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
- Node is_large =
- Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
- value = Operation(OperationCode::Select, std::move(is_large), limit,
- std::move(value));
- }
- } else if (const std::optional bounds =
- IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
- value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
- Immediate(bounds->first));
- value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
- Immediate(bounds->second));
- }
- } else if (dst_size != Register::Size::Word) {
- // No saturation, we only have to mask the result
- Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
- value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
- }
-
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, std::move(value));
- break;
- }
- case OpCode::Id::I2F_R:
- case OpCode::Id::I2F_C:
- case OpCode::Id::I2F_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition code generation in I2F is not implemented");
-
- Node value = [&] {
- switch (opcode->get().GetId()) {
- case OpCode::Id::I2F_R:
- return GetRegister(instr.gpr20);
- case OpCode::Id::I2F_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- case OpCode::Id::I2F_IMM:
- return Immediate(instr.alu.GetSignedImm20_20());
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- const bool input_signed = instr.conversion.is_input_signed;
-
- if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) {
- ASSERT(instr.conversion.src_size == Register::Size::Byte ||
- instr.conversion.src_size == Register::Size::Short);
- if (instr.conversion.src_size == Register::Size::Short) {
- ASSERT(offset == 0 || offset == 2);
- }
- value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
- std::move(value), Immediate(offset * 8));
- }
-
- value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
- value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
- value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
- value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
-
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
-
- if (instr.conversion.dst_size == Register::Size::Short) {
- value = Operation(OperationCode::HCastFloat, PRECISE, value);
- }
-
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::F2F_R:
- case OpCode::Id::F2F_C:
- case OpCode::Id::F2F_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
- UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition code generation in F2F is not implemented");
-
- Node value = [&]() {
- switch (opcode->get().GetId()) {
- case OpCode::Id::F2F_R:
- return GetRegister(instr.gpr20);
- case OpCode::Id::F2F_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- case OpCode::Id::F2F_IMM:
- return GetImmediate19(instr);
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- if (instr.conversion.src_size == Register::Size::Short) {
- value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
- std::move(value));
- } else {
- ASSERT(instr.conversion.float_src.selector == 0);
- }
-
- value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
-
- value = [&] {
- if (instr.conversion.src_size != instr.conversion.dst_size) {
-                // Rounding operations only matter when the source and destination conversion
-                // sizes are the same.
- return value;
- }
- switch (instr.conversion.f2f.GetRoundingMode()) {
- case Tegra::Shader::F2fRoundingOp::None:
- return value;
- case Tegra::Shader::F2fRoundingOp::Round:
- return Operation(OperationCode::FRoundEven, value);
- case Tegra::Shader::F2fRoundingOp::Floor:
- return Operation(OperationCode::FFloor, value);
- case Tegra::Shader::F2fRoundingOp::Ceil:
- return Operation(OperationCode::FCeil, value);
- case Tegra::Shader::F2fRoundingOp::Trunc:
- return Operation(OperationCode::FTrunc, value);
- default:
- UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
- instr.conversion.f2f.rounding.Value());
- return value;
- }
- }();
- value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
-
- if (instr.conversion.dst_size == Register::Size::Short) {
- value = Operation(OperationCode::HCastFloat, PRECISE, value);
- }
-
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::F2I_R:
- case OpCode::Id::F2I_C:
- case OpCode::Id::F2I_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition code generation in F2I is not implemented");
- Node value = [&]() {
- switch (opcode->get().GetId()) {
- case OpCode::Id::F2I_R:
- return GetRegister(instr.gpr20);
- case OpCode::Id::F2I_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- case OpCode::Id::F2I_IMM:
- return GetImmediate19(instr);
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- if (instr.conversion.src_size == Register::Size::Short) {
- value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
- std::move(value));
- } else {
- ASSERT(instr.conversion.float_src.selector == 0);
- }
-
- value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
-
- value = [&]() {
- switch (instr.conversion.f2i.rounding) {
- case Tegra::Shader::F2iRoundingOp::RoundEven:
- return Operation(OperationCode::FRoundEven, PRECISE, value);
- case Tegra::Shader::F2iRoundingOp::Floor:
- return Operation(OperationCode::FFloor, PRECISE, value);
- case Tegra::Shader::F2iRoundingOp::Ceil:
- return Operation(OperationCode::FCeil, PRECISE, value);
- case Tegra::Shader::F2iRoundingOp::Trunc:
- return Operation(OperationCode::FTrunc, PRECISE, value);
- default:
- UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
- instr.conversion.f2i.rounding.Value());
- return Immediate(0);
- }
- }();
- const bool is_signed = instr.conversion.is_output_signed;
- value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
- value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed);
-
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
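For orientation, hand-worked bounds produced by the IntegerSaturateBounds helper deleted above, covering representative cases (a sketch, not generated output):

// dst          bounds returned
//  -> s8       [-128, 127]            i.e. (-(1 << 7), (1 << 7) - 1)
//  -> u8       [0, 255]               i.e. (1 << 8) - 1
//  -> u16      [0, 65535]
//  -> u32      [0, s32(0xFFFFFFFF)]   special-cased so (1 << 32) is never evaluated
// s32 -> u32 (word to word, signedness change): [0, INT32_MAX]
// s32 -> s32 (same signedness): std::nullopt, no clamp is emitted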
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
deleted file mode 100644
index 5973588d6..000000000
--- a/src/video_core/shader/decode/ffma.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
- if (instr.ffma.tab5980_0 != 1) {
- LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
- }
- if (instr.ffma.tab5980_1 != 0) {
- LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
- }
-
- const Node op_a = GetRegister(instr.gpr8);
-
- auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::FFMA_CR: {
- return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
- GetRegister(instr.gpr39)};
- }
- case OpCode::Id::FFMA_RR:
- return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
- case OpCode::Id::FFMA_RC: {
- return {GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
- }
- case OpCode::Id::FFMA_IMM:
- return {GetImmediate19(instr), GetRegister(instr.gpr39)};
- default:
- UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
- return {Immediate(0), Immediate(0)};
- }
- }();
-
- op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b);
- op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c);
-
- Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
- value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
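The FFMA decoder deleted above lowers to a single fused multiply-add. A scalar equivalent with the negate modifiers applied (a sketch; names are illustrative):

#include <cmath>

float Ffma(float a, float b, float c, bool negate_b, bool negate_c) {
    if (negate_b) {
        b = -b;
    }
    if (negate_c) {
        c = -c;
    }
    return std::fma(a, b, c);  // a * b + c with a single rounding step
}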
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
deleted file mode 100644
index 5614e8a0d..000000000
--- a/src/video_core/shader/decode/float_set.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
-
- const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
- instr.fset.neg_a != 0);
-
- Node op_b = [&]() {
- if (instr.is_b_imm) {
- return GetImmediate19(instr);
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
-
- op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
-
-    // The fset instruction sets a register to 1.0 (if the bf bit is set) or -1 (if it is
-    // clear) when the condition is true, and to 0 otherwise.
- const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
-
- const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
- const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);
-
- const Node predicate = Operation(combiner, first_pred, second_pred);
-
- const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1);
- const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0);
- const Node value =
- Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
-
- if (instr.fset.bf) {
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
- } else {
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- }
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
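A scalar sketch of the FSET result encoding above: with the bf ("boolean float") bit set the result is a float, otherwise an all-ones integer mask. The function name is illustrative.

#include <cstdint>
#include <cstring>

std::uint32_t FsetResult(bool condition, bool bf) {
    if (bf) {
        const float f = condition ? 1.0f : 0.0f;
        std::uint32_t bits;
        std::memcpy(&bits, &f, sizeof(bits));  // reinterpret the float's bit pattern
        return bits;
    }
    return condition ? 0xFFFFFFFFu : 0u;  // integer -1 has all bits set
}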
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
deleted file mode 100644
index 200c2c983..000000000
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-
-u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
-
- Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
- instr.fsetp.neg_a != 0);
- Node op_b = [&]() {
- if (instr.is_b_imm) {
- return GetImmediate19(instr);
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
- op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b);
-
-    // We can't use the constant predicate as a destination.
- ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
- const Node predicate =
- GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b));
- const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
-
- const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
- const Node value = Operation(combiner, predicate, second_pred);
-
- // Set the primary predicate to the result of Predicate OP SecondPredicate
- SetPredicate(bb, instr.fsetp.pred3, value);
-
- if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
- // if enabled
- const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
- const Node second_value = Operation(combiner, negated_pred, second_pred);
- SetPredicate(bb, instr.fsetp.pred0, second_value);
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
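A sketch of the dual predicate write deleted above, reduced to scalars (names illustrative): the primary predicate receives (comparison OP second), and, when enabled, the secondary predicate receives (!comparison OP second).

#include <functional>

struct FsetpOutputs {
    bool primary;
    bool secondary;
};

template <typename Combiner>
FsetpOutputs Fsetp(bool comparison, bool second, Combiner op) {
    return {op(comparison, second), op(!comparison, second)};
}

// e.g. Fsetp(a < b, p39, std::logical_and<>{})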
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
deleted file mode 100644
index fa83108cd..000000000
--- a/src/video_core/shader/decode/half_set.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::PredCondition;
-
-u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- PredCondition cond{};
- bool bf = false;
- bool ftz = false;
- bool neg_a = false;
- bool abs_a = false;
- bool neg_b = false;
- bool abs_b = false;
- switch (opcode->get().GetId()) {
- case OpCode::Id::HSET2_C:
- case OpCode::Id::HSET2_IMM:
- cond = instr.hsetp2.cbuf_and_imm.cond;
- bf = instr.Bit(53);
- ftz = instr.Bit(54);
- neg_a = instr.Bit(43);
- abs_a = instr.Bit(44);
- neg_b = instr.Bit(56);
- abs_b = instr.Bit(54);
- break;
- case OpCode::Id::HSET2_R:
- cond = instr.hsetp2.reg.cond;
- bf = instr.Bit(49);
- ftz = instr.Bit(50);
- neg_a = instr.Bit(43);
- abs_a = instr.Bit(44);
- neg_b = instr.Bit(31);
- abs_b = instr.Bit(30);
- break;
- default:
- UNREACHABLE();
- }
-
- Node op_b = [this, instr, opcode] {
- switch (opcode->get().GetId()) {
- case OpCode::Id::HSET2_C:
-            // Report as unimplemented, since this path is untested.
- UNIMPLEMENTED_MSG("HSET2_C is not implemented");
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- case OpCode::Id::HSET2_R:
- return GetRegister(instr.gpr20);
- case OpCode::Id::HSET2_IMM:
- return UnpackHalfImmediate(instr, true);
- default:
- UNREACHABLE();
- return Node{};
- }
- }();
-
- if (!ftz) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
- }
-
- Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
- op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::HSET2_R:
- op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
- [[fallthrough]];
- case OpCode::Id::HSET2_C:
- op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
- break;
- default:
- break;
- }
-
- Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
-
- Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
-
- const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
-
- // HSET2 operates on each half float in the pack.
- std::array<Node, 2> values;
- for (u32 i = 0; i < 2; ++i) {
- const u32 raw_value = bf ? 0x3c00 : 0xffff;
- Node true_value = Immediate(raw_value << (i * 16));
- Node false_value = Immediate(0);
-
- Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
- Node predicate = Operation(combiner, comparison, second_pred);
- values[i] =
- Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
- }
-
- Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
- SetRegister(bb, instr.gpr0, move(value));
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
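A sketch of the HSET2 per-half result packing above: 0x3c00 is 1.0 encoded as an IEEE 754 half float, 0xffff is the all-ones "true" mask, and each half of the 32-bit register gets its own result. Names are illustrative.

#include <cstdint>

std::uint32_t Hset2Result(bool lo_cond, bool hi_cond, bool bf) {
    const std::uint32_t true_bits = bf ? 0x3c00u : 0xffffu;
    std::uint32_t value = 0;
    if (lo_cond) {
        value |= true_bits;  // half 0 lives in bits [0:16)
    }
    if (hi_cond) {
        value |= true_bits << 16;  // half 1 lives in bits [16:32)
    }
    return value;
}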
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
deleted file mode 100644
index 310655619..000000000
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-
-u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- if (instr.hsetp2.ftz != 0) {
- LOG_DEBUG(HW_GPU, "{} with FTZ is not implemented", opcode->get().GetName());
- }
-
- Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
- op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
-
- Tegra::Shader::PredCondition cond{};
- bool h_and{};
- Node op_b{};
- switch (opcode->get().GetId()) {
- case OpCode::Id::HSETP2_C:
- cond = instr.hsetp2.cbuf_and_imm.cond;
- h_and = instr.hsetp2.cbuf_and_imm.h_and;
- op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
- instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
- // F32 is hardcoded in hardware
- op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32);
- break;
- case OpCode::Id::HSETP2_IMM:
- cond = instr.hsetp2.cbuf_and_imm.cond;
- h_and = instr.hsetp2.cbuf_and_imm.h_and;
- op_b = UnpackHalfImmediate(instr, true);
- break;
- case OpCode::Id::HSETP2_R:
- cond = instr.hsetp2.reg.cond;
- h_and = instr.hsetp2.reg.h_and;
- op_b =
- GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b),
- instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b);
- break;
- default:
- UNREACHABLE();
- op_b = Immediate(0);
- }
-
- const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
- const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
-
- const auto Write = [&](u64 dest, Node src) {
- SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred));
- };
-
- const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
- const u64 first = instr.hsetp2.pred3;
- const u64 second = instr.hsetp2.pred0;
- if (h_and) {
- Node joined = Operation(OperationCode::LogicalAnd2, comparison);
- Write(first, joined);
- Write(second, Operation(OperationCode::LogicalNegate, std::move(joined)));
- } else {
- Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U)));
- Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U)));
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
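
HSETP2 writes two predicates at once. In H_AND mode both half comparisons are ANDed together and the second destination receives the negation; otherwise each destination picks one half of the comparison pair. A sketch of that write pattern, again assuming an AND combiner (illustrative names only):

#include <array>
#include <cstdio>

// `comparison` holds the per-half results of GetPredicateComparisonHalf;
// `combine` stands in for the combiner applied with the second predicate.
void EmulateHsetp2Writes(std::array<bool, 2> comparison, bool second_pred,
                         bool h_and, bool* first, bool* second) {
    const auto combine = [second_pred](bool p) { return p && second_pred; };
    if (h_and) {
        // H_AND: both halves must pass; the second destination gets the negation.
        const bool joined = comparison[0] && comparison[1];
        *first = combine(joined);
        *second = combine(!joined);
    } else {
        // Otherwise each destination picks one half of the comparison pair.
        *first = combine(comparison[0]);
        *second = combine(comparison[1]);
    }
}

int main() {
    bool first = false;
    bool second = false;
    EmulateHsetp2Writes({true, false}, true, true, &first, &second);
    std::printf("h_and: first=%d second=%d\n", first, second); // first=0 second=1
    return 0;
}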
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
deleted file mode 100644
index 5b44cb79c..000000000
--- a/src/video_core/shader/decode/hfma2.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <tuple>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::HalfPrecision;
-using Tegra::Shader::HalfType;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
- DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
- } else {
- DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
- }
-
- constexpr auto identity = HalfType::H0_H1;
- bool neg_b{}, neg_c{};
- auto [saturate, type_b, op_b, type_c,
- op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::HFMA2_CR:
- neg_b = instr.hfma2.negate_b;
- neg_c = instr.hfma2.negate_c;
- return {instr.hfma2.saturate, HalfType::F32,
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
- instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
- case OpCode::Id::HFMA2_RC:
- neg_b = instr.hfma2.negate_b;
- neg_c = instr.hfma2.negate_c;
- return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
- HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
- case OpCode::Id::HFMA2_RR:
- neg_b = instr.hfma2.rr.negate_b;
- neg_c = instr.hfma2.rr.negate_c;
- return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
- instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
- case OpCode::Id::HFMA2_IMM_R:
- neg_c = instr.hfma2.negate_c;
- return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
- instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
- default:
- return {false, identity, Immediate(0), identity, Immediate(0)};
- }
- }();
-
- const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
- op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
- op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
-
- Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
- value = GetSaturatedHalfFloat(value, saturate);
- value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
-
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
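
HFMA2 is a fused multiply-add over each lane of a packed f16x2 value, with optional negation of the b and c operands and saturation to [0, 1]. A scalar per-lane sketch, using plain floats in place of the half lanes (illustrative helper, not the deleted decoder's API):

#include <algorithm>
#include <cmath>
#include <cstdio>

// One lane of HFMA2: value = fma(a, ±b, ±c), optionally clamped to [0, 1].
float Hfma2Lane(float a, float b, float c, bool neg_b, bool neg_c, bool saturate) {
    const float value = std::fma(a, neg_b ? -b : b, neg_c ? -c : c);
    return saturate ? std::clamp(value, 0.0f, 1.0f) : value;
}

int main() {
    std::printf("%f\n", Hfma2Lane(2.0f, 3.0f, 1.0f, false, true, false)); // 5.0
    std::printf("%f\n", Hfma2Lane(2.0f, 3.0f, 1.0f, false, false, true)); // 1.0 (saturated)
    return 0;
}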
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
deleted file mode 100644
index 5470e8cf4..000000000
--- a/src/video_core/shader/decode/image.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <vector>
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/bit_field.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-#include "video_core/textures/texture.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::PredCondition;
-using Tegra::Shader::StoreType;
-using Tegra::Texture::ComponentType;
-using Tegra::Texture::TextureFormat;
-using Tegra::Texture::TICEntry;
-
-namespace {
-
-ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
- std::size_t component) {
- const TextureFormat format{descriptor.format};
- switch (format) {
- case TextureFormat::R16G16B16A16:
- case TextureFormat::R32G32B32A32:
- case TextureFormat::R32G32B32:
- case TextureFormat::R32G32:
- case TextureFormat::R16G16:
- case TextureFormat::R32:
- case TextureFormat::R16:
- case TextureFormat::R8:
- case TextureFormat::R1:
- if (component == 0) {
- return descriptor.r_type;
- }
- if (component == 1) {
- return descriptor.g_type;
- }
- if (component == 2) {
- return descriptor.b_type;
- }
- if (component == 3) {
- return descriptor.a_type;
- }
- break;
- case TextureFormat::A8R8G8B8:
- if (component == 0) {
- return descriptor.a_type;
- }
- if (component == 1) {
- return descriptor.r_type;
- }
- if (component == 2) {
- return descriptor.g_type;
- }
- if (component == 3) {
- return descriptor.b_type;
- }
- break;
- case TextureFormat::A2B10G10R10:
- case TextureFormat::A4B4G4R4:
- case TextureFormat::A5B5G5R1:
- case TextureFormat::A1B5G5R5:
- if (component == 0) {
- return descriptor.a_type;
- }
- if (component == 1) {
- return descriptor.b_type;
- }
- if (component == 2) {
- return descriptor.g_type;
- }
- if (component == 3) {
- return descriptor.r_type;
- }
- break;
- case TextureFormat::R32_B24G8:
- if (component == 0) {
- return descriptor.r_type;
- }
- if (component == 1) {
- return descriptor.b_type;
- }
- if (component == 2) {
- return descriptor.g_type;
- }
- break;
- case TextureFormat::B5G6R5:
- case TextureFormat::B6G5R5:
- case TextureFormat::B10G11R11:
- if (component == 0) {
- return descriptor.b_type;
- }
- if (component == 1) {
- return descriptor.g_type;
- }
- if (component == 2) {
- return descriptor.r_type;
- }
- break;
- case TextureFormat::R24G8:
- case TextureFormat::R8G24:
- case TextureFormat::R8G8:
- case TextureFormat::G4R4:
- if (component == 0) {
- return descriptor.g_type;
- }
- if (component == 1) {
- return descriptor.r_type;
- }
- break;
- default:
- break;
- }
- UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
- return ComponentType::FLOAT;
-}
-
-bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
- constexpr u8 R = 0b0001;
- constexpr u8 G = 0b0010;
- constexpr u8 B = 0b0100;
- constexpr u8 A = 0b1000;
- constexpr std::array<u8, 16> mask = {
- 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B),
- (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
- return std::bitset<4>{mask.at(component_mask)}.test(component);
-}
-
-u32 GetComponentSize(TextureFormat format, std::size_t component) {
- switch (format) {
- case TextureFormat::R32G32B32A32:
- return 32;
- case TextureFormat::R16G16B16A16:
- return 16;
- case TextureFormat::R32G32B32:
- return component <= 2 ? 32 : 0;
- case TextureFormat::R32G32:
- return component <= 1 ? 32 : 0;
- case TextureFormat::R16G16:
- return component <= 1 ? 16 : 0;
- case TextureFormat::R32:
- return component == 0 ? 32 : 0;
- case TextureFormat::R16:
- return component == 0 ? 16 : 0;
- case TextureFormat::R8:
- return component == 0 ? 8 : 0;
- case TextureFormat::R1:
- return component == 0 ? 1 : 0;
- case TextureFormat::A8R8G8B8:
- return 8;
- case TextureFormat::A2B10G10R10:
- return (component == 3 || component == 2 || component == 1) ? 10 : 2;
- case TextureFormat::A4B4G4R4:
- return 4;
- case TextureFormat::A5B5G5R1:
- return (component == 0 || component == 1 || component == 2) ? 5 : 1;
- case TextureFormat::A1B5G5R5:
- return (component == 1 || component == 2 || component == 3) ? 5 : 1;
- case TextureFormat::R32_B24G8:
- if (component == 0) {
- return 32;
- }
- if (component == 1) {
- return 24;
- }
- if (component == 2) {
- return 8;
- }
- return 0;
- case TextureFormat::B5G6R5:
- if (component == 0 || component == 2) {
- return 5;
- }
- if (component == 1) {
- return 6;
- }
- return 0;
- case TextureFormat::B6G5R5:
- if (component == 1 || component == 2) {
- return 5;
- }
- if (component == 0) {
- return 6;
- }
- return 0;
- case TextureFormat::B10G11R11:
- if (component == 1 || component == 2) {
- return 11;
- }
- if (component == 0) {
- return 10;
- }
- return 0;
- case TextureFormat::R24G8:
- if (component == 0) {
- return 8;
- }
- if (component == 1) {
- return 24;
- }
- return 0;
- case TextureFormat::R8G24:
- if (component == 0) {
- return 24;
- }
- if (component == 1) {
- return 8;
- }
- return 0;
- case TextureFormat::R8G8:
- return (component == 0 || component == 1) ? 8 : 0;
- case TextureFormat::G4R4:
- return (component == 0 || component == 1) ? 4 : 0;
- default:
- UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
- return 0;
- }
-}
-
-std::size_t GetImageComponentMask(TextureFormat format) {
- constexpr u8 R = 0b0001;
- constexpr u8 G = 0b0010;
- constexpr u8 B = 0b0100;
- constexpr u8 A = 0b1000;
- switch (format) {
- case TextureFormat::R32G32B32A32:
- case TextureFormat::R16G16B16A16:
- case TextureFormat::A8R8G8B8:
- case TextureFormat::A2B10G10R10:
- case TextureFormat::A4B4G4R4:
- case TextureFormat::A5B5G5R1:
- case TextureFormat::A1B5G5R5:
- return std::size_t{R | G | B | A};
- case TextureFormat::R32G32B32:
- case TextureFormat::R32_B24G8:
- case TextureFormat::B5G6R5:
- case TextureFormat::B6G5R5:
- case TextureFormat::B10G11R11:
- return std::size_t{R | G | B};
- case TextureFormat::R32G32:
- case TextureFormat::R16G16:
- case TextureFormat::R24G8:
- case TextureFormat::R8G24:
- case TextureFormat::R8G8:
- case TextureFormat::G4R4:
- return std::size_t{R | G};
- case TextureFormat::R32:
- case TextureFormat::R16:
- case TextureFormat::R8:
- case TextureFormat::R1:
- return std::size_t{R};
- default:
- UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
- return std::size_t{R | G | B | A};
- }
-}
-
-std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
- switch (image_type) {
- case Tegra::Shader::ImageType::Texture1D:
- case Tegra::Shader::ImageType::TextureBuffer:
- return 1;
- case Tegra::Shader::ImageType::Texture1DArray:
- case Tegra::Shader::ImageType::Texture2D:
- return 2;
- case Tegra::Shader::ImageType::Texture2DArray:
- case Tegra::Shader::ImageType::Texture3D:
- return 3;
- }
- UNREACHABLE();
- return 1;
-}
-} // Anonymous namespace
-
-std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
- Node original_value) {
- switch (component_type) {
- case ComponentType::SNORM: {
- // range [-1.0, 1.0]
- auto cnv_value = Operation(OperationCode::FMul, original_value,
- Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f));
- cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
- return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
- }
- case ComponentType::SINT:
- case ComponentType::UNORM: {
- bool is_signed = component_type == ComponentType::SINT;
- // range [0.0, 1.0]
- auto cnv_value = Operation(OperationCode::FMul, original_value,
- Immediate(static_cast<float>(1 << component_size) - 1.f));
- return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
- is_signed};
- }
- case ComponentType::UINT: // range [0, (1 << component_size) - 1]
- return {std::move(original_value), false};
- case ComponentType::FLOAT:
- if (component_size == 16) {
- return {Operation(OperationCode::HCastFloat, original_value), true};
- } else {
- return {std::move(original_value), true};
- }
- default:
- UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
- return {std::move(original_value), true};
- }
-}
-
-u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) {
- std::vector<Node> coords;
- const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)};
- coords.reserve(num_coords);
- for (std::size_t i = 0; i < num_coords; ++i) {
- coords.push_back(GetRegister(instr.gpr8.Value() + i));
- }
- return coords;
- };
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::SULD: {
- UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
- Tegra::Shader::OutOfBoundsStore::Ignore);
-
- const auto type{instr.suldst.image_type};
- auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
- : GetBindlessImage(instr.gpr39, type)};
- image.MarkRead();
-
- if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
- u32 indexer = 0;
- for (u32 element = 0; element < 4; ++element) {
- if (!instr.suldst.IsComponentEnabled(element)) {
- continue;
- }
- MetaImage meta{image, {}, element};
- Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
- SetTemporary(bb, indexer++, std::move(value));
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
- UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
- instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
-
- auto descriptor = [this, instr] {
- std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor;
- if (instr.suldst.is_immediate) {
- sampler_descriptor =
- registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
- } else {
- const Node image_register = GetRegister(instr.gpr39);
- const auto result = TrackCbuf(image_register, global_code,
- static_cast<s64>(global_code.size()));
- const auto buffer = std::get<1>(result);
- const auto offset = std::get<2>(result);
- sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset);
- }
- if (!sampler_descriptor) {
- UNREACHABLE_MSG("Failed to obtain image descriptor");
- }
- return *sampler_descriptor;
- }();
-
- const auto comp_mask = GetImageComponentMask(descriptor.format);
-
- switch (instr.suldst.GetStoreDataLayout()) {
- case StoreType::Bits32:
- case StoreType::Bits64: {
- u32 indexer = 0;
- u32 shifted_counter = 0;
- Node value = Immediate(0);
- for (u32 element = 0; element < 4; ++element) {
- if (!IsComponentEnabled(comp_mask, element)) {
- continue;
- }
- const auto component_type = GetComponentType(descriptor, element);
- const auto component_size = GetComponentSize(descriptor.format, element);
- MetaImage meta{image, {}, element};
-
- auto [converted_value, is_signed] = GetComponentValue(
- component_type, component_size,
- Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
-
- // shift element to correct position
- const auto shifted = shifted_counter;
- if (shifted > 0) {
- converted_value =
- SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
- std::move(converted_value), Immediate(shifted));
- }
- shifted_counter += component_size;
-
- // add value into result
- value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
-
- // once a full 32-bit word has been packed, save it into a temporary
- if (shifted_counter >= 32) {
- SetTemporary(bb, indexer++, std::move(value));
- // reset counter and value to prepare packing the next word
- value = Immediate(0);
- shifted_counter = 0;
- }
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
- default:
- UNREACHABLE();
- break;
- }
- }
- break;
- }
- case OpCode::Id::SUST: {
- UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
- UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
- Tegra::Shader::OutOfBoundsStore::Ignore);
- UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA
-
- std::vector<Node> values;
- constexpr std::size_t hardcoded_size{4};
- for (std::size_t i = 0; i < hardcoded_size; ++i) {
- values.push_back(GetRegister(instr.gpr0.Value() + i));
- }
-
- const auto type{instr.suldst.image_type};
- auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
- : GetBindlessImage(instr.gpr39, type)};
- image.MarkWrite();
-
- MetaImage meta{image, std::move(values)};
- bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type)));
- break;
- }
- case OpCode::Id::SUATOM: {
- UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
-
- const OperationCode operation_code = [instr] {
- switch (instr.suatom_d.operation_type) {
- case Tegra::Shader::ImageAtomicOperationType::S32:
- case Tegra::Shader::ImageAtomicOperationType::U32:
- switch (instr.suatom_d.operation) {
- case Tegra::Shader::ImageAtomicOperation::Add:
- return OperationCode::AtomicImageAdd;
- case Tegra::Shader::ImageAtomicOperation::And:
- return OperationCode::AtomicImageAnd;
- case Tegra::Shader::ImageAtomicOperation::Or:
- return OperationCode::AtomicImageOr;
- case Tegra::Shader::ImageAtomicOperation::Xor:
- return OperationCode::AtomicImageXor;
- case Tegra::Shader::ImageAtomicOperation::Exch:
- return OperationCode::AtomicImageExchange;
- default:
- break;
- }
- break;
- default:
- break;
- }
- UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
- static_cast<u64>(instr.suatom_d.operation.Value()),
- static_cast<u64>(instr.suatom_d.operation_type.Value()));
- return OperationCode::AtomicImageAdd;
- }();
-
- Node value = GetRegister(instr.gpr0);
-
- const auto type = instr.suatom_d.image_type;
- auto& image = GetImage(instr.image, type);
- image.MarkAtomic();
-
- MetaImage meta{image, {std::move(value)}};
- SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type)));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
- const auto offset = static_cast<u32>(image.index.Value());
-
- const auto it =
- std::find_if(std::begin(used_images), std::end(used_images),
- [offset](const ImageEntry& entry) { return entry.offset == offset; });
- if (it != std::end(used_images)) {
- ASSERT(!it->is_bindless && it->type == type);
- return *it;
- }
-
- const auto next_index = static_cast<u32>(used_images.size());
- return used_images.emplace_back(next_index, offset, type);
-}
-
-ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
- const Node image_register = GetRegister(reg);
- const auto result =
- TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
-
- const auto buffer = std::get<1>(result);
- const auto offset = std::get<2>(result);
-
- const auto it = std::find_if(std::begin(used_images), std::end(used_images),
- [buffer, offset](const ImageEntry& entry) {
- return entry.buffer == buffer && entry.offset == offset;
- });
- if (it != std::end(used_images)) {
- ASSERT(it->is_bindless && it->type == type);
- return *it;
- }
-
- const auto next_index = static_cast<u32>(used_images.size());
- return used_images.emplace_back(next_index, offset, buffer, type);
-}
-
-} // namespace VideoCommon::Shader
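
The SULD.D path above packs the enabled texel components into 32-bit registers by shifting each converted component to its position, ORing it into an accumulator, and flushing the accumulator whenever 32 bits have been gathered. A host-side sketch of the same packing (illustrative helper; sizes are in bits):

#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<std::uint32_t> PackComponents(const std::vector<std::uint32_t>& values,
                                          const std::vector<std::uint32_t>& sizes) {
    std::vector<std::uint32_t> words;
    std::uint32_t current = 0;
    std::uint32_t shifted = 0;
    for (std::size_t i = 0; i < values.size(); ++i) {
        current |= values[i] << shifted; // shift the component to its position
        shifted += sizes[i];
        if (shifted >= 32) {             // a full word is ready
            words.push_back(current);
            current = 0;
            shifted = 0;
        }
    }
    return words;
}

int main() {
    // Four 16-bit components (e.g. two R16G16 loads) pack into two words.
    const auto words = PackComponents({0x1111, 0x2222, 0x3333, 0x4444}, {16, 16, 16, 16});
    for (const std::uint32_t word : words) {
        std::printf("%08x\n", word); // 22221111, then 44443333
    }
    return 0;
}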
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
deleted file mode 100644
index 59809bcd8..000000000
--- a/src/video_core/shader/decode/integer_set.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
-
- const Node op_a = GetRegister(instr.gpr8);
- const Node op_b = [&]() {
- if (instr.is_b_imm) {
- return Immediate(instr.alu.GetSignedImm20_20());
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
-
- // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition
- // is true, and to 0 otherwise.
- const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
- const Node first_pred =
- GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);
-
- const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
-
- const Node predicate = Operation(combiner, first_pred, second_pred);
-
- const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1);
- const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0);
- const Node value =
- Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
-
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
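
As the comment in DecodeIntegerSet notes, the destination holds 1.0f or 0.0f when BF is set, and integer -1 (all bits set) or 0 otherwise. A sketch of the resulting register bits (illustrative helper):

#include <cstdint>
#include <cstdio>
#include <cstring>

std::uint32_t EmulateIset(bool condition, bool bf) {
    std::uint32_t bits = 0;
    if (bf) {
        // BF set: the register is interpreted as a float.
        const float value = condition ? 1.0f : 0.0f;
        std::memcpy(&bits, &value, sizeof(bits));
    } else {
        // BF clear: integer -1 (all ones) or 0.
        bits = condition ? 0xffffffffu : 0u;
    }
    return bits;
}

int main() {
    std::printf("%08x %08x\n", EmulateIset(true, true), EmulateIset(true, false));
    // 3f800000 ffffffff
    return 0;
}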
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
deleted file mode 100644
index 25e48fef8..000000000
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-
-u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
-
- const Node op_a = GetRegister(instr.gpr8);
-
- const Node op_b = [&]() {
- if (instr.is_b_imm) {
- return Immediate(instr.alu.GetSignedImm20_20());
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
-
- // We can't use the constant predicate as destination.
- ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
- const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
- const Node predicate =
- GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);
-
- // Set the primary predicate to the result of Predicate OP SecondPredicate
- const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
- const Node value = Operation(combiner, predicate, second_pred);
- SetPredicate(bb, instr.isetp.pred3, value);
-
- if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
- const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
- SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred));
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
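
ISETP's dual write gives the primary predicate (comparison OP second_pred) and, when pred0 is not the constant predicate, the secondary gets (!comparison OP second_pred). A sketch assuming AND for the combiner (illustrative names):

#include <cstdio>

void EmulateIsetp(bool comparison, bool second_pred, bool* primary, bool* secondary) {
    *primary = comparison && second_pred;    // Predicate OP SecondPredicate
    *secondary = !comparison && second_pred; // !Predicate OP SecondPredicate
}

int main() {
    bool pred0 = false;
    bool pred3 = false;
    EmulateIsetp(true, true, &pred3, &pred0);
    std::printf("pred3=%d pred0=%d\n", pred3, pred0); // pred3=1 pred0=0
    return 0;
}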
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
deleted file mode 100644
index 7728f600e..000000000
--- a/src/video_core/shader/decode/memory.cpp
+++ /dev/null
@@ -1,493 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <utility>
-#include <vector>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::AtomicOp;
-using Tegra::Shader::AtomicType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::GlobalAtomicType;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Register;
-using Tegra::Shader::StoreType;
-
-namespace {
-
-OperationCode GetAtomOperation(AtomicOp op) {
- switch (op) {
- case AtomicOp::Add:
- return OperationCode::AtomicIAdd;
- case AtomicOp::Min:
- return OperationCode::AtomicIMin;
- case AtomicOp::Max:
- return OperationCode::AtomicIMax;
- case AtomicOp::And:
- return OperationCode::AtomicIAnd;
- case AtomicOp::Or:
- return OperationCode::AtomicIOr;
- case AtomicOp::Xor:
- return OperationCode::AtomicIXor;
- case AtomicOp::Exch:
- return OperationCode::AtomicIExchange;
- default:
- UNIMPLEMENTED_MSG("op={}", op);
- return OperationCode::AtomicIAdd;
- }
-}
-
-bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
- return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
- uniform_type == Tegra::Shader::UniformType::UnsignedShort;
-}
-
-u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
- switch (uniform_type) {
- case Tegra::Shader::UniformType::UnsignedByte:
- return 0b11;
- case Tegra::Shader::UniformType::UnsignedShort:
- return 0b10;
- default:
- UNREACHABLE();
- return 0;
- }
-}
-
-u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
- switch (uniform_type) {
- case Tegra::Shader::UniformType::UnsignedByte:
- return 8;
- case Tegra::Shader::UniformType::UnsignedShort:
- return 16;
- case Tegra::Shader::UniformType::Single:
- return 32;
- case Tegra::Shader::UniformType::Double:
- return 64;
- case Tegra::Shader::UniformType::Quad:
- case Tegra::Shader::UniformType::UnsignedQuad:
- return 128;
- default:
- UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type);
- return 32;
- }
-}
-
-Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
- Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
- offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
- return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
-}
-
-Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
- Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
- offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
- return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
- Immediate(size));
-}
-
-Node Sign16Extend(Node value) {
- Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
- Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
- Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
- return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
-}
-
-} // Anonymous namespace
-
-u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::LD_A: {
- // Note: Shouldn't this use flat interpolation mode, i.e. no interpolation applied?
- UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
- "Indirect attribute loads are not supported");
- UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
- "Unaligned attribute loads are not supported");
- UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
- instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
- "Non-32 bits PHYS reads are not implemented");
-
- const Node buffer{GetRegister(instr.gpr39)};
-
- u64 next_element = instr.attribute.fmt20.element;
- auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
-
- const auto LoadNextElement = [&](u32 reg_offset) {
- const Node attribute{instr.attribute.fmt20.IsPhysical()
- ? GetPhysicalInputAttribute(instr.gpr8, buffer)
- : GetInputAttribute(static_cast<Attribute::Index>(next_index),
- next_element, buffer)};
-
- SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
-
- // Load the next attribute element into the following register. If the element
- // to load goes beyond the vec4 size, load the first element of the next
- // attribute.
- next_element = (next_element + 1) % 4;
- next_index = next_index + (next_element == 0 ? 1 : 0);
- };
-
- const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
- for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
- LoadNextElement(reg_offset);
- }
- break;
- }
- case OpCode::Id::LD_C: {
- UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
-
- Node index = GetRegister(instr.gpr8);
-
- const Node op_a =
- GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
-
- switch (instr.ld_c.type.Value()) {
- case Tegra::Shader::UniformType::Single:
- SetRegister(bb, instr.gpr0, op_a);
- break;
-
- case Tegra::Shader::UniformType::Double: {
- const Node op_b =
- GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
-
- SetTemporary(bb, 0, op_a);
- SetTemporary(bb, 1, op_b);
- SetRegister(bb, instr.gpr0, GetTemporary(0));
- SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value());
- }
- break;
- }
- case OpCode::Id::LD_L:
- LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown);
- [[fallthrough]];
- case OpCode::Id::LD_S: {
- const auto GetAddress = [&](s32 offset) {
- ASSERT(offset % 4 == 0);
- const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
- return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
- };
- const auto GetMemory = [&](s32 offset) {
- return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
- : GetLocalMemory(GetAddress(offset));
- };
-
- switch (instr.ldst_sl.type.Value()) {
- case StoreType::Signed16:
- SetRegister(bb, instr.gpr0,
- Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
- break;
- case StoreType::Bits32:
- case StoreType::Bits64:
- case StoreType::Bits128: {
- const u32 count = [&] {
- switch (instr.ldst_sl.type.Value()) {
- case StoreType::Bits32:
- return 1;
- case StoreType::Bits64:
- return 2;
- case StoreType::Bits128:
- return 4;
- default:
- UNREACHABLE();
- return 0;
- }
- }();
- for (u32 i = 0; i < count; ++i) {
- SetTemporary(bb, i, GetMemory(i * 4));
- }
- for (u32 i = 0; i < count; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
- default:
- UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
- instr.ldst_sl.type.Value());
- }
- break;
- }
- case OpCode::Id::LD:
- case OpCode::Id::LDG: {
- const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
- switch (opcode->get().GetId()) {
- case OpCode::Id::LD:
- UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
- return instr.generic.type;
- case OpCode::Id::LDG:
- return instr.ldg.type;
- default:
- UNREACHABLE();
- return {};
- }
- }();
-
- const auto [real_address_base, base_address, descriptor] =
- TrackGlobalMemory(bb, instr, true, false);
-
- const u32 size = GetMemorySize(type);
- const u32 count = Common::AlignUp(size, 32) / 32;
- if (!real_address_base || !base_address) {
- // Tracking failed, load zeroes.
- for (u32 i = 0; i < count; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
- }
- break;
- }
-
- for (u32 i = 0; i < count; ++i) {
- const Node it_offset = Immediate(i * 4);
- const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
- Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-
- // To handle unaligned loads get the bytes used to dereference global memory and extract
- // those bytes from the loaded u32.
- if (IsUnaligned(type)) {
- gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
- }
-
- SetTemporary(bb, i, gmem);
- }
-
- for (u32 i = 0; i < count; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
- case OpCode::Id::ST_A: {
- UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
- "Indirect attribute stores are not supported");
- UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
- "Unaligned attribute stores are not supported");
-
- u64 element = instr.attribute.fmt20.element;
- auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
-
- const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
- for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
- Node dest;
- if (instr.attribute.fmt20.patch) {
- const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
- dest = MakeNode<PatchNode>(offset);
- } else {
- dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
- GetRegister(instr.gpr39));
- }
- const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
-
- bb.push_back(Operation(OperationCode::Assign, dest, src));
-
- // Load the next attribute element into the following register. If the element to load
- // goes beyond the vec4 size, load the first element of the next attribute.
- element = (element + 1) % 4;
- index = index + (element == 0 ? 1 : 0);
- }
- break;
- }
- case OpCode::Id::ST_L:
- LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value());
- [[fallthrough]];
- case OpCode::Id::ST_S: {
- const auto GetAddress = [&](s32 offset) {
- ASSERT(offset % 4 == 0);
- const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
- return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
- };
-
- const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
- const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
- const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
-
- switch (instr.ldst_sl.type.Value()) {
- case StoreType::Bits128:
- (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
- (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
- [[fallthrough]];
- case StoreType::Bits64:
- (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
- [[fallthrough]];
- case StoreType::Bits32:
- (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
- break;
- case StoreType::Unsigned16:
- case StoreType::Signed16: {
- Node address = GetAddress(0);
- Node memory = (this->*get_memory)(address);
- (this->*set_memory)(
- bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
- instr.ldst_sl.type.Value());
- }
- break;
- }
- case OpCode::Id::ST:
- case OpCode::Id::STG: {
- const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
- switch (opcode->get().GetId()) {
- case OpCode::Id::ST:
- UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
- return instr.generic.type;
- case OpCode::Id::STG:
- return instr.stg.type;
- default:
- UNREACHABLE();
- return {};
- }
- }();
-
- // For unaligned reads we have to read memory too.
- const bool is_read = IsUnaligned(type);
- const auto [real_address_base, base_address, descriptor] =
- TrackGlobalMemory(bb, instr, is_read, true);
- if (!real_address_base || !base_address) {
- // Tracking failed, skip the store.
- break;
- }
-
- const u32 size = GetMemorySize(type);
- const u32 count = Common::AlignUp(size, 32) / 32;
- for (u32 i = 0; i < count; ++i) {
- const Node it_offset = Immediate(i * 4);
- const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
- const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- Node value = GetRegister(instr.gpr0.Value() + i);
-
- if (IsUnaligned(type)) {
- const u32 mask = GetUnalignedMask(type);
- value = InsertUnaligned(gmem, move(value), real_address, mask, size);
- }
-
- bb.push_back(Operation(OperationCode::Assign, gmem, value));
- }
- break;
- }
- case OpCode::Id::RED: {
- UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
- instr.red.type.Value());
- const auto [real_address, base_address, descriptor] =
- TrackGlobalMemory(bb, instr, true, true);
- if (!real_address || !base_address) {
- // Tracking failed, skip atomic.
- break;
- }
- Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- Node value = GetRegister(instr.gpr0);
- bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
- break;
- }
- case OpCode::Id::ATOM: {
- UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
- instr.atom.operation == AtomicOp::Dec ||
- instr.atom.operation == AtomicOp::SafeAdd,
- "operation={}", instr.atom.operation.Value());
- UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
- instr.atom.type == GlobalAtomicType::U64 ||
- instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
- instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
- "type={}", instr.atom.type.Value());
-
- const auto [real_address, base_address, descriptor] =
- TrackGlobalMemory(bb, instr, true, true);
- if (!real_address || !base_address) {
- // Tracking failed, skip atomic.
- break;
- }
-
- const bool is_signed =
- instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
- Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- SetRegister(bb, instr.gpr0,
- SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
- GetRegister(instr.gpr20)));
- break;
- }
- case OpCode::Id::ATOMS: {
- UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
- instr.atoms.operation == AtomicOp::Dec,
- "operation={}", instr.atoms.operation.Value());
- UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
- instr.atoms.type == AtomicType::U64,
- "type={}", instr.atoms.type.Value());
- const bool is_signed =
- instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
- const s32 offset = instr.atoms.GetImmediateOffset();
- Node address = GetRegister(instr.gpr8);
- address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
- SetRegister(bb, instr.gpr0,
- SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
- GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
- break;
- }
- case OpCode::Id::AL2P: {
- // Ignore al2p.direction since we don't care about it.
-
- // Calculate emulation fake physical address.
- const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
- const Node reg{GetRegister(instr.gpr8)};
- const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
-
- // Set the fake address to target register.
- SetRegister(bb, instr.gpr0, fake_address);
-
- // Signal the shader IR to declare all possible attributes and varyings
- uses_physical_attributes = true;
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
- Instruction instr,
- bool is_read, bool is_write) {
- const auto addr_register{GetRegister(instr.gmem.gpr)};
- const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
-
- const auto [base_address, index, offset] =
- TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
- ASSERT_OR_EXECUTE_MSG(
- base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
- "Global memory tracking failed");
-
- bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
-
- const GlobalMemoryBase descriptor{index, offset};
- const auto& entry = used_global_memory.try_emplace(descriptor).first;
- auto& usage = entry->second;
- usage.is_written |= is_write;
- usage.is_read |= is_read;
-
- const auto real_address =
- Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
-
- return {real_address, base_address, descriptor};
-}
-
-} // namespace VideoCommon::Shader
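
ExtractUnaligned and InsertUnaligned above turn the low address bits into a bit offset, (address & mask) * 8, and then perform a bitfield extract or insert on the loaded 32-bit word. A host-side sketch of that bit math with plain integers (illustrative; the decoder emits the equivalent IR operations):

#include <cstdint>
#include <cstdio>

std::uint32_t ExtractUnalignedBits(std::uint32_t value, std::uint32_t address,
                                   std::uint32_t mask, std::uint32_t size) {
    const std::uint32_t offset = (address & mask) * 8; // byte offset -> bit offset
    const std::uint32_t field = (size == 32) ? 0xffffffffu : ((1u << size) - 1);
    return (value >> offset) & field;
}

std::uint32_t InsertUnalignedBits(std::uint32_t dest, std::uint32_t value,
                                  std::uint32_t address, std::uint32_t mask,
                                  std::uint32_t size) {
    const std::uint32_t offset = (address & mask) * 8;
    const std::uint32_t field = ((size == 32) ? 0xffffffffu : ((1u << size) - 1)) << offset;
    return (dest & ~field) | ((value << offset) & field);
}

int main() {
    // Byte load (mask 0b11) from address 0x1002: bits 16..23 of the word.
    std::printf("%02x\n", ExtractUnalignedBits(0xaabbccdd, 0x1002, 0b11, 8)); // bb
    // Short store (mask 0b10) to address 0x1002: replaces the upper 16 bits.
    std::printf("%08x\n", InsertUnalignedBits(0xaabbccdd, 0x1234, 0x1002, 0b10, 16));
    // 1234ccdd
    return 0;
}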
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
deleted file mode 100644
index 5f88537bc..000000000
--- a/src/video_core/shader/decode/other.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::ConditionCode;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::IpaInterpMode;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using Tegra::Shader::SystemVariable;
-
-using Index = Tegra::Shader::Attribute::Index;
-
-u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::NOP: {
- UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
- UNIMPLEMENTED_IF(instr.nop.trigger != 0);
- // With the previous preconditions, this instruction is a no-operation.
- break;
- }
- case OpCode::Id::EXIT: {
- const ConditionCode cc = instr.flow_condition_code;
- UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
-
- switch (instr.flow.cond) {
- case Tegra::Shader::FlowCondition::Always:
- bb.push_back(Operation(OperationCode::Exit));
- if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
- // If this is an unconditional exit then just end processing here,
- // otherwise we have to account for the possibility of the condition
- // not being met, so continue processing the next instruction.
- pc = MAX_PROGRAM_LENGTH - 1;
- }
- break;
-
- case Tegra::Shader::FlowCondition::Fcsm_Tr:
- // TODO(bunnei): What is this used for? If we assume this condition is not
- // satisfied, dual vertex shaders in Farming Simulator make more sense.
- UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
- break;
-
- default:
- UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value());
- }
- break;
- }
- case OpCode::Id::KIL: {
- UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
-
- const ConditionCode cc = instr.flow_condition_code;
- UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc);
-
- bb.push_back(Operation(OperationCode::Discard));
- break;
- }
- case OpCode::Id::S2R: {
- const Node value = [this, instr] {
- switch (instr.sys20) {
- case SystemVariable::LaneId:
- return Operation(OperationCode::ThreadId);
- case SystemVariable::InvocationId:
- return Operation(OperationCode::InvocationId);
- case SystemVariable::Ydirection:
- uses_y_negate = true;
- return Operation(OperationCode::YNegate);
- case SystemVariable::InvocationInfo:
- LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
- return Immediate(0x00ff'0000U);
- case SystemVariable::WscaleFactorXY:
- UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
- return Immediate(0U);
- case SystemVariable::WscaleFactorZ:
- UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
- return Immediate(0U);
- case SystemVariable::Tid: {
- Node val = Immediate(0);
- val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9);
- val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9);
- val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
- return val;
- }
- case SystemVariable::TidX:
- return Operation(OperationCode::LocalInvocationIdX);
- case SystemVariable::TidY:
- return Operation(OperationCode::LocalInvocationIdY);
- case SystemVariable::TidZ:
- return Operation(OperationCode::LocalInvocationIdZ);
- case SystemVariable::CtaIdX:
- return Operation(OperationCode::WorkGroupIdX);
- case SystemVariable::CtaIdY:
- return Operation(OperationCode::WorkGroupIdY);
- case SystemVariable::CtaIdZ:
- return Operation(OperationCode::WorkGroupIdZ);
- case SystemVariable::EqMask:
- case SystemVariable::LtMask:
- case SystemVariable::LeMask:
- case SystemVariable::GtMask:
- case SystemVariable::GeMask:
- uses_warps = true;
- switch (instr.sys20) {
- case SystemVariable::EqMask:
- return Operation(OperationCode::ThreadEqMask);
- case SystemVariable::LtMask:
- return Operation(OperationCode::ThreadLtMask);
- case SystemVariable::LeMask:
- return Operation(OperationCode::ThreadLeMask);
- case SystemVariable::GtMask:
- return Operation(OperationCode::ThreadGtMask);
- case SystemVariable::GeMask:
- return Operation(OperationCode::ThreadGeMask);
- default:
- UNREACHABLE();
- return Immediate(0u);
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value());
- return Immediate(0u);
- }
- }();
- SetRegister(bb, instr.gpr0, value);
-
- break;
- }
- case OpCode::Id::BRA: {
- Node branch;
- if (instr.bra.constant_buffer == 0) {
- const u32 target = pc + instr.bra.GetBranchTarget();
- branch = Operation(OperationCode::Branch, Immediate(target));
- } else {
- const u32 target = pc + 1;
- const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
- const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
- PRECISE, op_a, Immediate(3));
- const Node operand =
- Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
- branch = Operation(OperationCode::BranchIndirect, operand);
- }
-
- const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
- if (cc != Tegra::Shader::ConditionCode::T) {
- bb.push_back(Conditional(GetConditionCode(cc), {branch}));
- } else {
- bb.push_back(branch);
- }
- break;
- }
- case OpCode::Id::BRX: {
- Node operand;
- if (instr.brx.constant_buffer != 0) {
- const s32 target = pc + 1;
- const Node index = GetRegister(instr.gpr8);
- const Node op_a =
- GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
- const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
- PRECISE, op_a, Immediate(3));
- operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
- } else {
- const s32 target = pc + instr.brx.GetBranchExtend();
- const Node op_a = GetRegister(instr.gpr8);
- const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
- PRECISE, op_a, Immediate(3));
- operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
- }
- const Node branch = Operation(OperationCode::BranchIndirect, operand);
-
- const ConditionCode cc = instr.flow_condition_code;
- if (cc != ConditionCode::T) {
- bb.push_back(Conditional(GetConditionCode(cc), {branch}));
- } else {
- bb.push_back(branch);
- }
- break;
- }
- case OpCode::Id::SSY: {
- UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
- "Constant buffer flow is not supported");
-
- if (disable_flow_stack) {
- break;
- }
-
- // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
- const u32 target = pc + instr.bra.GetBranchTarget();
- bb.push_back(
- Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target)));
- break;
- }
- case OpCode::Id::PBK: {
- UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
- "Constant buffer PBK is not supported");
-
- if (disable_flow_stack) {
- break;
- }
-
- // PBK pushes to a stack the address where BRK will jump to.
- const u32 target = pc + instr.bra.GetBranchTarget();
- bb.push_back(
- Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target)));
- break;
- }
- case OpCode::Id::SYNC: {
- const ConditionCode cc = instr.flow_condition_code;
- UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc);
-
- if (decompiled) {
- break;
- }
-
- // The SYNC opcode jumps to the address previously set by the SSY opcode
- bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
- break;
- }
- case OpCode::Id::BRK: {
- const ConditionCode cc = instr.flow_condition_code;
- UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc);
- if (decompiled) {
- break;
- }
-
- // The BRK opcode jumps to the address previously set by the PBK opcode
- bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
- break;
- }
- case OpCode::Id::IPA: {
- const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
- const auto attribute = instr.attribute.fmt28;
- const Index index = attribute.index;
-
- Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
- : GetInputAttribute(index, attribute.element);
-
- // Code taken from Ryujinx.
- if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
- const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
- if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
- Node position_w = GetInputAttribute(Index::Position, 3);
- value = Operation(OperationCode::FMul, move(value), move(position_w));
- }
- }
-
- if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
- value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
- }
-
- value = GetSaturatedFloat(move(value), instr.ipa.saturate);
-
- SetRegister(bb, instr.gpr0, move(value));
- break;
- }
- case OpCode::Id::OUT_R: {
- UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
- "Stream buffer is not supported");
-
- if (instr.out.emit) {
- // gpr0 is used to store the next address and gpr8 contains the address to emit.
- // Hardware uses pointers here, but we just ignore them.
- bb.push_back(Operation(OperationCode::EmitVertex));
- SetRegister(bb, instr.gpr0, Immediate(0));
- }
- if (instr.out.cut) {
- bb.push_back(Operation(OperationCode::EndPrimitive));
- }
- break;
- }
- case OpCode::Id::ISBERD: {
- UNIMPLEMENTED_IF(instr.isberd.o != 0);
- UNIMPLEMENTED_IF(instr.isberd.skew != 0);
- UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
- UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
- LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
- SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
- break;
- }
- case OpCode::Id::BAR: {
- UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
- bb.push_back(Operation(OperationCode::Barrier));
- break;
- }
- case OpCode::Id::MEMBAR: {
- UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
- const OperationCode type = [instr] {
- switch (instr.membar.type) {
- case Tegra::Shader::MembarType::CTA:
- return OperationCode::MemoryBarrierGroup;
- case Tegra::Shader::MembarType::GL:
- return OperationCode::MemoryBarrierGlobal;
- default:
- UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value());
- return OperationCode::MemoryBarrierGlobal;
- }
- }();
- bb.push_back(Operation(type));
- break;
- }
- case OpCode::Id::DEPBAR: {
- LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
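
The S2R Tid case above bitfield-inserts the three local invocation IDs into one word at offsets 0, 16 and 26 with widths 9, 9 and 5. A sketch of the packed layout (illustrative helper, not the deleted BitfieldInsert node):

#include <cstdint>
#include <cstdio>

std::uint32_t PackTid(std::uint32_t x, std::uint32_t y, std::uint32_t z) {
    // Insert `value` into `base` at [offset, offset + bits).
    const auto insert = [](std::uint32_t base, std::uint32_t value,
                           std::uint32_t offset, std::uint32_t bits) {
        const std::uint32_t mask = ((1u << bits) - 1) << offset;
        return (base & ~mask) | ((value << offset) & mask);
    };
    std::uint32_t tid = 0;
    tid = insert(tid, x, 0, 9);  // LocalInvocationIdX
    tid = insert(tid, y, 16, 9); // LocalInvocationIdY
    tid = insert(tid, z, 26, 5); // LocalInvocationIdZ
    return tid;
}

int main() {
    std::printf("%08x\n", PackTid(3, 2, 1)); // 04020003
    return 0;
}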
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
deleted file mode 100644
index 9290d22eb..000000000
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-
-u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::PSETP: {
- const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
- const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
-
- // We can't use the constant predicate as destination.
- ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
- const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
-
- const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
- const Node predicate = Operation(combiner, op_a, op_b);
-
- // Set the primary predicate to the result of Predicate OP SecondPredicate
- SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred));
-
- if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
- // enabled
- SetPredicate(bb, instr.psetp.pred0,
- Operation(combiner, Operation(OperationCode::LogicalNegate, predicate),
- second_pred));
- }
- break;
- }
- case OpCode::Id::CSETP: {
- const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
- const Node condition_code = GetConditionCode(instr.csetp.cc);
-
- const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);
-
- if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
- SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred));
- }
- if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code);
- SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred));
- }
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
deleted file mode 100644
index 84dbc50fe..000000000
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
-
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
- "Condition codes generation in PSET is not implemented");
-
- const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
- const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
- const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b);
-
- const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
-
- const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
- const Node predicate = Operation(combiner, first_pred, second_pred);
-
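-    // The bf (boolean-float) flag selects the result encoding: float 1.0f/0.0f
-    // when set, integer 0xffffffff/0 when clear, matching the other *SET
-    // instructions in this IR.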
- const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff);
- const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
- const Node value =
- Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
-
- if (instr.pset.bf) {
- SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
- } else {
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- }
- SetRegister(bb, instr.gpr0, value);
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
deleted file mode 100644
index 6116c31aa..000000000
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <utility>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-namespace {
-constexpr u64 NUM_CONDITION_CODES = 4;
-constexpr u64 NUM_PREDICATES = 7;
-} // namespace
-
-u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- Node apply_mask = [this, opcode, instr] {
- switch (opcode->get().GetId()) {
- case OpCode::Id::R2P_IMM:
- case OpCode::Id::P2R_IMM:
- return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
-
- const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc;
- const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES;
- const auto get_entry = [this, cc](u64 entry) {
- return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry);
- };
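-
-    // Rough semantics, as decoded below: R2P copies bits
-    // [offset, offset + num_entries) of a register into the predicates or
-    // condition codes selected by apply_mask, while P2R packs those entries
-    // into an 8-bit field of the register at the same offset. For example,
-    // with byte == 1 (offset == 8) and mask bit i set, R2P performs
-    // P[i] = ((gpr8 >> (8 + i)) & 1) != 0.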
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::R2P_IMM: {
- Node mask = GetRegister(instr.gpr8);
-
- for (u64 entry = 0; entry < num_entries; ++entry) {
- const u32 shift = static_cast<u32>(entry);
-
- Node apply = BitfieldExtract(apply_mask, shift, 1);
- Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0));
-
- Node compare = BitfieldExtract(mask, offset + shift, 1);
- Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0));
-
- Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value));
- bb.push_back(Conditional(condition, {move(code)}));
- }
- break;
- }
- case OpCode::Id::P2R_IMM: {
- Node value = Immediate(0);
- for (u64 entry = 0; entry < num_entries; ++entry) {
- Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry),
- Immediate(0));
- value = Operation(OperationCode::UBitwiseOr, move(value), move(bit));
- }
- value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask);
- value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8);
-
- SetRegister(bb, instr.gpr0, move(value));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName());
- break;
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
deleted file mode 100644
index a53819c15..000000000
--- a/src/video_core/shader/decode/shift.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::ShfType;
-using Tegra::Shader::ShfXmode;
-
-namespace {
-
-Node IsFull(Node shift) {
- return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32));
-}
-
-Node Shift(OperationCode opcode, Node value, Node shift) {
- Node shifted = Operation(opcode, move(value), shift);
- return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted));
-}
-
-Node ClampShift(Node shift, s32 size = 32) {
- shift = Operation(OperationCode::IMax, move(shift), Immediate(0));
- return Operation(OperationCode::IMin, move(shift), Immediate(size));
-}
-
-Node WrapShift(Node shift, s32 size = 32) {
- return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1));
-}
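-
-// For example, with the default 32-bit size: ClampShift(40) yields 32 and
-// ClampShift(-3) yields 0, while WrapShift(40) masks with size - 1 and yields
-// 8. These implement the clamp and wrap variants of SHR/SHF shift handling.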
-
-Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) {
- // These values are used when the shift value is less than 32
- Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift);
- Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift);
- Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low));
-
- if (type == ShfType::Bits32) {
-        // On 32-bit shifts we are either full (shifting by 32) or shifting fewer than 32 bits
- return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less));
- }
-
-    // And these when the shift is 32 or larger
- const bool is_signed = type == ShfType::S64;
- const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed);
- Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
- Node greater = Shift(opcode, high, move(reduced));
-
- Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
- Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
-
- Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
- return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
-}
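-
-// Sketch of the selection in ShiftRight, assuming shift has already been
-// clamped or wrapped: for 0 < shift < 32 the result is
-// (high << (32 - shift)) | (low >> shift); for shift >= 32 it is
-// high >> (shift - 32), arithmetic when the type is S64; a shift of zero
-// yields high unchanged. ShiftLeft below mirrors this construction.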
-
-Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) {
- // These values are used when the shift value is less than 32
- Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift);
- Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift);
- Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high));
-
- if (type == ShfType::Bits32) {
-        // On 32-bit shifts we are either full (shifting by 32) or shifting fewer than 32 bits
- return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less));
- }
-
-    // And these when the shift is 32 or larger
- Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
- Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced));
-
- Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
- Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
-
- Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
- return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
-}
-
-} // Anonymous namespace
-
-u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- Node op_a = GetRegister(instr.gpr8);
- Node op_b = [this, instr] {
- if (instr.is_b_imm) {
- return Immediate(instr.alu.GetSignedImm20_20());
- } else if (instr.is_b_gpr) {
- return GetRegister(instr.gpr20);
- } else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
- }
- }();
-
- switch (const auto opid = opcode->get().GetId(); opid) {
- case OpCode::Id::SHR_C:
- case OpCode::Id::SHR_R:
- case OpCode::Id::SHR_IMM: {
- op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b));
-
- Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
- move(op_a), move(op_b));
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, move(value));
- break;
- }
- case OpCode::Id::SHL_C:
- case OpCode::Id::SHL_R:
- case OpCode::Id::SHL_IMM: {
- Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, move(value));
- break;
- }
- case OpCode::Id::SHF_RIGHT_R:
- case OpCode::Id::SHF_RIGHT_IMM:
- case OpCode::Id::SHF_LEFT_R:
- case OpCode::Id::SHF_LEFT_IMM: {
- UNIMPLEMENTED_IF(instr.generates_cc);
- UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
- instr.shf.xmode.Value());
-
- if (instr.is_b_imm) {
- op_b = Immediate(static_cast<u32>(instr.shf.immediate));
- }
- const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64;
- Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size);
-
- Node negated_shift = Operation(OperationCode::INegate, shift);
- Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32));
-
- const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM;
- Node value = (is_right ? ShiftRight : ShiftLeft)(
- move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type);
-
- SetRegister(bb, instr.gpr0, move(value));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
deleted file mode 100644
index c69681e8d..000000000
--- a/src/video_core/shader/decode/texture.cpp
+++ /dev/null
@@ -1,935 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <vector>
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/bit_field.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Register;
-using Tegra::Shader::TextureMiscMode;
-using Tegra::Shader::TextureProcessMode;
-using Tegra::Shader::TextureType;
-
-static std::size_t GetCoordCount(TextureType texture_type) {
- switch (texture_type) {
- case TextureType::Texture1D:
- return 1;
- case TextureType::Texture2D:
- return 2;
- case TextureType::Texture3D:
- case TextureType::TextureCube:
- return 3;
- default:
- UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type);
- return 0;
- }
-}
-
-u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
- bool is_bindless = false;
- switch (opcode->get().GetId()) {
- case OpCode::Id::TEX: {
- const TextureType texture_type{instr.tex.texture_type};
- const bool is_array = instr.tex.array != 0;
- const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
- const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
- const auto process_mode = instr.tex.GetTextureProcessMode();
- WriteTexInstructionFloat(
- bb, instr,
- GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
- break;
- }
- case OpCode::Id::TEX_B: {
- UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
-
- const TextureType texture_type{instr.tex_b.texture_type};
- const bool is_array = instr.tex_b.array != 0;
- const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
- const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
- const auto process_mode = instr.tex_b.GetTextureProcessMode();
- WriteTexInstructionFloat(bb, instr,
- GetTexCode(instr, texture_type, process_mode, depth_compare,
- is_array, is_aoffi, {instr.gpr20}));
- break;
- }
- case OpCode::Id::TEXS: {
- const TextureType texture_type{instr.texs.GetTextureType()};
- const bool is_array{instr.texs.IsArrayTexture()};
- const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
- const auto process_mode = instr.texs.GetTextureProcessMode();
-
- const Node4 components =
- GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
-
- if (instr.texs.fp32_flag) {
- WriteTexsInstructionFloat(bb, instr, components);
- } else {
- WriteTexsInstructionHalfFloat(bb, instr, components);
- }
- break;
- }
- case OpCode::Id::TLD4_B: {
- is_bindless = true;
- [[fallthrough]];
- }
- case OpCode::Id::TLD4: {
- UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
- "NDV is not implemented");
- const auto texture_type = instr.tld4.texture_type.Value();
- const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
- : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
- const bool is_array = instr.tld4.array != 0;
- const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
- : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
- const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
- : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
- WriteTexInstructionFloat(bb, instr,
- GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
- is_ptp, is_bindless));
- break;
- }
- case OpCode::Id::TLD4S: {
- constexpr std::size_t num_coords = 2;
- const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
- const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
- const Node op_a = GetRegister(instr.gpr8);
- const Node op_b = GetRegister(instr.gpr20);
-
- // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
- std::vector<Node> coords;
- std::vector<Node> aoffi;
- Node depth_compare;
- if (is_depth_compare) {
- // Note: TLD4S coordinate encoding works just like TEXS's
- const Node op_y = GetRegister(instr.gpr8.Value() + 1);
- coords.push_back(op_a);
- coords.push_back(op_y);
- if (is_aoffi) {
- aoffi = GetAoffiCoordinates(op_b, num_coords, true);
- depth_compare = GetRegister(instr.gpr20.Value() + 1);
- } else {
- depth_compare = op_b;
- }
- } else {
- // There's no depth compare
- coords.push_back(op_a);
- if (is_aoffi) {
- coords.push_back(GetRegister(instr.gpr8.Value() + 1));
- aoffi = GetAoffiCoordinates(op_b, num_coords, true);
- } else {
- coords.push_back(op_b);
- }
- }
- const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
-
- SamplerInfo info;
- info.is_shadow = is_depth_compare;
- const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
-
- Node4 values;
- for (u32 element = 0; element < values.size(); ++element) {
- MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {},
- {}, {}, component, element, {}};
- values[element] = Operation(OperationCode::TextureGather, meta, coords);
- }
-
- if (instr.tld4s.fp16_flag) {
- WriteTexsInstructionHalfFloat(bb, instr, values, true);
- } else {
- WriteTexsInstructionFloat(bb, instr, values, true);
- }
- break;
- }
- case OpCode::Id::TXD_B:
- is_bindless = true;
- [[fallthrough]];
- case OpCode::Id::TXD: {
- UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
-
- const bool is_array = instr.txd.is_array != 0;
- const auto derivate_reg = instr.gpr20.Value();
- const auto texture_type = instr.txd.texture_type.Value();
- const auto coord_count = GetCoordCount(texture_type);
- u64 base_reg = instr.gpr8.Value();
- Node index_var;
- SamplerInfo info;
- info.type = texture_type;
- info.is_array = is_array;
- const std::optional<SamplerEntry> sampler =
- is_bindless ? GetBindlessSampler(base_reg, info, index_var)
- : GetSampler(instr.sampler, info);
- Node4 values;
- if (!sampler) {
- std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
- WriteTexInstructionFloat(bb, instr, values);
- break;
- }
-
- if (is_bindless) {
- base_reg++;
- }
-
- std::vector<Node> coords;
- std::vector<Node> derivates;
- for (std::size_t i = 0; i < coord_count; ++i) {
- coords.push_back(GetRegister(base_reg + i));
- const std::size_t derivate = i * 2;
- derivates.push_back(GetRegister(derivate_reg + derivate));
- derivates.push_back(GetRegister(derivate_reg + derivate + 1));
- }
-
- Node array_node = {};
- if (is_array) {
- const Node info_reg = GetRegister(base_reg + coord_count);
- array_node = BitfieldExtract(info_reg, 0, 16);
- }
-
- for (u32 element = 0; element < values.size(); ++element) {
- MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates,
- {}, {}, {}, element, index_var};
- values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
- }
-
- WriteTexInstructionFloat(bb, instr, values);
-
- break;
- }
- case OpCode::Id::TXQ_B:
- is_bindless = true;
- [[fallthrough]];
- case OpCode::Id::TXQ: {
- Node index_var;
- const std::optional<SamplerEntry> sampler =
- is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
- : GetSampler(instr.sampler, {});
-
- if (!sampler) {
- u32 indexer = 0;
- for (u32 element = 0; element < 4; ++element) {
- if (!instr.txq.IsComponentEnabled(element)) {
- continue;
- }
- const Node value = Immediate(0);
- SetTemporary(bb, indexer++, value);
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
-
- u32 indexer = 0;
- switch (instr.txq.query_type) {
- case Tegra::Shader::TextureQueryType::Dimension: {
- for (u32 element = 0; element < 4; ++element) {
- if (!instr.txq.IsComponentEnabled(element)) {
- continue;
- }
- MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
- const Node value =
- Operation(OperationCode::TextureQueryDimensions, meta,
- GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
- SetTemporary(bb, indexer++, value);
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value());
- }
- break;
- }
- case OpCode::Id::TMML_B:
- is_bindless = true;
- [[fallthrough]];
- case OpCode::Id::TMML: {
- UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
- "NDV is not implemented");
-
- const auto texture_type = instr.tmml.texture_type.Value();
- const bool is_array = instr.tmml.array != 0;
- SamplerInfo info;
- info.type = texture_type;
- info.is_array = is_array;
- Node index_var;
- const std::optional<SamplerEntry> sampler =
- is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
- : GetSampler(instr.sampler, info);
-
- if (!sampler) {
- u32 indexer = 0;
- for (u32 element = 0; element < 2; ++element) {
- if (!instr.tmml.IsComponentEnabled(element)) {
- continue;
- }
- const Node value = Immediate(0);
- SetTemporary(bb, indexer++, value);
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
-
- const u64 base_index = is_array ? 1 : 0;
- const u64 num_components = [texture_type] {
- switch (texture_type) {
- case TextureType::Texture1D:
- return 1;
- case TextureType::Texture2D:
- return 2;
- case TextureType::TextureCube:
- return 3;
- default:
- UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type);
- return 2;
- }
- }();
- // TODO: What's the array component used for?
-
- std::vector<Node> coords;
- coords.reserve(num_components);
- for (u64 component = 0; component < num_components; ++component) {
- coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
- }
-
- u32 indexer = 0;
- for (u32 element = 0; element < 2; ++element) {
- if (!instr.tmml.IsComponentEnabled(element)) {
- continue;
- }
- MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
- Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
- SetTemporary(bb, indexer++, std::move(value));
- }
- for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
- break;
- }
- case OpCode::Id::TLD: {
- UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
- UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
- UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
-
- WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
- break;
- }
- case OpCode::Id::TLDS: {
- const TextureType texture_type{instr.tlds.GetTextureType()};
- const bool is_array{instr.tlds.IsArrayTexture()};
-
- UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
- UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
-
- const Node4 components = GetTldsCode(instr, texture_type, is_array);
-
- if (instr.tlds.fp32_flag) {
- WriteTexsInstructionFloat(bb, instr, components);
- } else {
- WriteTexsInstructionHalfFloat(bb, instr, components);
- }
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
- SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
- if (info.IsComplete()) {
- return info;
- }
- if (!sampler) {
- LOG_WARNING(HW_GPU, "Unknown sampler info");
- info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
- info.is_array = info.is_array.value_or(false);
- info.is_shadow = info.is_shadow.value_or(false);
- info.is_buffer = info.is_buffer.value_or(false);
- return info;
- }
- info.type = info.type.value_or(sampler->texture_type);
- info.is_array = info.is_array.value_or(sampler->is_array != 0);
- info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0);
- info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0);
- return info;
-}
-
-std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
- SamplerInfo sampler_info) {
- const u32 offset = static_cast<u32>(sampler.index.Value());
- const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
-
- // If this sampler has already been used, return the existing mapping.
- const auto it =
- std::find_if(used_samplers.begin(), used_samplers.end(),
- [offset](const SamplerEntry& entry) { return entry.offset == offset; });
- if (it != used_samplers.end()) {
- ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
- it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
- return *it;
- }
-
- // Otherwise create a new mapping for this sampler
- const auto next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array,
- *info.is_shadow, *info.is_buffer, false);
-}
-
-std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
- SamplerInfo info, Node& index_var) {
- const Node sampler_register = GetRegister(reg);
- const auto [base_node, tracked_sampler_info] =
- TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
- if (!base_node) {
- UNREACHABLE();
- return std::nullopt;
- }
-
- if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
- const u32 buffer = sampler_info->index;
- const u32 offset = sampler_info->offset;
- info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
-
- // If this sampler has already been used, return the existing mapping.
- const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
- [buffer, offset](const SamplerEntry& entry) {
- return entry.buffer == buffer && entry.offset == offset;
- });
- if (it != used_samplers.end()) {
- ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
- it->is_shadow == info.is_shadow);
- return *it;
- }
-
- // Otherwise create a new mapping for this sampler
- const auto next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
- *info.is_shadow, *info.is_buffer, false);
- }
- if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
- const std::pair indices = sampler_info->indices;
- const std::pair offsets = sampler_info->offsets;
- info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
-
- // Try to use an already created sampler if it exists
- const auto it =
- std::find_if(used_samplers.begin(), used_samplers.end(),
- [indices, offsets](const SamplerEntry& entry) {
- return offsets == std::pair{entry.offset, entry.secondary_offset} &&
- indices == std::pair{entry.buffer, entry.secondary_buffer};
- });
- if (it != used_samplers.end()) {
- ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
- it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
- return *it;
- }
-
- // Otherwise create a new mapping for this sampler
- const u32 next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
- *info.is_shadow, *info.is_buffer);
- }
- if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
- const u32 base_offset = sampler_info->base_offset / 4;
- index_var = GetCustomVariable(sampler_info->bindless_var);
- info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
-
- // If this sampler has already been used, return the existing mapping.
- const auto it = std::find_if(
- used_samplers.begin(), used_samplers.end(),
- [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
- if (it != used_samplers.end()) {
- ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
- it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
- it->is_indexed);
- return *it;
- }
-
- uses_indexed_samplers = true;
- // Otherwise create a new mapping for this sampler
- const auto next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array,
- *info.is_shadow, *info.is_buffer, true);
- }
- return std::nullopt;
-}
-
-void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
- u32 dest_elem = 0;
- for (u32 elem = 0; elem < 4; ++elem) {
- if (!instr.tex.IsComponentEnabled(elem)) {
- // Skip disabled components
- continue;
- }
- SetTemporary(bb, dest_elem++, components[elem]);
- }
-    // After writing the values to temporaries, move them to the real registers
- for (u32 i = 0; i < dest_elem; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
- }
-}
-
-void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
- bool ignore_mask) {
-    // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
-    // go into gpr0+0 and gpr0+1, and the rest go into gpr28+0 and gpr28+1
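-    // For example, with three enabled components t0, t1 and t2, the writes
-    // are gpr0 = t0, gpr0+1 = t1 and gpr28 = t2.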
-
- u32 dest_elem = 0;
- for (u32 component = 0; component < 4; ++component) {
- if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
- continue;
- SetTemporary(bb, dest_elem++, components[component]);
- }
-
- for (u32 i = 0; i < dest_elem; ++i) {
- if (i < 2) {
- // Write the first two swizzle components to gpr0 and gpr0+1
- SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
- } else {
- ASSERT(instr.texs.HasTwoDestinations());
- // Write the rest of the swizzle components to gpr28 and gpr28+1
- SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
- }
- }
-}
-
-void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
- const Node4& components, bool ignore_mask) {
-    // TEXS.F16 destination registers are packed into two registers in pairs (just like any
-    // half-float instruction).
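-    // For example, with all four components enabled, gpr0 receives
-    // HPack2(v0, v1) and gpr28 receives HPack2(v2, v3); with one or two
-    // enabled components only gpr0 is written.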
-
- Node4 values;
- u32 dest_elem = 0;
- for (u32 component = 0; component < 4; ++component) {
- if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
- continue;
- values[dest_elem++] = components[component];
- }
- if (dest_elem == 0)
- return;
-
- std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
-
- const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
- if (dest_elem <= 2) {
- SetRegister(bb, instr.gpr0, first_value);
- return;
- }
-
- SetTemporary(bb, 0, first_value);
- SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
-
- SetRegister(bb, instr.gpr0, GetTemporary(0));
- SetRegister(bb, instr.gpr28, GetTemporary(1));
-}
-
-Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
- TextureProcessMode process_mode, std::vector<Node> coords,
- Node array, Node depth_compare, u32 bias_offset,
- std::vector<Node> aoffi,
- std::optional<Tegra::Shader::Register> bindless_reg) {
- const bool is_array = array != nullptr;
- const bool is_shadow = depth_compare != nullptr;
- const bool is_bindless = bindless_reg.has_value();
-
- ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
- "Illegal texture type");
-
- SamplerInfo info;
- info.type = texture_type;
- info.is_array = is_array;
- info.is_shadow = is_shadow;
- info.is_buffer = false;
-
- Node index_var;
- const std::optional<SamplerEntry> sampler =
- is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
- : GetSampler(instr.sampler, info);
- if (!sampler) {
- return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
- }
-
- const bool lod_needed = process_mode == TextureProcessMode::LZ ||
- process_mode == TextureProcessMode::LL ||
- process_mode == TextureProcessMode::LLA;
- const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture;
-
- Node bias;
- Node lod;
- switch (process_mode) {
- case TextureProcessMode::None:
- break;
- case TextureProcessMode::LZ:
- lod = Immediate(0.0f);
- break;
- case TextureProcessMode::LB:
-        // If present, the lod or bias is always stored in the register indexed by the gpr20
-        // field, with an offset depending on the usage of the other registers.
- bias = GetRegister(instr.gpr20.Value() + bias_offset);
- break;
- case TextureProcessMode::LL:
- lod = GetRegister(instr.gpr20.Value() + bias_offset);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode);
- break;
- }
-
- Node4 values;
- for (u32 element = 0; element < values.size(); ++element) {
- MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias,
- lod, {}, element, index_var};
- values[element] = Operation(opcode, meta, coords);
- }
-
- return values;
-}
-
-Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
- TextureProcessMode process_mode, bool depth_compare, bool is_array,
- bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
- const bool lod_bias_enabled{
- (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
-
- const bool is_bindless = bindless_reg.has_value();
-
- u64 parameter_register = instr.gpr20.Value();
- if (is_bindless) {
- ++parameter_register;
- }
-
- const u32 bias_lod_offset = (is_bindless ? 1 : 0);
- if (lod_bias_enabled) {
- ++parameter_register;
- }
-
- const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
- lod_bias_enabled, 4, 5);
- const auto coord_count = std::get<0>(coord_counts);
-    // If enabled, the array index is always stored in the gpr8 field
- const u64 array_register = instr.gpr8.Value();
-    // The first coordinate is stored in gpr8, or gpr8 + 1 when arrays are used
- const u64 coord_register = array_register + (is_array ? 1 : 0);
-
- std::vector<Node> coords;
- for (std::size_t i = 0; i < coord_count; ++i) {
- coords.push_back(GetRegister(coord_register + i));
- }
-    // For 1D.DC in OpenGL, the 2nd component is ignored.
- if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
- coords.push_back(Immediate(0.0f));
- }
-
- const Node array = is_array ? GetRegister(array_register) : nullptr;
-
- std::vector<Node> aoffi;
- if (is_aoffi) {
- aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
- }
-
- Node dc;
- if (depth_compare) {
-        // Depth is always stored in the register signaled by gpr20, or in the next register if
-        // lod or bias is used
- dc = GetRegister(parameter_register++);
- }
-
- return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
- aoffi, bindless_reg);
-}
-
-Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
- TextureProcessMode process_mode, bool depth_compare, bool is_array) {
- const bool lod_bias_enabled =
- (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
-
- const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
- lod_bias_enabled, 4, 4);
- const auto coord_count = std::get<0>(coord_counts);
-
-    // If enabled, the array index is always stored in the gpr8 field
- const u64 array_register = instr.gpr8.Value();
-    // The first coordinate is stored in the gpr8 field, or (gpr8 + 1) when arrays are used
- const u64 coord_register = array_register + (is_array ? 1 : 0);
- const u64 last_coord_register =
- (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
- ? static_cast<u64>(instr.gpr20.Value())
- : coord_register + 1;
- const u32 bias_offset = coord_count > 2 ? 1 : 0;
-
- std::vector<Node> coords;
- for (std::size_t i = 0; i < coord_count; ++i) {
- const bool last = (i == (coord_count - 1)) && (coord_count > 1);
- coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
- }
-
- const Node array = is_array ? GetRegister(array_register) : nullptr;
-
- Node dc;
- if (depth_compare) {
-        // Depth is always stored in the register signaled by gpr20, or in the next register if
-        // lod or bias is used
- const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
- dc = GetRegister(depth_register);
- }
-
- return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
- {});
-}
-
-Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
- bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
- ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");
-
- const std::size_t coord_count = GetCoordCount(texture_type);
-
-    // If enabled, the array index is always stored in the gpr8 field
- const u64 array_register = instr.gpr8.Value();
-    // The first coordinate is stored in gpr8, or gpr8 + 1 when arrays are used
- const u64 coord_register = array_register + (is_array ? 1 : 0);
-
- std::vector<Node> coords;
- for (std::size_t i = 0; i < coord_count; ++i) {
- coords.push_back(GetRegister(coord_register + i));
- }
-
- u64 parameter_register = instr.gpr20.Value();
-
- SamplerInfo info;
- info.type = texture_type;
- info.is_array = is_array;
- info.is_shadow = depth_compare;
-
- Node index_var;
- const std::optional<SamplerEntry> sampler =
- is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
- : GetSampler(instr.sampler, info);
- Node4 values;
- if (!sampler) {
- for (u32 element = 0; element < values.size(); ++element) {
- values[element] = Immediate(0);
- }
- return values;
- }
-
- std::vector<Node> aoffi, ptp;
- if (is_aoffi) {
- aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
- } else if (is_ptp) {
- ptp = GetPtpCoordinates(
- {GetRegister(parameter_register++), GetRegister(parameter_register++)});
- }
-
- Node dc;
- if (depth_compare) {
- dc = GetRegister(parameter_register++);
- }
-
- const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
- : Immediate(static_cast<u32>(instr.tld4.component));
-
- for (u32 element = 0; element < values.size(); ++element) {
- auto coords_copy = coords;
- MetaTexture meta{
- *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
- index_var};
- values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
- }
-
- return values;
-}
-
-Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
- const auto texture_type{instr.tld.texture_type};
- const bool is_array{instr.tld.is_array != 0};
- const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
- const std::size_t coord_count{GetCoordCount(texture_type)};
-
- u64 gpr8_cursor{instr.gpr8.Value()};
- const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr};
-
- std::vector<Node> coords;
- coords.reserve(coord_count);
- for (std::size_t i = 0; i < coord_count; ++i) {
- coords.push_back(GetRegister(gpr8_cursor++));
- }
-
- u64 gpr20_cursor{instr.gpr20.Value()};
- // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr};
- const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)};
- // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
- // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
-
- const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});
-
- Node4 values;
- for (u32 element = 0; element < values.size(); ++element) {
- auto coords_copy = coords;
- MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
- values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
- }
-
- return values;
-}
-
-Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
- SamplerInfo info;
- info.type = texture_type;
- info.is_array = is_array;
- info.is_shadow = false;
- const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
-
- const std::size_t type_coord_count = GetCoordCount(texture_type);
- const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
- const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI);
-
-    // If enabled, the array index is always stored in the gpr8 field
- const u64 array_register = instr.gpr8.Value();
-    // If this is an array texture, the coordinates are stored starting at gpr20
- const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
-
- const u64 last_coord_register =
- ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
- ? static_cast<u64>(instr.gpr20.Value())
- : coord_register + 1;
-
- std::vector<Node> coords;
- for (std::size_t i = 0; i < type_coord_count; ++i) {
- const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
- coords.push_back(
- GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
- }
-
- const Node array = is_array ? GetRegister(array_register) : nullptr;
-    // When lod is used, it is always stored in gpr20
- const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
-
- std::vector<Node> aoffi;
- if (aoffi_enabled) {
- aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
- }
-
- Node4 values;
- for (u32 element = 0; element < values.size(); ++element) {
- auto coords_copy = coords;
- MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
- values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
- }
- return values;
-}
-
-std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
- TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
- std::size_t max_coords, std::size_t max_inputs) {
- const std::size_t coord_count = GetCoordCount(texture_type);
-
- std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
- const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
- if (total_coord_count > max_coords || total_reg_count > max_inputs) {
- UNIMPLEMENTED_MSG("Unsupported Texture operation");
- total_coord_count = std::min(total_coord_count, max_coords);
- }
-    // For 1D.DC, OpenGL uses a vec3, but the 2nd component is ignored later.
- total_coord_count +=
- (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
-
- return {coord_count, total_coord_count};
-}
-
-std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
- bool is_tld4) {
- const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
- const u32 size = is_tld4 ? 6 : 4;
- const s32 wrap_value = is_tld4 ? 32 : 8;
- const s32 diff_value = is_tld4 ? 64 : 16;
- const u32 mask = (1U << size) - 1;
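-
-    // The offsets are packed two's complement bitfields: 4-bit fields at bits
-    // {0, 4, 8} normally, 6-bit fields at {0, 8, 16} for TLD4. For example, a
-    // TLD4 field value of 63 decodes as 63 - 64 = -1, since values greater
-    // than or equal to wrap_value become negative.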
-
- std::vector<Node> aoffi;
- aoffi.reserve(coord_count);
-
- const auto aoffi_immediate{
- TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
- if (!aoffi_immediate) {
- // Variable access, not supported on AMD.
- LOG_WARNING(HW_GPU,
- "AOFFI constant folding failed, some hardware might have graphical issues");
- for (std::size_t coord = 0; coord < coord_count; ++coord) {
- const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
- const Node condition =
- Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
- const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
- aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
- }
- return aoffi;
- }
-
- for (std::size_t coord = 0; coord < coord_count; ++coord) {
- s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
- if (value >= wrap_value) {
- value -= diff_value;
- }
- aoffi.push_back(Immediate(value));
- }
- return aoffi;
-}
-
-std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
- static constexpr u32 num_entries = 8;
-
- std::vector<Node> ptp;
- ptp.reserve(num_entries);
-
- const auto global_size = static_cast<s64>(global_code.size());
- const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
- const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
- if (!low || !high) {
- for (u32 entry = 0; entry < num_entries; ++entry) {
- const u32 reg = entry / 4;
- const u32 offset = entry % 4;
- const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
- const Node condition =
- Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
- const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
- ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
- }
- return ptp;
- }
-
- const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
- for (u32 entry = 0; entry < num_entries; ++entry) {
- s32 value = (immediate >> (entry * 8)) & 0b111111;
- if (value >= 32) {
- value -= 64;
- }
- ptp.push_back(Immediate(value));
- }
-
- return ptp;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
deleted file mode 100644
index 1c0957277..000000000
--- a/src/video_core/shader/decode/video.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-using Tegra::Shader::VideoType;
-using Tegra::Shader::VmadShr;
-using Tegra::Shader::VmnmxOperation;
-using Tegra::Shader::VmnmxType;
-
-u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- if (opcode->get().GetId() == OpCode::Id::VMNMX) {
- DecodeVMNMX(bb, instr);
- return pc;
- }
-
- const Node op_a =
- GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
- instr.video.type_a, instr.video.byte_height_a);
- const Node op_b = [this, instr] {
- if (instr.video.use_register_b) {
- return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
- instr.video.signed_b, instr.video.type_b,
- instr.video.byte_height_b);
- }
- if (instr.video.signed_b) {
- const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
- return Immediate(static_cast<u32>(imm));
- } else {
- return Immediate(instr.alu.GetImm20_16());
- }
- }();
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::VMAD: {
- const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
- const Node op_c = GetRegister(instr.gpr39);
-
- Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
- value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);
-
- if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
- const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
- value =
- SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
- }
-
- SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
- break;
- }
- case OpCode::Id::VSETP: {
-        // We can't use the constant predicate as a destination.
- ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
- const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1;
- const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b);
- const Node second_pred = GetPredicate(instr.vsetp.pred39, false);
-
- const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);
-
- // Set the primary predicate to the result of Predicate OP SecondPredicate
- SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));
-
- if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
- // if enabled
- const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
- SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
- }
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
- }
-
- return pc;
-}
-
-Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
- u64 byte_height) {
- if (!is_chunk) {
- return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
- }
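-
-    // In the non-chunk path above, byte_height selects which of the four
-    // bytes of the operand to extract; e.g. byte_height == 2 reads bits 16..23.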
-
- switch (type) {
- case VideoType::Size16_Low:
- return BitfieldExtract(op, 0, 16);
- case VideoType::Size16_High:
- return BitfieldExtract(op, 16, 16);
- case VideoType::Size32:
- // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
- // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
- UNIMPLEMENTED();
- return Immediate(0);
- case VideoType::Invalid:
- UNREACHABLE_MSG("Invalid instruction encoding");
- return Immediate(0);
- default:
- UNREACHABLE();
- return Immediate(0);
- }
-}
-
-void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
- UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
- UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
- UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
- UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
- UNIMPLEMENTED_IF(instr.vmnmx.sat);
- UNIMPLEMENTED_IF(instr.generates_cc);
-
- Node op_a = GetRegister(instr.gpr8);
- Node op_b = GetRegister(instr.gpr20);
- Node op_c = GetRegister(instr.gpr39);
-
- const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
- const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
-
- const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
- Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
-
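-    // The secondary operation then combines the min/max result with op_c: the
-    // Mrg_* modes insert it into a 16-bit or 8-bit field of op_c, Acc adds
-    // op_c, and Min/Max apply a second min/max against op_c.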
- switch (instr.vmnmx.operation) {
- case VmnmxOperation::Mrg_16H:
- value = BitfieldInsert(move(op_c), move(value), 16, 16);
- break;
- case VmnmxOperation::Mrg_16L:
- value = BitfieldInsert(move(op_c), move(value), 0, 16);
- break;
- case VmnmxOperation::Mrg_8B0:
- value = BitfieldInsert(move(op_c), move(value), 0, 8);
- break;
- case VmnmxOperation::Mrg_8B2:
- value = BitfieldInsert(move(op_c), move(value), 16, 8);
- break;
- case VmnmxOperation::Acc:
- value = Operation(OperationCode::IAdd, move(value), move(op_c));
- break;
- case VmnmxOperation::Min:
- value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
- break;
- case VmnmxOperation::Max:
- value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
- break;
- case VmnmxOperation::Nop:
- break;
- default:
- UNREACHABLE();
- break;
- }
-
- SetRegister(bb, instr.gpr0, move(value));
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
deleted file mode 100644
index 37433d783..000000000
--- a/src/video_core/shader/decode/warp.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-using Tegra::Shader::ShuffleOperation;
-using Tegra::Shader::VoteOperation;
-
-namespace {
-
-OperationCode GetOperationCode(VoteOperation vote_op) {
- switch (vote_op) {
- case VoteOperation::All:
- return OperationCode::VoteAll;
- case VoteOperation::Any:
- return OperationCode::VoteAny;
- case VoteOperation::Eq:
- return OperationCode::VoteEqual;
- default:
- UNREACHABLE_MSG("Invalid vote operation={}", vote_op);
- return OperationCode::VoteAll;
- }
-}
-
-} // Anonymous namespace
-
-u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- // Signal the backend that this shader uses warp instructions.
- uses_warps = true;
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::VOTE: {
- const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
- const Node active = Operation(OperationCode::BallotThread, value);
- const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
- SetRegister(bb, instr.gpr0, active);
- SetPredicate(bb, instr.vote.dest_pred, vote);
- break;
- }
- case OpCode::Id::SHFL: {
- Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
- : GetRegister(instr.gpr39);
- Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
- : GetRegister(instr.gpr20);
-
- Node thread_id = Operation(OperationCode::ThreadId);
- Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
- Node seg_mask = BitfieldExtract(mask, 8, 16);
-
- Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
- Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
- Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
- Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
-
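-        // This follows the common warp-shuffle mask encoding (assumed here):
-        // the low 5 bits of the mask clamp the source lane and bits 8..23
-        // hold a segment mask; min_thread_id and max_thread_id bound the
-        // lanes a shuffle is allowed to read from.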
- Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
- switch (instr.shfl.operation) {
- case ShuffleOperation::Idx:
- return Operation(OperationCode::IBitwiseOr,
- Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
- min_thread_id);
- case ShuffleOperation::Down:
- return Operation(OperationCode::IAdd, thread_id, index);
- case ShuffleOperation::Up:
- return Operation(OperationCode::IAdd, thread_id,
- Operation(OperationCode::INegate, index));
- case ShuffleOperation::Bfly:
- return Operation(OperationCode::IBitwiseXor, thread_id, index);
- }
- UNREACHABLE();
- return Immediate(0U);
- }();
-
- Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
- if (instr.shfl.operation == ShuffleOperation::Up) {
- return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
- } else {
- return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
- }
- }();
-
- SetPredicate(bb, instr.shfl.pred48, in_bounds);
- SetRegister(
- bb, instr.gpr0,
- Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
- break;
- }
- case OpCode::Id::FSWZADD: {
- UNIMPLEMENTED_IF(instr.fswzadd.ndv);
-
- Node op_a = GetRegister(instr.gpr8);
- Node op_b = GetRegister(instr.gpr20);
- Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
- SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
- break;
- }
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
deleted file mode 100644
index 233b8fa42..000000000
--- a/src/video_core/shader/decode/xmad.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::PredCondition;
-
-u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
- const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
-
- UNIMPLEMENTED_IF(instr.xmad.sign_a);
- UNIMPLEMENTED_IF(instr.xmad.sign_b);
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
- "Condition codes generation in XMAD is not implemented");
-
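-    // Roughly, XMAD is a 16x16+32 multiply-add: one 16-bit half of op_a is
-    // multiplied by one 16-bit half of op_b, the product is optionally
-    // shifted left by 16 (PSL), and op_c, adjusted according to the mode,
-    // is added on top.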
- Node op_a = GetRegister(instr.gpr8);
-
- // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
- UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
- const bool is_signed_a = instr.xmad.sign_a == 1;
- const bool is_signed_b = instr.xmad.sign_b == 1;
- const bool is_signed_c = is_signed_a;
-
- auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
- op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
- switch (opcode->get().GetId()) {
- case OpCode::Id::XMAD_CR:
- return {instr.xmad.merge_56,
- instr.xmad.product_shift_left_second,
- instr.xmad.high_b,
- instr.xmad.mode_cbf,
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
- GetRegister(instr.gpr39)};
- case OpCode::Id::XMAD_RR:
- return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
- instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
- case OpCode::Id::XMAD_RC:
- return {false,
- false,
- instr.xmad.high_b,
- instr.xmad.mode_cbf,
- GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
- case OpCode::Id::XMAD_IMM:
- return {instr.xmad.merge_37,
- instr.xmad.product_shift_left,
- false,
- instr.xmad.mode,
- Immediate(static_cast<u32>(instr.xmad.imm20_16)),
- GetRegister(instr.gpr39)};
- default:
- UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
- return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
- }
- }();
-
- op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
- instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
-
- const Node original_b = op_b_binding;
- const Node op_b =
- SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
- is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
-
- // sign_a and sign_b were already checked for equality above, so just use one of them here.
- Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
- if (is_psl) {
- product =
- SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
- }
- SetTemporary(bb, 0, product);
- product = GetTemporary(0);
-
- Node original_c = op_c;
- const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
- op_c = [&] {
- switch (set_mode) {
- case Tegra::Shader::XmadMode::None:
- return original_c;
- case Tegra::Shader::XmadMode::CLo:
- return BitfieldExtract(std::move(original_c), 0, 16);
- case Tegra::Shader::XmadMode::CHi:
- return BitfieldExtract(std::move(original_c), 16, 16);
- case Tegra::Shader::XmadMode::CBcc: {
- Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
- original_b, Immediate(16));
- return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
- std::move(shifted_b));
- }
- case Tegra::Shader::XmadMode::CSfu: {
- const Node comp_a =
- GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
- const Node comp_b =
- GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
- const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
-
- const Node comp_minus_a = GetPredicateComparisonInteger(
- PredCondition::NE, is_signed_a,
- SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
- Immediate(0x80000000)),
- Immediate(0));
- const Node comp_minus_b = GetPredicateComparisonInteger(
- PredCondition::NE, is_signed_b,
- SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
- Immediate(0x80000000)),
- Immediate(0));
-
- Node new_c = Operation(
- OperationCode::Select, comp_minus_a,
- SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
- original_c);
- new_c = Operation(
- OperationCode::Select, comp_minus_b,
- SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
- std::move(new_c));
-
- return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
- }
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
-
- SetTemporary(bb, 1, op_c);
- op_c = GetTemporary(1);
-
- // TODO(Rodrigo): Use an appropriate sign for this operation
- Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
- SetTemporary(bb, 2, sum);
- sum = GetTemporary(2);
- if (is_merge) {
- const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
- Immediate(0), Immediate(16));
- const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
- Immediate(16));
- sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
- }
-
- SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
- SetRegister(bb, instr.gpr0, std::move(sum));
-
- return pc;
-}
-
-} // namespace VideoCommon::Shader
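
For reference, the XMAD path removed above decodes Maxwell's 16x16-bit multiply-add: each operand contributes one 16-bit half, the product is optionally shifted left by 16 (PSL), op_c is adjusted by the mode (CLo/CHi/CBcc/CSfu), and MRG merges the low half of the sum with op_b shifted into the high half. A minimal standalone sketch of the unsigned arithmetic, with illustrative names and the mode adjustments to op_c omitted:

    #include <cstdint>

    // Unsigned XMAD core: (a16 * b16 [<< 16 if psl]) + op_c, optionally merging
    // the low 16 bits of the sum with op_b << 16 (MRG). Mode handling omitted.
    uint32_t XmadUnsigned(uint32_t op_a, uint32_t op_b, uint32_t op_c, bool high_a,
                          bool high_b, bool psl, bool merge) {
        const uint32_t a16 = (high_a ? op_a >> 16 : op_a) & 0xFFFF;
        const uint32_t b16 = (high_b ? op_b >> 16 : op_b) & 0xFFFF;
        uint32_t product = a16 * b16;
        if (psl) {
            product <<= 16;
        }
        uint32_t sum = product + op_c;
        if (merge) {
            sum = (sum & 0xFFFF) | (op_b << 16);
        }
        return sum;
    }
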
diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp
deleted file mode 100644
index 2647865d4..000000000
--- a/src/video_core/shader/expr.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <memory>
-#include <variant>
-
-#include "video_core/shader/expr.h"
-
-namespace VideoCommon::Shader {
-namespace {
-bool ExprIsBoolean(const Expr& expr) {
- return std::holds_alternative<ExprBoolean>(*expr);
-}
-
-bool ExprBooleanGet(const Expr& expr) {
- return std::get_if<ExprBoolean>(expr.get())->value;
-}
-} // Anonymous namespace
-
-bool ExprAnd::operator==(const ExprAnd& b) const {
- return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
-}
-
-bool ExprAnd::operator!=(const ExprAnd& b) const {
- return !operator==(b);
-}
-
-bool ExprOr::operator==(const ExprOr& b) const {
- return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
-}
-
-bool ExprOr::operator!=(const ExprOr& b) const {
- return !operator==(b);
-}
-
-bool ExprNot::operator==(const ExprNot& b) const {
- return *operand1 == *b.operand1;
-}
-
-bool ExprNot::operator!=(const ExprNot& b) const {
- return !operator==(b);
-}
-
-Expr MakeExprNot(Expr first) {
- if (std::holds_alternative<ExprNot>(*first)) {
- return std::get_if<ExprNot>(first.get())->operand1;
- }
- return MakeExpr<ExprNot>(std::move(first));
-}
-
-Expr MakeExprAnd(Expr first, Expr second) {
- if (ExprIsBoolean(first)) {
- return ExprBooleanGet(first) ? second : first;
- }
- if (ExprIsBoolean(second)) {
- return ExprBooleanGet(second) ? first : second;
- }
- return MakeExpr<ExprAnd>(std::move(first), std::move(second));
-}
-
-Expr MakeExprOr(Expr first, Expr second) {
- if (ExprIsBoolean(first)) {
- return ExprBooleanGet(first) ? first : second;
- }
- if (ExprIsBoolean(second)) {
- return ExprBooleanGet(second) ? second : first;
- }
- return MakeExpr<ExprOr>(std::move(first), std::move(second));
-}
-
-bool ExprAreEqual(const Expr& first, const Expr& second) {
- return (*first) == (*second);
-}
-
-bool ExprAreOpposite(const Expr& first, const Expr& second) {
- if (std::holds_alternative<ExprNot>(*first)) {
- return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second);
- }
- if (std::holds_alternative<ExprNot>(*second)) {
- return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first);
- }
- return false;
-}
-
-bool ExprIsTrue(const Expr& first) {
- if (ExprIsBoolean(first)) {
- return ExprBooleanGet(first);
- }
- return false;
-}
-
-} // namespace VideoCommon::Shader
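
The constructors above constant-fold boolean literals so the control-flow pass never emits dead conditions: AND with true yields the other operand, OR with true yields the true literal, and a double negation returns the original operand. A short usage sketch, assuming the declarations from expr.h (the predicate index is arbitrary):

    void FoldingExamples() {
        const Expr x = MakeExpr<ExprPredicate>(3u);
        const Expr t = MakeExpr<ExprBoolean>(true);

        const Expr a = MakeExprAnd(t, x);           // folds to x (true && x == x)
        const Expr o = MakeExprOr(t, x);            // folds to the true literal
        const Expr n = MakeExprNot(MakeExprNot(x)); // double negation folds back to x

        // ExprAreEqual(a, x), ExprIsTrue(o) and ExprAreEqual(n, x) all hold.
    }
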
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
deleted file mode 100644
index cda284c72..000000000
--- a/src/video_core/shader/expr.h
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <variant>
-
-#include "video_core/engines/shader_bytecode.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::ConditionCode;
-using Tegra::Shader::Pred;
-
-class ExprAnd;
-class ExprBoolean;
-class ExprCondCode;
-class ExprGprEqual;
-class ExprNot;
-class ExprOr;
-class ExprPredicate;
-class ExprVar;
-
-using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
- ExprBoolean, ExprGprEqual>;
-using Expr = std::shared_ptr<ExprData>;
-
-class ExprAnd final {
-public:
- explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
-
- bool operator==(const ExprAnd& b) const;
- bool operator!=(const ExprAnd& b) const;
-
- Expr operand1;
- Expr operand2;
-};
-
-class ExprOr final {
-public:
- explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
-
- bool operator==(const ExprOr& b) const;
- bool operator!=(const ExprOr& b) const;
-
- Expr operand1;
- Expr operand2;
-};
-
-class ExprNot final {
-public:
- explicit ExprNot(Expr a) : operand1{std::move(a)} {}
-
- bool operator==(const ExprNot& b) const;
- bool operator!=(const ExprNot& b) const;
-
- Expr operand1;
-};
-
-class ExprVar final {
-public:
- explicit ExprVar(u32 index) : var_index{index} {}
-
- bool operator==(const ExprVar& b) const {
- return var_index == b.var_index;
- }
-
- bool operator!=(const ExprVar& b) const {
- return !operator==(b);
- }
-
- u32 var_index;
-};
-
-class ExprPredicate final {
-public:
- explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {}
-
- bool operator==(const ExprPredicate& b) const {
- return predicate == b.predicate;
- }
-
- bool operator!=(const ExprPredicate& b) const {
- return !operator==(b);
- }
-
- u32 predicate;
-};
-
-class ExprCondCode final {
-public:
- explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {}
-
- bool operator==(const ExprCondCode& b) const {
- return cc == b.cc;
- }
-
- bool operator!=(const ExprCondCode& b) const {
- return !operator==(b);
- }
-
- ConditionCode cc;
-};
-
-class ExprBoolean final {
-public:
- explicit ExprBoolean(bool val) : value{val} {}
-
- bool operator==(const ExprBoolean& b) const {
- return value == b.value;
- }
-
- bool operator!=(const ExprBoolean& b) const {
- return !operator==(b);
- }
-
- bool value;
-};
-
-class ExprGprEqual final {
-public:
- explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {}
-
- bool operator==(const ExprGprEqual& b) const {
- return gpr == b.gpr && value == b.value;
- }
-
- bool operator!=(const ExprGprEqual& b) const {
- return !operator==(b);
- }
-
- u32 gpr;
- u32 value;
-};
-
-template <typename T, typename... Args>
-Expr MakeExpr(Args&&... args) {
- static_assert(std::is_convertible_v<T, ExprData>);
- return std::make_shared<ExprData>(T(std::forward<Args>(args)...));
-}
-
-bool ExprAreEqual(const Expr& first, const Expr& second);
-
-bool ExprAreOpposite(const Expr& first, const Expr& second);
-
-Expr MakeExprNot(Expr first);
-
-Expr MakeExprAnd(Expr first, Expr second);
-
-Expr MakeExprOr(Expr first, Expr second);
-
-bool ExprIsTrue(const Expr& first);
-
-} // namespace VideoCommon::Shader
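
Since Expr is a shared_ptr to a std::variant, consumers can pattern-match with std::visit or std::get_if. An illustrative helper, not part of the original API, that stringifies two of the leaf cases and falls back to a placeholder for compound expressions:

    #include <string>
    #include <type_traits>
    #include <variant>

    std::string DumpExpr(const Expr& expr) {
        return std::visit(
            [](const auto& e) -> std::string {
                using T = std::decay_t<decltype(e)>;
                if constexpr (std::is_same_v<T, ExprBoolean>) {
                    return e.value ? "true" : "false";
                } else if constexpr (std::is_same_v<T, ExprPredicate>) {
                    return "P" + std::to_string(e.predicate);
                } else {
                    return "<compound>";
                }
            },
            *expr);
    }
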
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
deleted file mode 100644
index e18ccba8e..000000000
--- a/src/video_core/shader/memory_util.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <cstddef>
-
-#include <boost/container_hash/hash.hpp>
-
-#include "common/common_types.h"
-#include "core/core.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
- const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
- return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
-}
-
-bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
- // Sched instructions appear once every 4 instructions.
- constexpr std::size_t SchedPeriod = 4;
- const std::size_t absolute_offset = offset - main_offset;
- return (absolute_offset % SchedPeriod) == 0;
-}
-
-std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
- // This is the encoded version of BRA that jumps to itself. All Nvidia
- // shaders end with one.
- static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
- static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL;
-
- const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
- std::size_t offset = start_offset;
- while (offset < program.size()) {
- const u64 instruction = program[offset];
- if (!IsSchedInstruction(offset, start_offset)) {
- if ((instruction & MASK) == SELF_JUMPING_BRANCH) {
- // End on Maxwell's "nop" instruction
- break;
- }
- if (instruction == 0) {
- break;
- }
- }
- ++offset;
- }
- // The last instruction is included in the program size
- return std::min(offset + 1, program.size());
-}
-
-ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
- const u8* host_ptr, bool is_compute) {
- ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
- ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; });
- memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
- code.resize(CalculateProgramSize(code, is_compute));
- return code;
-}
-
-u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
- const ProgramCode& code_b) {
- size_t unique_identifier = boost::hash_value(code);
- if (is_a) {
- // VertexA programs include two programs
- boost::hash_combine(unique_identifier, boost::hash_value(code_b));
- }
- return static_cast<u64>(unique_identifier);
-}
-
-} // namespace VideoCommon::Shader
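
The scan above relies on Maxwell's instruction layout: relative to the main offset, every fourth 64-bit slot is a scheduler word rather than an executable instruction, so the terminator checks skip those slots. A standalone illustration of that layout assumption:

    #include <cstddef>
    #include <iostream>

    int main() {
        constexpr std::size_t main_offset = 10; // STAGE_MAIN_OFFSET
        for (std::size_t offset = main_offset; offset < main_offset + 8; ++offset) {
            const bool is_sched = ((offset - main_offset) % 4) == 0;
            std::cout << "offset " << offset
                      << (is_sched ? ": sched word\n" : ": instruction\n");
        }
    }
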
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
deleted file mode 100644
index 4624d38e6..000000000
--- a/src/video_core/shader/memory_util.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <cstddef>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-
-namespace Tegra {
-class MemoryManager;
-}
-
-namespace VideoCommon::Shader {
-
-using ProgramCode = std::vector<u64>;
-
-constexpr u32 STAGE_MAIN_OFFSET = 10;
-constexpr u32 KERNEL_MAIN_OFFSET = 0;
-
-/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
-
-/// Returns whether the instruction at the given offset is a scheduler (sched) instruction
-bool IsSchedInstruction(std::size_t offset, std::size_t main_offset);
-
-/// Calculates the size of a program stream
-std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute);
-
-/// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
- const u8* host_ptr, bool is_compute);
-
-/// Hashes one (or two) program streams
-u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
- const ProgramCode& code_b = {});
-
-} // namespace VideoCommon::Shader
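
A hedged usage sketch of this API; the engine and memory-manager references are assumed to come from the caller's GPU context, and VertexB is the main vertex stage:

    u64 HashVertexShader(Tegra::Engines::Maxwell3D& maxwell3d,
                         Tegra::MemoryManager& memory_manager, const u8* host_ptr) {
        using Maxwell = Tegra::Engines::Maxwell3D::Regs;
        const GPUVAddr addr = GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB);
        const ProgramCode code = GetShaderCode(memory_manager, addr, host_ptr, false);
        return GetUniqueIdentifier(Tegra::Engines::ShaderType::Vertex, false, code);
    }
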
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
deleted file mode 100644
index b54d33763..000000000
--- a/src/video_core/shader/node.h
+++ /dev/null
@@ -1,701 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <memory>
-#include <optional>
-#include <string>
-#include <tuple>
-#include <utility>
-#include <variant>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-
-namespace VideoCommon::Shader {
-
-enum class OperationCode {
- Assign, /// (float& dest, float src) -> void
-
- Select, /// (MetaArithmetic, bool pred, float a, float b) -> float
-
- FAdd, /// (MetaArithmetic, float a, float b) -> float
- FMul, /// (MetaArithmetic, float a, float b) -> float
- FDiv, /// (MetaArithmetic, float a, float b) -> float
- FFma, /// (MetaArithmetic, float a, float b, float c) -> float
- FNegate, /// (MetaArithmetic, float a) -> float
- FAbsolute, /// (MetaArithmetic, float a) -> float
- FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
- FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float
- FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float
- FMin, /// (MetaArithmetic, float a, float b) -> float
- FMax, /// (MetaArithmetic, float a, float b) -> float
- FCos, /// (MetaArithmetic, float a) -> float
- FSin, /// (MetaArithmetic, float a) -> float
- FExp2, /// (MetaArithmetic, float a) -> float
- FLog2, /// (MetaArithmetic, float a) -> float
- FInverseSqrt, /// (MetaArithmetic, float a) -> float
- FSqrt, /// (MetaArithmetic, float a) -> float
- FRoundEven, /// (MetaArithmetic, float a) -> float
- FFloor, /// (MetaArithmetic, float a) -> float
- FCeil, /// (MetaArithmetic, float a) -> float
- FTrunc, /// (MetaArithmetic, float a) -> float
- FCastInteger, /// (MetaArithmetic, int a) -> float
- FCastUInteger, /// (MetaArithmetic, uint a) -> float
- FSwizzleAdd, /// (float a, float b, uint mask) -> float
-
- IAdd, /// (MetaArithmetic, int a, int b) -> int
- IMul, /// (MetaArithmetic, int a, int b) -> int
- IDiv, /// (MetaArithmetic, int a, int b) -> int
- INegate, /// (MetaArithmetic, int a) -> int
- IAbsolute, /// (MetaArithmetic, int a) -> int
- IMin, /// (MetaArithmetic, int a, int b) -> int
- IMax, /// (MetaArithmetic, int a, int b) -> int
- ICastFloat, /// (MetaArithmetic, float a) -> int
- ICastUnsigned, /// (MetaArithmetic, uint a) -> int
- ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int
- ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int
- IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
- IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int
- IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int
- IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int
- IBitwiseNot, /// (MetaArithmetic, int a) -> int
- IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
- IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int bits) -> int
- IBitCount, /// (MetaArithmetic, int) -> int
- IBitMSB, /// (MetaArithmetic, int) -> int
-
- UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
- UMul, /// (MetaArithmetic, uint a, uint b) -> uint
- UDiv, /// (MetaArithmetic, uint a, uint b) -> uint
- UMin, /// (MetaArithmetic, uint a, uint b) -> uint
- UMax, /// (MetaArithmetic, uint a, uint b) -> uint
- UCastFloat, /// (MetaArithmetic, float a) -> uint
- UCastSigned, /// (MetaArithmetic, int a) -> uint
- ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint
- ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
- UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
- UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint
- UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint
- UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint
- UBitwiseNot, /// (MetaArithmetic, uint a) -> uint
- UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
- UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint
- UBitCount, /// (MetaArithmetic, uint) -> uint
- UBitMSB, /// (MetaArithmetic, uint) -> uint
-
- HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
- HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
- HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
- HAbsolute, /// (f16vec2 a) -> f16vec2
- HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
- HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
- HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
- HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
- HMergeF32, /// (f16vec2 src) -> float
- HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
- HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
- HPack2, /// (float a, float b) -> f16vec2
-
- LogicalAssign, /// (bool& dst, bool src) -> void
- LogicalAnd, /// (bool a, bool b) -> bool
- LogicalOr, /// (bool a, bool b) -> bool
- LogicalXor, /// (bool a, bool b) -> bool
- LogicalNegate, /// (bool a) -> bool
- LogicalPick2, /// (bool2 pair, uint index) -> bool
- LogicalAnd2, /// (bool2 a) -> bool
-
- LogicalFOrdLessThan, /// (float a, float b) -> bool
- LogicalFOrdEqual, /// (float a, float b) -> bool
- LogicalFOrdLessEqual, /// (float a, float b) -> bool
- LogicalFOrdGreaterThan, /// (float a, float b) -> bool
- LogicalFOrdNotEqual, /// (float a, float b) -> bool
- LogicalFOrdGreaterEqual, /// (float a, float b) -> bool
- LogicalFOrdered, /// (float a, float b) -> bool
- LogicalFUnordered, /// (float a, float b) -> bool
- LogicalFUnordLessThan, /// (float a, float b) -> bool
- LogicalFUnordEqual, /// (float a, float b) -> bool
- LogicalFUnordLessEqual, /// (float a, float b) -> bool
- LogicalFUnordGreaterThan, /// (float a, float b) -> bool
- LogicalFUnordNotEqual, /// (float a, float b) -> bool
- LogicalFUnordGreaterEqual, /// (float a, float b) -> bool
-
- LogicalILessThan, /// (int a, int b) -> bool
- LogicalIEqual, /// (int a, int b) -> bool
- LogicalILessEqual, /// (int a, int b) -> bool
- LogicalIGreaterThan, /// (int a, int b) -> bool
- LogicalINotEqual, /// (int a, int b) -> bool
- LogicalIGreaterEqual, /// (int a, int b) -> bool
-
- LogicalULessThan, /// (uint a, uint b) -> bool
- LogicalUEqual, /// (uint a, uint b) -> bool
- LogicalULessEqual, /// (uint a, uint b) -> bool
- LogicalUGreaterThan, /// (uint a, uint b) -> bool
- LogicalUNotEqual, /// (uint a, uint b) -> bool
- LogicalUGreaterEqual, /// (uint a, uint b) -> bool
-
- LogicalAddCarry, /// (uint a, uint b) -> bool
-
- Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
- Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
-
- Texture, /// (MetaTexture, float[N] coords) -> float4
- TextureLod, /// (MetaTexture, float[N] coords) -> float4
- TextureGather, /// (MetaTexture, float[N] coords) -> float4
- TextureQueryDimensions, /// (MetaTexture, float a) -> float4
- TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
- TexelFetch, /// (MetaTexture, int[N], int) -> float4
- TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
-
- ImageLoad, /// (MetaImage, int[N] coords) -> void
- ImageStore, /// (MetaImage, int[N] coords) -> void
-
- AtomicImageAdd, /// (MetaImage, int[N] coords) -> void
- AtomicImageAnd, /// (MetaImage, int[N] coords) -> void
- AtomicImageOr, /// (MetaImage, int[N] coords) -> void
- AtomicImageXor, /// (MetaImage, int[N] coords) -> void
- AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
-
- AtomicUExchange, /// (memory, uint) -> uint
- AtomicUAdd, /// (memory, uint) -> uint
- AtomicUMin, /// (memory, uint) -> uint
- AtomicUMax, /// (memory, uint) -> uint
- AtomicUAnd, /// (memory, uint) -> uint
- AtomicUOr, /// (memory, uint) -> uint
- AtomicUXor, /// (memory, uint) -> uint
-
- AtomicIExchange, /// (memory, int) -> int
- AtomicIAdd, /// (memory, int) -> int
- AtomicIMin, /// (memory, int) -> int
- AtomicIMax, /// (memory, int) -> int
- AtomicIAnd, /// (memory, int) -> int
- AtomicIOr, /// (memory, int) -> int
- AtomicIXor, /// (memory, int) -> int
-
- ReduceUAdd, /// (memory, uint) -> void
- ReduceUMin, /// (memory, uint) -> void
- ReduceUMax, /// (memory, uint) -> void
- ReduceUAnd, /// (memory, uint) -> void
- ReduceUOr, /// (memory, uint) -> void
- ReduceUXor, /// (memory, uint) -> void
-
- ReduceIAdd, /// (memory, int) -> void
- ReduceIMin, /// (memory, int) -> void
- ReduceIMax, /// (memory, int) -> void
- ReduceIAnd, /// (memory, int) -> void
- ReduceIOr, /// (memory, int) -> void
- ReduceIXor, /// (memory, int) -> void
-
- Branch, /// (uint branch_target) -> void
- BranchIndirect, /// (uint branch_target) -> void
- PushFlowStack, /// (uint branch_target) -> void
- PopFlowStack, /// () -> void
- Exit, /// () -> void
- Discard, /// () -> void
-
- EmitVertex, /// () -> void
- EndPrimitive, /// () -> void
-
- InvocationId, /// () -> int
- YNegate, /// () -> float
- LocalInvocationIdX, /// () -> uint
- LocalInvocationIdY, /// () -> uint
- LocalInvocationIdZ, /// () -> uint
- WorkGroupIdX, /// () -> uint
- WorkGroupIdY, /// () -> uint
- WorkGroupIdZ, /// () -> uint
-
- BallotThread, /// (bool) -> uint
- VoteAll, /// (bool) -> bool
- VoteAny, /// (bool) -> bool
- VoteEqual, /// (bool) -> bool
-
- ThreadId, /// () -> uint
- ThreadEqMask, /// () -> uint
- ThreadGeMask, /// () -> uint
- ThreadGtMask, /// () -> uint
- ThreadLeMask, /// () -> uint
- ThreadLtMask, /// () -> uint
- ShuffleIndexed, /// (uint value, uint index) -> uint
-
- Barrier, /// () -> void
- MemoryBarrierGroup, /// () -> void
- MemoryBarrierGlobal, /// () -> void
-
- Amount,
-};
-
-enum class InternalFlag {
- Zero = 0,
- Sign = 1,
- Carry = 2,
- Overflow = 3,
- Amount = 4,
-};
-
-enum class MetaStackClass {
- Ssy,
- Pbk,
-};
-
-class OperationNode;
-class ConditionalNode;
-class GprNode;
-class CustomVarNode;
-class ImmediateNode;
-class InternalFlagNode;
-class PredicateNode;
-class AbufNode;
-class CbufNode;
-class LmemNode;
-class PatchNode;
-class SmemNode;
-class GmemNode;
-class CommentNode;
-
-using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
- InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
- LmemNode, SmemNode, GmemNode, CommentNode>;
-using Node = std::shared_ptr<NodeData>;
-using Node4 = std::array<Node, 4>;
-using NodeBlock = std::vector<Node>;
-
-struct ArraySamplerNode;
-struct BindlessSamplerNode;
-struct SeparateSamplerNode;
-
-using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
-using TrackSampler = std::shared_ptr<TrackSamplerData>;
-
-struct SamplerEntry {
- /// Bound samplers constructor
- explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_,
- bool is_shadow_, bool is_buffer_, bool is_indexed_)
- : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_},
- is_buffer{is_buffer_}, is_indexed{is_indexed_} {}
-
- /// Separate sampler constructor
- explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
- Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_,
- bool is_buffer_)
- : index{index_}, offset{offsets.first}, secondary_offset{offsets.second},
- buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_},
- is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}
-
- /// Bindless samplers constructor
- explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_,
- bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
- : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_},
- is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} {
- }
-
- u32 index = 0; ///< Emulated index given to this sampler.
- u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
- u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
- u32 buffer = 0; ///< Buffer where the bindless sampler is read.
- u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
- u32 size = 1; ///< Size of the sampler.
-
- Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
- bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
- bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
- bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
- bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
- bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
- bool is_separated = false; ///< Whether the image and sampler are separate or not.
-};
-
-/// Represents a tracked indexed bindless sampler into a direct const buffer
-struct ArraySamplerNode {
- u32 index;
- u32 base_offset;
- u32 bindless_var;
-};
-
-/// Represents a tracked separate sampler image pair that was folded statically
-struct SeparateSamplerNode {
- std::pair<u32, u32> indices;
- std::pair<u32, u32> offsets;
-};
-
-/// Represents a tracked bindless sampler into a direct const buffer
-struct BindlessSamplerNode {
- u32 index;
- u32 offset;
-};
-
-struct ImageEntry {
-public:
- /// Bound images constructor
- explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
- : index{index_}, offset{offset_}, type{type_} {}
-
- /// Bindless images constructor
- explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
- : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}
-
- void MarkWrite() {
- is_written = true;
- }
-
- void MarkRead() {
- is_read = true;
- }
-
- void MarkAtomic() {
- MarkWrite();
- MarkRead();
- is_atomic = true;
- }
-
- u32 index = 0;
- u32 offset = 0;
- u32 buffer = 0;
-
- Tegra::Shader::ImageType type{};
- bool is_bindless = false;
- bool is_written = false;
- bool is_read = false;
- bool is_atomic = false;
-};
-
-struct GlobalMemoryBase {
- u32 cbuf_index = 0;
- u32 cbuf_offset = 0;
-
- [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const {
- return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
- }
-};
-
-/// Parameters describing an arithmetic operation
-struct MetaArithmetic {
- bool precise{}; ///< Whether the operation can be constrained or not
-};
-
-/// Parameters describing a texture sampler
-struct MetaTexture {
- SamplerEntry sampler;
- Node array;
- Node depth_compare;
- std::vector<Node> aoffi;
- std::vector<Node> ptp;
- std::vector<Node> derivates;
- Node bias;
- Node lod;
- Node component;
- u32 element{};
- Node index;
-};
-
-struct MetaImage {
- const ImageEntry& image;
- std::vector<Node> values;
- u32 element{};
-};
-
-/// Parameters that modify an operation but are not part of any particular operand
-using Meta =
- std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
-
-class AmendNode {
-public:
- [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const {
- if (amend_index == amend_null_index) {
- return std::nullopt;
- }
- return {amend_index};
- }
-
- void SetAmendIndex(std::size_t index) {
- amend_index = index;
- }
-
- void ClearAmend() {
- amend_index = amend_null_index;
- }
-
-private:
- static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
- std::size_t amend_index{amend_null_index};
-};
-
-/// Holds any kind of operation that can be done in the IR
-class OperationNode final : public AmendNode {
-public:
- explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {}
-
- explicit OperationNode(OperationCode code_, Meta meta_)
- : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {}
-
- explicit OperationNode(OperationCode code_, std::vector<Node> operands_)
- : OperationNode(code_, Meta{}, std::move(operands_)) {}
-
- explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_)
- : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {}
-
- template <typename... Args>
- explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_)
- : code{code_}, meta{std::move(meta_)}, operands{operands_...} {}
-
- [[nodiscard]] OperationCode GetCode() const {
- return code;
- }
-
- [[nodiscard]] const Meta& GetMeta() const {
- return meta;
- }
-
- [[nodiscard]] std::size_t GetOperandsCount() const {
- return operands.size();
- }
-
- [[nodiscard]] const Node& operator[](std::size_t operand_index) const {
- return operands.at(operand_index);
- }
-
-private:
- OperationCode code{};
- Meta meta{};
- std::vector<Node> operands;
-};
-
-/// Encloses conditionally-executed code behind a condition node that returns a boolean
-class ConditionalNode final : public AmendNode {
-public:
- explicit ConditionalNode(Node condition_, std::vector<Node>&& code_)
- : condition{std::move(condition_)}, code{std::move(code_)} {}
-
- [[nodiscard]] const Node& GetCondition() const {
- return condition;
- }
-
- [[nodiscard]] const std::vector<Node>& GetCode() const {
- return code;
- }
-
-private:
- Node condition; ///< Condition to be satisfied
- std::vector<Node> code; ///< Code to execute
-};
-
-/// A general purpose register
-class GprNode final {
-public:
- explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {}
-
- [[nodiscard]] constexpr u32 GetIndex() const {
- return static_cast<u32>(index);
- }
-
-private:
- Tegra::Shader::Register index{};
-};
-
-/// A custom variable
-class CustomVarNode final {
-public:
- explicit constexpr CustomVarNode(u32 index_) : index{index_} {}
-
- [[nodiscard]] constexpr u32 GetIndex() const {
- return index;
- }
-
-private:
- u32 index{};
-};
-
-/// A 32-bit value that represents an immediate value
-class ImmediateNode final {
-public:
- explicit constexpr ImmediateNode(u32 value_) : value{value_} {}
-
- [[nodiscard]] constexpr u32 GetValue() const {
- return value;
- }
-
-private:
- u32 value{};
-};
-
-/// One of Maxwell's internal flags
-class InternalFlagNode final {
-public:
- explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {}
-
- [[nodiscard]] constexpr InternalFlag GetFlag() const {
- return flag;
- }
-
-private:
- InternalFlag flag{};
-};
-
-/// A predicate register; it can be negated without additional nodes
-class PredicateNode final {
-public:
- explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_)
- : index{index_}, negated{negated_} {}
-
- [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const {
- return index;
- }
-
- [[nodiscard]] constexpr bool IsNegated() const {
- return negated;
- }
-
-private:
- Tegra::Shader::Pred index{};
- bool negated{};
-};
-
-/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
-class AbufNode final {
-public:
- // Initialize for standard attributes (index is explicit).
- explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {})
- : buffer{std::move(buffer_)}, index{index_}, element{element_} {}
-
- // Initialize for physical attributes (index is a variable value).
- explicit AbufNode(Node physical_address_, Node buffer_ = {})
- : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {}
-
- [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const {
- return index;
- }
-
- [[nodiscard]] u32 GetElement() const {
- return element;
- }
-
- [[nodiscard]] const Node& GetBuffer() const {
- return buffer;
- }
-
- [[nodiscard]] bool IsPhysicalBuffer() const {
- return static_cast<bool>(physical_address);
- }
-
- [[nodiscard]] const Node& GetPhysicalAddress() const {
- return physical_address;
- }
-
-private:
- Node physical_address;
- Node buffer;
- Tegra::Shader::Attribute::Index index{};
- u32 element{};
-};
-
-/// Patch memory (used to communicate tessellation stages).
-class PatchNode final {
-public:
- explicit constexpr PatchNode(u32 offset_) : offset{offset_} {}
-
- [[nodiscard]] constexpr u32 GetOffset() const {
- return offset;
- }
-
-private:
- u32 offset{};
-};
-
-/// Constant buffer node, usually mapped to uniform buffers in GLSL
-class CbufNode final {
-public:
- explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {}
-
- [[nodiscard]] u32 GetIndex() const {
- return index;
- }
-
- [[nodiscard]] const Node& GetOffset() const {
- return offset;
- }
-
-private:
- u32 index{};
- Node offset;
-};
-
-/// Local memory node
-class LmemNode final {
-public:
- explicit LmemNode(Node address_) : address{std::move(address_)} {}
-
- [[nodiscard]] const Node& GetAddress() const {
- return address;
- }
-
-private:
- Node address;
-};
-
-/// Shared memory node
-class SmemNode final {
-public:
- explicit SmemNode(Node address_) : address{std::move(address_)} {}
-
- [[nodiscard]] const Node& GetAddress() const {
- return address;
- }
-
-private:
- Node address;
-};
-
-/// Global memory node
-class GmemNode final {
-public:
- explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_)
- : real_address{std::move(real_address_)}, base_address{std::move(base_address_)},
- descriptor{descriptor_} {}
-
- [[nodiscard]] const Node& GetRealAddress() const {
- return real_address;
- }
-
- [[nodiscard]] const Node& GetBaseAddress() const {
- return base_address;
- }
-
- [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const {
- return descriptor;
- }
-
-private:
- Node real_address;
- Node base_address;
- GlobalMemoryBase descriptor;
-};
-
-/// Commentary, can be dropped
-class CommentNode final {
-public:
- explicit CommentNode(std::string text_) : text{std::move(text_)} {}
-
- [[nodiscard]] const std::string& GetText() const {
- return text;
- }
-
-private:
- std::string text;
-};
-
-} // namespace VideoCommon::Shader
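
Nodes are shared_ptr-wrapped variants, so call sites match alternatives with std::get_if, as the IR passes do. A small illustrative helper (not part of the original header):

    #include <optional>

    // Returns the immediate payload when the node is an ImmediateNode, else nullopt.
    std::optional<u32> TryGetImmediate(const Node& node) {
        if (const auto* imm = std::get_if<ImmediateNode>(node.get())) {
            return imm->GetValue();
        }
        return std::nullopt;
    }
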
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
deleted file mode 100644
index 6a5b6940d..000000000
--- a/src/video_core/shader/node_helper.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-Node Conditional(Node condition, std::vector<Node> code) {
- return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
-}
-
-Node Comment(std::string text) {
- return MakeNode<CommentNode>(std::move(text));
-}
-
-Node Immediate(u32 value) {
- return MakeNode<ImmediateNode>(value);
-}
-
-Node Immediate(s32 value) {
- return Immediate(static_cast<u32>(value));
-}
-
-Node Immediate(f32 value) {
- u32 integral;
- std::memcpy(&integral, &value, sizeof(u32));
- return Immediate(integral);
-}
-
-OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) {
- if (is_signed) {
- return operation_code;
- }
- switch (operation_code) {
- case OperationCode::FCastInteger:
- return OperationCode::FCastUInteger;
- case OperationCode::IAdd:
- return OperationCode::UAdd;
- case OperationCode::IMul:
- return OperationCode::UMul;
- case OperationCode::IDiv:
- return OperationCode::UDiv;
- case OperationCode::IMin:
- return OperationCode::UMin;
- case OperationCode::IMax:
- return OperationCode::UMax;
- case OperationCode::ICastFloat:
- return OperationCode::UCastFloat;
- case OperationCode::ICastUnsigned:
- return OperationCode::UCastSigned;
- case OperationCode::ILogicalShiftLeft:
- return OperationCode::ULogicalShiftLeft;
- case OperationCode::ILogicalShiftRight:
- return OperationCode::ULogicalShiftRight;
- case OperationCode::IArithmeticShiftRight:
- return OperationCode::UArithmeticShiftRight;
- case OperationCode::IBitwiseAnd:
- return OperationCode::UBitwiseAnd;
- case OperationCode::IBitwiseOr:
- return OperationCode::UBitwiseOr;
- case OperationCode::IBitwiseXor:
- return OperationCode::UBitwiseXor;
- case OperationCode::IBitwiseNot:
- return OperationCode::UBitwiseNot;
- case OperationCode::IBitfieldExtract:
- return OperationCode::UBitfieldExtract;
- case OperationCode::IBitfieldInsert:
- return OperationCode::UBitfieldInsert;
- case OperationCode::IBitCount:
- return OperationCode::UBitCount;
- case OperationCode::LogicalILessThan:
- return OperationCode::LogicalULessThan;
- case OperationCode::LogicalIEqual:
- return OperationCode::LogicalUEqual;
- case OperationCode::LogicalILessEqual:
- return OperationCode::LogicalULessEqual;
- case OperationCode::LogicalIGreaterThan:
- return OperationCode::LogicalUGreaterThan;
- case OperationCode::LogicalINotEqual:
- return OperationCode::LogicalUNotEqual;
- case OperationCode::LogicalIGreaterEqual:
- return OperationCode::LogicalUGreaterEqual;
- case OperationCode::AtomicIExchange:
- return OperationCode::AtomicUExchange;
- case OperationCode::AtomicIAdd:
- return OperationCode::AtomicUAdd;
- case OperationCode::AtomicIMin:
- return OperationCode::AtomicUMin;
- case OperationCode::AtomicIMax:
- return OperationCode::AtomicUMax;
- case OperationCode::AtomicIAnd:
- return OperationCode::AtomicUAnd;
- case OperationCode::AtomicIOr:
- return OperationCode::AtomicUOr;
- case OperationCode::AtomicIXor:
- return OperationCode::AtomicUXor;
- case OperationCode::INegate:
- UNREACHABLE_MSG("Can't negate an unsigned integer");
- return {};
- case OperationCode::IAbsolute:
- UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
- return {};
- default:
- UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code);
- return {};
- }
-}
-
-} // namespace VideoCommon::Shader
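
The mapping above leaves signed opcodes untouched and swaps in the unsigned twin when is_signed is false, which is what lets one decoder path service both variants. For example:

    const OperationCode s = SignedToUnsignedCode(OperationCode::IAdd, true);  // stays IAdd
    const OperationCode u = SignedToUnsignedCode(OperationCode::IAdd, false); // becomes UAdd
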
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
deleted file mode 100644
index 1e0886185..000000000
--- a/src/video_core/shader/node_helper.h
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <tuple>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/shader/node.h"
-
-namespace VideoCommon::Shader {
-
-/// This arithmetic operation cannot be constrained
-inline constexpr MetaArithmetic PRECISE = {true};
-/// This arithmetic operation can be optimized away
-inline constexpr MetaArithmetic NO_PRECISE = {false};
-
-/// Creates a conditional node
-Node Conditional(Node condition, std::vector<Node> code);
-
-/// Creates a commentary node
-Node Comment(std::string text);
-
-/// Creates a u32 immediate
-Node Immediate(u32 value);
-
-/// Creates an s32 immediate
-Node Immediate(s32 value);
-
-/// Creates an f32 immediate
-Node Immediate(f32 value);
-
-/// Converts a signed operation code to an unsigned operation code
-OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
-
-template <typename T, typename... Args>
-Node MakeNode(Args&&... args) {
- static_assert(std::is_convertible_v<T, NodeData>);
- return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
-}
-
-template <typename T, typename... Args>
-TrackSampler MakeTrackSampler(Args&&... args) {
- static_assert(std::is_convertible_v<T, TrackSamplerData>);
- return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
-}
-
-template <typename... Args>
-Node Operation(OperationCode code, Args&&... args) {
- if constexpr (sizeof...(args) == 0) {
- return MakeNode<OperationNode>(code);
- } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>,
- Meta>) {
- return MakeNode<OperationNode>(code, std::forward<Args>(args)...);
- } else {
- return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...);
- }
-}
-
-template <typename... Args>
-Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) {
- return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...);
-}
-
-} // namespace VideoCommon::Shader
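
Operation() picks its overload by inspecting the first argument: anything convertible to Meta (such as PRECISE) is forwarded as the metadata, otherwise a default Meta{} is supplied. A usage sketch assuming this header:

    Node BuildPreciseSum() {
        Node a = Immediate(1.0f);
        Node b = Immediate(2.0f);
        // PRECISE converts to Meta, so the metadata-taking branch is selected.
        return Operation(OperationCode::FAdd, PRECISE, std::move(a), std::move(b));
    }
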
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
deleted file mode 100644
index 148d91fcb..000000000
--- a/src/video_core/shader/registry.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <tuple>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/kepler_compute.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/registry.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Engines::ConstBufferEngineInterface;
-using Tegra::Engines::SamplerDescriptor;
-using Tegra::Engines::ShaderType;
-
-namespace {
-
-GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
- if (shader_stage == ShaderType::Compute) {
- return {};
- }
-
- auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine);
-
- return {
- .tfb_layouts = graphics.regs.tfb_layouts,
- .tfb_varying_locs = graphics.regs.tfb_varying_locs,
- .primitive_topology = graphics.regs.draw.topology,
- .tessellation_primitive = graphics.regs.tess_mode.prim,
- .tessellation_spacing = graphics.regs.tess_mode.spacing,
- .tfb_enabled = graphics.regs.tfb_enabled != 0,
- .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0,
- };
-}
-
-ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
- if (shader_stage != ShaderType::Compute) {
- return {};
- }
-
- auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine);
- const auto& launch = compute.launch_description;
-
- return {
- .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z},
- .shared_memory_size_in_words = launch.shared_alloc,
- .local_memory_size_in_words = launch.local_pos_alloc,
- };
-}
-
-} // Anonymous namespace
-
-Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info)
- : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
- bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
-
-Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_)
- : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()},
- graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo(
- shader_stage, engine_)} {}
-
-Registry::~Registry() = default;
-
-std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
- const std::pair<u32, u32> key = {buffer, offset};
- const auto iter = keys.find(key);
- if (iter != keys.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
- keys.emplace(key, value);
- return value;
-}
-
-std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
- const u32 key = offset;
- const auto iter = bound_samplers.find(key);
- if (iter != bound_samplers.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
- bound_samplers.emplace(key, value);
- return value;
-}
-
-std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
- std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
- SeparateSamplerKey key;
- key.buffers = buffers;
- key.offsets = offsets;
- const auto iter = separate_samplers.find(key);
- if (iter != separate_samplers.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
-
- const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
- const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
- const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
- separate_samplers.emplace(key, value);
- return value;
-}
-
-std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) {
- const std::pair key = {buffer, offset};
- const auto iter = bindless_samplers.find(key);
- if (iter != bindless_samplers.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
- bindless_samplers.emplace(key, value);
- return value;
-}
-
-void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
- keys.insert_or_assign({buffer, offset}, value);
-}
-
-void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
- bound_samplers.insert_or_assign(offset, sampler);
-}
-
-void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
- bindless_samplers.insert_or_assign({buffer, offset}, sampler);
-}
-
-bool Registry::IsConsistent() const {
- if (!engine) {
- return true;
- }
- return std::all_of(keys.begin(), keys.end(),
- [this](const auto& pair) {
- const auto [cbuf, offset] = pair.first;
- const auto value = pair.second;
- return value == engine->AccessConstBuffer32(stage, cbuf, offset);
- }) &&
- std::all_of(bound_samplers.begin(), bound_samplers.end(),
- [this](const auto& sampler) {
- const auto [key, value] = sampler;
- return value == engine->AccessBoundSampler(stage, key);
- }) &&
- std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
- [this](const auto& sampler) {
- const auto [cbuf, offset] = sampler.first;
- const auto value = sampler.second;
- return value == engine->AccessBindlessSampler(stage, cbuf, offset);
- });
-}
-
-bool Registry::HasEqualKeys(const Registry& rhs) const {
- return std::tie(keys, bound_samplers, bindless_samplers) ==
- std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
-}
-
-const GraphicsInfo& Registry::GetGraphicsInfo() const {
- ASSERT(stage != Tegra::Engines::ShaderType::Compute);
- return graphics_info;
-}
-
-const ComputeInfo& Registry::GetComputeInfo() const {
- ASSERT(stage == Tegra::Engines::ShaderType::Compute);
- return compute_info;
-}
-
-} // namespace VideoCommon::Shader
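
The Obtain* methods above memoize through the key maps: the first lookup reads the engine and records the value, later lookups (including registries deserialized from the disk cache, which have no engine) are served from the map, and IsConsistent() later revalidates every recorded value against live GPU state. A hedged usage sketch, with maxwell3d assumed to come from the caller's context:

    std::optional<u32> ReadKeyTwice(Tegra::Engines::Maxwell3D& maxwell3d) {
        Registry registry{Tegra::Engines::ShaderType::Vertex, maxwell3d};
        (void)registry.ObtainKey(0, 16);  // first call: engine read, then memoized
        return registry.ObtainKey(0, 16); // second call: served from the key map
    }
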
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
deleted file mode 100644
index 4bebefdde..000000000
--- a/src/video_core/shader/registry.h
+++ /dev/null
@@ -1,172 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <optional>
-#include <type_traits>
-#include <unordered_map>
-#include <utility>
-
-#include "common/common_types.h"
-#include "common/hash.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/guest_driver.h"
-
-namespace VideoCommon::Shader {
-
-struct SeparateSamplerKey {
- std::pair<u32, u32> buffers;
- std::pair<u32, u32> offsets;
-};
-
-} // namespace VideoCommon::Shader
-
-namespace std {
-
-template <>
-struct hash<VideoCommon::Shader::SeparateSamplerKey> {
- std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
- return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
- key.offsets.second);
- }
-};
-
-template <>
-struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
- bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
- const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
- return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
- }
-};
-
-} // namespace std
-
-namespace VideoCommon::Shader {
-
-using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
-using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
-using SeparateSamplerMap =
- std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
-using BindlessSamplerMap =
- std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
-
-struct GraphicsInfo {
- using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
- std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
- tfb_layouts{};
- std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
- Maxwell::PrimitiveTopology primitive_topology{};
- Maxwell::TessellationPrimitive tessellation_primitive{};
- Maxwell::TessellationSpacing tessellation_spacing{};
- bool tfb_enabled = false;
- bool tessellation_clockwise = false;
-};
-static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
- std::is_standard_layout_v<GraphicsInfo>);
-
-struct ComputeInfo {
- std::array<u32, 3> workgroup_size{};
- u32 shared_memory_size_in_words = 0;
- u32 local_memory_size_in_words = 0;
-};
-static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
-
-struct SerializedRegistryInfo {
- VideoCore::GuestDriverProfile guest_driver_profile;
- u32 bound_buffer = 0;
- GraphicsInfo graphics;
- ComputeInfo compute;
-};
-
-/**
- * The Registry is a class used to interface the 3D and compute engines with the shader compiler.
- * With it, the shader can obtain required data from GPU state and store it for disk shader
- * compilation.
- */
-class Registry {
-public:
- explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
-
- explicit Registry(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine_);
-
- ~Registry();
-
- /// Retrieves a key from the registry. If it is already registered, the stored value is
- /// returned; otherwise it is read from Maxwell3D and registered.
- std::optional<u32> ObtainKey(u32 buffer, u32 offset);
-
- std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
-
- std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
- std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
-
- std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
-
- /// Inserts a key.
- void InsertKey(u32 buffer, u32 offset, u32 value);
-
- /// Inserts a bound sampler key.
- void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
-
- /// Inserts a bindless sampler key.
- void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
-
- /// Checks keys and samplers against the engine's current const buffers.
- /// Returns true if they have the same values, false otherwise.
- bool IsConsistent() const;
-
- /// Returns true if the keys are equal to the other ones in the registry.
- bool HasEqualKeys(const Registry& rhs) const;
-
- /// Returns graphics information from this shader
- const GraphicsInfo& GetGraphicsInfo() const;
-
- /// Returns compute information from this shader
- const ComputeInfo& GetComputeInfo() const;
-
- /// Returns the const buffer keys in the database.
- const KeyMap& GetKeys() const {
- return keys;
- }
-
- /// Gets the bound samplers database.
- const BoundSamplerMap& GetBoundSamplers() const {
- return bound_samplers;
- }
-
- /// Gets the bindless samplers database.
- const BindlessSamplerMap& GetBindlessSamplers() const {
- return bindless_samplers;
- }
-
- /// Gets the bound buffer used by this shader
- u32 GetBoundBuffer() const {
- return bound_buffer;
- }
-
- /// Obtains access to the guest driver's profile.
- VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
- return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
- }
-
-private:
- const Tegra::Engines::ShaderType stage;
- VideoCore::GuestDriverProfile stored_guest_driver_profile;
- Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
- KeyMap keys;
- BoundSamplerMap bound_samplers;
- SeparateSamplerMap separate_samplers;
- BindlessSamplerMap bindless_samplers;
- u32 bound_buffer;
- GraphicsInfo graphics_info;
- ComputeInfo compute_info;
-};
-
-} // namespace VideoCommon::Shader
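
SeparateSamplerKey becomes usable as an unordered_map key through the std::hash and std::equal_to specializations above, with the hash XOR-folding the four fields. A standalone illustration of the same pattern on a hypothetical two-field key:

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <unordered_map>

    struct PairKey {
        std::uint32_t a;
        std::uint32_t b;
    };

    namespace std {
    template <>
    struct hash<PairKey> {
        std::size_t operator()(const PairKey& key) const noexcept {
            return std::hash<std::uint32_t>{}(key.a ^ key.b); // XOR-fold, as above
        }
    };
    template <>
    struct equal_to<PairKey> {
        bool operator()(const PairKey& lhs, const PairKey& rhs) const noexcept {
            return lhs.a == rhs.a && lhs.b == rhs.b;
        }
    };
    } // namespace std

    int main() {
        std::unordered_map<PairKey, int> map;
        map[PairKey{1, 2}] = 42;
        return map.at(PairKey{1, 2}) == 42 ? 0 : 1;
    }
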
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
deleted file mode 100644
index a4987ffc6..000000000
--- a/src/video_core/shader/shader_ir.cpp
+++ /dev/null
@@ -1,464 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cmath>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Attribute;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::IpaMode;
-using Tegra::Shader::Pred;
-using Tegra::Shader::PredCondition;
-using Tegra::Shader::PredOperation;
-using Tegra::Shader::Register;
-
-ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_,
- Registry& registry_)
- : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{
- registry_} {
- Decode();
- PostDecode();
-}
-
-ShaderIR::~ShaderIR() = default;
-
-Node ShaderIR::GetRegister(Register reg) {
- if (reg != Register::ZeroIndex) {
- used_registers.insert(static_cast<u32>(reg));
- }
- return MakeNode<GprNode>(reg);
-}
-
-Node ShaderIR::GetCustomVariable(u32 id) {
- return MakeNode<CustomVarNode>(id);
-}
-
-Node ShaderIR::GetImmediate19(Instruction instr) {
- return Immediate(instr.alu.GetImm20_19());
-}
-
-Node ShaderIR::GetImmediate32(Instruction instr) {
- return Immediate(instr.alu.GetImm20_32());
-}
-
-Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
- const auto index = static_cast<u32>(index_);
- const auto offset = static_cast<u32>(offset_);
-
- used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset);
-
- return MakeNode<CbufNode>(index, Immediate(offset));
-}
-
-Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
- const auto index = static_cast<u32>(index_);
- const auto offset = static_cast<u32>(offset_);
-
- used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect();
-
- Node final_offset = [&] {
- // Attempt to inline constant buffer without a variable offset. This is done to allow
- // tracking LDC calls.
- if (const auto gpr = std::get_if<GprNode>(&*node)) {
- if (gpr->GetIndex() == Register::ZeroIndex) {
- return Immediate(offset);
- }
- }
- return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
- }();
- return MakeNode<CbufNode>(index, std::move(final_offset));
-}
-
-Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
- const auto pred = static_cast<Pred>(pred_);
- if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) {
- used_predicates.insert(pred);
- }
-
- return MakeNode<PredicateNode>(pred, negated);
-}
-
-Node ShaderIR::GetPredicate(bool immediate) {
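-    // Pred::UnusedIndex always evaluates to true and Pred::NeverExecute always to false.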
- return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
-}
-
-Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
- MarkAttributeUsage(index, element);
- used_input_attributes.emplace(index);
- return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
-}
-
-Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
- uses_physical_attributes = true;
- return MakeNode<AbufNode>(GetRegister(physical_address), buffer);
-}
-
-Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
- MarkAttributeUsage(index, element);
- used_output_attributes.insert(index);
- return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
-}
-
-Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
- Node node = MakeNode<InternalFlagNode>(flag);
- if (negated) {
- return Operation(OperationCode::LogicalNegate, std::move(node));
- }
- return node;
-}
-
-Node ShaderIR::GetLocalMemory(Node address) {
- return MakeNode<LmemNode>(std::move(address));
-}
-
-Node ShaderIR::GetSharedMemory(Node address) {
- return MakeNode<SmemNode>(std::move(address));
-}
-
-Node ShaderIR::GetTemporary(u32 id) {
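-    // Temporaries are mapped to the registers immediately after RZ (the zero register).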
- return GetRegister(Register::ZeroIndex + 1 + id);
-}
-
-Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
- if (absolute) {
- value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
- }
- if (negate) {
- value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
- }
- return value;
-}
-
-Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
- if (!saturate) {
- return value;
- }
-
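-    // Saturation clamps the value to the [+0.0, 1.0] range.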
- Node positive_zero = Immediate(std::copysignf(0, 1));
- Node positive_one = Immediate(1.0f);
- return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
- std::move(positive_one));
-}
-
-Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
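-    // Narrow integers are extended to 32 bits with a shift-left/shift-right pair; the right
-    // shift is arithmetic for signed values and logical for unsigned ones.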
- switch (size) {
- case Register::Size::Byte:
- value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
- std::move(value), Immediate(24));
- value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
- std::move(value), Immediate(24));
- return value;
- case Register::Size::Short:
- value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
- std::move(value), Immediate(16));
- value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
- std::move(value), Immediate(16));
- return value;
- case Register::Size::Word:
- // Default - do nothing
- return value;
- default:
- UNREACHABLE_MSG("Unimplemented conversion size: {}", size);
- return value;
- }
-}
-
-Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) {
- if (!is_signed) {
-        // Taking the absolute value or negating an unsigned value is pointless
- return value;
- }
- if (absolute) {
- value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
- }
- if (negate) {
- value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
- }
- return value;
-}
-
-Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
- Node value = Immediate(instr.half_imm.PackImmediates());
- if (!has_negation) {
- return value;
- }
-
- Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
- Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
-
- return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
- std::move(second_negate));
-}
-
-Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
- return Operation(OperationCode::HUnpack, type, std::move(value));
-}
-
-Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
- switch (merge) {
- case Tegra::Shader::HalfMerge::H0_H1:
- return src;
- case Tegra::Shader::HalfMerge::F32:
- return Operation(OperationCode::HMergeF32, std::move(src));
- case Tegra::Shader::HalfMerge::Mrg_H0:
- return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
- case Tegra::Shader::HalfMerge::Mrg_H1:
- return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
- }
- UNREACHABLE();
- return src;
-}
-
-Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
- if (absolute) {
- value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
- }
- if (negate) {
- value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
- GetPredicate(true));
- }
- return value;
-}
-
-Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
- if (!saturate) {
- return value;
- }
-
- Node positive_zero = Immediate(std::copysignf(0, 1));
- Node positive_one = Immediate(1.0f);
- return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
- std::move(positive_one));
-}
-
-Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
- if (condition == PredCondition::T) {
- return GetPredicate(true);
- } else if (condition == PredCondition::F) {
- return GetPredicate(false);
- }
-
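-    // Ordered comparisons are false when either operand is NaN; unordered ones are true instead.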
- static constexpr std::array comparison_table{
- OperationCode(0),
- OperationCode::LogicalFOrdLessThan, // LT
- OperationCode::LogicalFOrdEqual, // EQ
- OperationCode::LogicalFOrdLessEqual, // LE
- OperationCode::LogicalFOrdGreaterThan, // GT
- OperationCode::LogicalFOrdNotEqual, // NE
- OperationCode::LogicalFOrdGreaterEqual, // GE
- OperationCode::LogicalFOrdered, // NUM
- OperationCode::LogicalFUnordered, // NAN
- OperationCode::LogicalFUnordLessThan, // LTU
- OperationCode::LogicalFUnordEqual, // EQU
- OperationCode::LogicalFUnordLessEqual, // LEU
- OperationCode::LogicalFUnordGreaterThan, // GTU
- OperationCode::LogicalFUnordNotEqual, // NEU
- OperationCode::LogicalFUnordGreaterEqual, // GEU
- };
- const std::size_t index = static_cast<std::size_t>(condition);
- ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index);
-
- return Operation(comparison_table[index], op_a, op_b);
-}
-
-Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
- Node op_b) {
- static constexpr std::array comparison_table{
- std::pair{PredCondition::LT, OperationCode::LogicalILessThan},
- std::pair{PredCondition::EQ, OperationCode::LogicalIEqual},
- std::pair{PredCondition::LE, OperationCode::LogicalILessEqual},
- std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan},
- std::pair{PredCondition::NE, OperationCode::LogicalINotEqual},
- std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual},
- };
-
- const auto comparison =
- std::find_if(comparison_table.cbegin(), comparison_table.cend(),
- [condition](const auto entry) { return condition == entry.first; });
- UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
- "Unknown predicate comparison operation");
-
- return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
- std::move(op_b));
-}
-
-Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
- Node op_b) {
- static constexpr std::array comparison_table{
- std::pair{PredCondition::LT, OperationCode::Logical2HLessThan},
- std::pair{PredCondition::EQ, OperationCode::Logical2HEqual},
- std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual},
- std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan},
- std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual},
- std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual},
- std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan},
- std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan},
- std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan},
- std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan},
- std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan},
- };
-
- const auto comparison =
- std::find_if(comparison_table.cbegin(), comparison_table.cend(),
- [condition](const auto entry) { return condition == entry.first; });
- UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
- "Unknown predicate comparison operation");
-
- return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
-}
-
-OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
- static constexpr std::array operation_table{
- OperationCode::LogicalAnd,
- OperationCode::LogicalOr,
- OperationCode::LogicalXor,
- };
-
- const auto index = static_cast<std::size_t>(operation);
- if (index >= operation_table.size()) {
- UNIMPLEMENTED_MSG("Unknown predicate operation.");
- return {};
- }
-
- return operation_table[index];
-}
-
-Node ShaderIR::GetConditionCode(ConditionCode cc) const {
- switch (cc) {
- case ConditionCode::NEU:
- return GetInternalFlag(InternalFlag::Zero, true);
- case ConditionCode::FCSM_TR:
- UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
- return MakeNode<PredicateNode>(Pred::NeverExecute, false);
- default:
- UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc);
- return MakeNode<PredicateNode>(Pred::NeverExecute, false);
- }
-}
-
-void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
- bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
-}
-
-void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
- bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
-}
-
-void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
- bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
-}
-
-void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
- bb.push_back(
- Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
-}
-
-void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) {
- bb.push_back(
- Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value)));
-}
-
-void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
- SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
-}
-
-void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
- if (!sets_cc) {
- return;
- }
- Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f));
- SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
- LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
-}
-
-void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {
- if (!sets_cc) {
- return;
- }
- Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
- SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
- LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
-}
-
-Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
- return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
- Immediate(offset), Immediate(bits));
-}
-
-Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
- return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset),
- Immediate(bits));
-}
-
-void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
- switch (index) {
- case Attribute::Index::LayerViewportPointSize:
- switch (element) {
- case 0:
- UNIMPLEMENTED();
- break;
- case 1:
- uses_layer = true;
- break;
- case 2:
- uses_viewport_index = true;
- break;
- case 3:
- uses_point_size = true;
- break;
- }
- break;
- case Attribute::Index::TessCoordInstanceIDVertexID:
- switch (element) {
- case 2:
- uses_instance_id = true;
- break;
- case 3:
- uses_vertex_id = true;
- break;
- }
- break;
- case Attribute::Index::ClipDistances0123:
- case Attribute::Index::ClipDistances4567: {
- const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element;
- used_clip_distances.at(clip_index) = true;
- break;
- }
- case Attribute::Index::FrontColor:
- case Attribute::Index::FrontSecondaryColor:
- case Attribute::Index::BackColor:
- case Attribute::Index::BackSecondaryColor:
- uses_legacy_varyings = true;
- break;
- default:
- if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) {
- uses_legacy_varyings = true;
- }
- break;
- }
-}
-
-std::size_t ShaderIR::DeclareAmend(Node new_amend) {
- const auto id = amend_code.size();
- amend_code.push_back(std::move(new_amend));
- return id;
-}
-
-u32 ShaderIR::NewCustomVariable() {
- return num_custom_variables++;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
deleted file mode 100644
index 1cd7c14d7..000000000
--- a/src/video_core/shader/shader_ir.h
+++ /dev/null
@@ -1,479 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <list>
-#include <map>
-#include <optional>
-#include <set>
-#include <tuple>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_header.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/registry.h"
-
-namespace VideoCommon::Shader {
-
-struct ShaderBlock;
-
-constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
-
-struct ConstBuffer {
- constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_)
- : max_offset{max_offset_}, is_indirect{is_indirect_} {}
-
- constexpr ConstBuffer() = default;
-
- void MarkAsUsed(u64 offset) {
- max_offset = std::max(max_offset, static_cast<u32>(offset));
- }
-
- void MarkAsUsedIndirect() {
- is_indirect = true;
- }
-
- bool IsIndirect() const {
- return is_indirect;
- }
-
- u32 GetSize() const {
- return max_offset + static_cast<u32>(sizeof(float));
- }
-
- u32 GetMaxOffset() const {
- return max_offset;
- }
-
-private:
- u32 max_offset = 0;
- bool is_indirect = false;
-};
-
-struct GlobalMemoryUsage {
- bool is_read{};
- bool is_written{};
-};
-
-class ShaderIR final {
-public:
- explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
- CompilerSettings settings_, Registry& registry_);
- ~ShaderIR();
-
- const std::map<u32, NodeBlock>& GetBasicBlocks() const {
- return basic_blocks;
- }
-
- const std::set<u32>& GetRegisters() const {
- return used_registers;
- }
-
- const std::set<Tegra::Shader::Pred>& GetPredicates() const {
- return used_predicates;
- }
-
- const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const {
- return used_input_attributes;
- }
-
- const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const {
- return used_output_attributes;
- }
-
- const std::map<u32, ConstBuffer>& GetConstantBuffers() const {
- return used_cbufs;
- }
-
- const std::list<SamplerEntry>& GetSamplers() const {
- return used_samplers;
- }
-
- const std::list<ImageEntry>& GetImages() const {
- return used_images;
- }
-
- const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
- const {
- return used_clip_distances;
- }
-
- const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
- return used_global_memory;
- }
-
- std::size_t GetLength() const {
- return static_cast<std::size_t>(coverage_end * sizeof(u64));
- }
-
- bool UsesLayer() const {
- return uses_layer;
- }
-
- bool UsesViewportIndex() const {
- return uses_viewport_index;
- }
-
- bool UsesPointSize() const {
- return uses_point_size;
- }
-
- bool UsesInstanceId() const {
- return uses_instance_id;
- }
-
- bool UsesVertexId() const {
- return uses_vertex_id;
- }
-
- bool UsesLegacyVaryings() const {
- return uses_legacy_varyings;
- }
-
- bool UsesYNegate() const {
- return uses_y_negate;
- }
-
- bool UsesWarps() const {
- return uses_warps;
- }
-
- bool HasPhysicalAttributes() const {
- return uses_physical_attributes;
- }
-
- const Tegra::Shader::Header& GetHeader() const {
- return header;
- }
-
- bool IsFlowStackDisabled() const {
- return disable_flow_stack;
- }
-
- bool IsDecompiled() const {
- return decompiled;
- }
-
- const ASTManager& GetASTManager() const {
- return program_manager;
- }
-
- ASTNode GetASTProgram() const {
- return program_manager.GetProgram();
- }
-
- u32 GetASTNumVariables() const {
- return program_manager.GetVariables();
- }
-
- u32 ConvertAddressToNvidiaSpace(u32 address) const {
- return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
- }
-
- /// Returns a condition code evaluated from internal flags
- Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
-
- const Node& GetAmendNode(std::size_t index) const {
- return amend_code[index];
- }
-
- u32 GetNumCustomVariables() const {
- return num_custom_variables;
- }
-
-private:
- friend class ASTDecoder;
-
- struct SamplerInfo {
- std::optional<Tegra::Shader::TextureType> type;
- std::optional<bool> is_array;
- std::optional<bool> is_shadow;
- std::optional<bool> is_buffer;
-
- constexpr bool IsComplete() const noexcept {
- return type && is_array && is_shadow && is_buffer;
- }
- };
-
- void Decode();
- void PostDecode();
-
- NodeBlock DecodeRange(u32 begin, u32 end);
- void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
- void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
-
- /**
- * Decodes a single instruction from Tegra to IR.
- * @param bb Basic block where the nodes will be written to.
- * @param pc Program counter. Offset to decode.
- * @return Next address to decode.
- */
- u32 DecodeInstr(NodeBlock& bb, u32 pc);
-
- u32 DecodeArithmetic(NodeBlock& bb, u32 pc);
- u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc);
- u32 DecodeBfe(NodeBlock& bb, u32 pc);
- u32 DecodeBfi(NodeBlock& bb, u32 pc);
- u32 DecodeShift(NodeBlock& bb, u32 pc);
- u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc);
- u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc);
- u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc);
- u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc);
- u32 DecodeFfma(NodeBlock& bb, u32 pc);
- u32 DecodeHfma2(NodeBlock& bb, u32 pc);
- u32 DecodeConversion(NodeBlock& bb, u32 pc);
- u32 DecodeWarp(NodeBlock& bb, u32 pc);
- u32 DecodeMemory(NodeBlock& bb, u32 pc);
- u32 DecodeTexture(NodeBlock& bb, u32 pc);
- u32 DecodeImage(NodeBlock& bb, u32 pc);
- u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
- u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
- u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
- u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc);
- u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc);
- u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc);
- u32 DecodeFloatSet(NodeBlock& bb, u32 pc);
- u32 DecodeIntegerSet(NodeBlock& bb, u32 pc);
- u32 DecodeHalfSet(NodeBlock& bb, u32 pc);
- u32 DecodeVideo(NodeBlock& bb, u32 pc);
- u32 DecodeXmad(NodeBlock& bb, u32 pc);
- u32 DecodeOther(NodeBlock& bb, u32 pc);
-
- /// Generates a node for a passed register.
- Node GetRegister(Tegra::Shader::Register reg);
- /// Generates a node for a custom variable
- Node GetCustomVariable(u32 id);
- /// Generates a node representing a 19-bit immediate value
- Node GetImmediate19(Tegra::Shader::Instruction instr);
- /// Generates a node representing a 32-bit immediate value
- Node GetImmediate32(Tegra::Shader::Instruction instr);
- /// Generates a node representing a constant buffer
- Node GetConstBuffer(u64 index, u64 offset);
-    /// Generates a node representing a constant buffer with a variable offset
- Node GetConstBufferIndirect(u64 index, u64 offset, Node node);
- /// Generates a node for a passed predicate. It can be optionally negated
- Node GetPredicate(u64 pred, bool negated = false);
- /// Generates a predicate node for an immediate true or false value
- Node GetPredicate(bool immediate);
- /// Generates a node representing an input attribute. Keeps track of used attributes.
- Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {});
- /// Generates a node representing a physical input attribute.
- Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {});
- /// Generates a node representing an output attribute. Keeps track of used attributes.
- Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
- /// Generates a node representing an internal flag
- Node GetInternalFlag(InternalFlag flag, bool negated = false) const;
- /// Generates a node representing a local memory address
- Node GetLocalMemory(Node address);
- /// Generates a node representing a shared memory address
- Node GetSharedMemory(Node address);
- /// Generates a temporary, internally it uses a post-RZ register
- Node GetTemporary(u32 id);
-
- /// Sets a register. src value must be a number-evaluated node.
- void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
- /// Sets a predicate. src value must be a bool-evaluated node
- void SetPredicate(NodeBlock& bb, u64 dest, Node src);
- /// Sets an internal flag. src value must be a bool-evaluated node
- void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
- /// Sets a local memory address with a value.
- void SetLocalMemory(NodeBlock& bb, Node address, Node value);
- /// Sets a shared memory address with a value.
- void SetSharedMemory(NodeBlock& bb, Node address, Node value);
- /// Sets a temporary. Internally it uses a post-RZ register
- void SetTemporary(NodeBlock& bb, u32 id, Node value);
-
- /// Sets internal flags from a float
- void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
- /// Sets internal flags from an integer
- void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);
-
- /// Conditionally absolute/negated float. Absolute is applied first
- Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
- /// Conditionally saturates a float
- Node GetSaturatedFloat(Node value, bool saturate = true);
-
- /// Converts an integer to different sizes.
- Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed);
- /// Conditionally absolute/negated integer. Absolute is applied first
- Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed);
-
- /// Unpacks a half immediate from an instruction
- Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
- /// Unpacks a binary value into a half float pair with a type format
- Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type);
- /// Merges a half pair into another value
- Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
- /// Conditionally absolute/negated half float pair. Absolute is applied first
- Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
- /// Conditionally saturates a half float pair
- Node GetSaturatedHalfFloat(Node value, bool saturate = true);
-
- /// Get image component value by type and size
- std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
- u32 component_size, Node original_value);
-
- /// Returns a predicate comparing two floats
- Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
- /// Returns a predicate comparing two integers
- Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
- Node op_a, Node op_b);
-    /// Returns a predicate comparing two half-float pairs component-wise
- Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
-
- /// Returns a predicate combiner operation
- OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
-
- /// Queries the missing sampler info from the execution context.
- SamplerInfo GetSamplerInfo(SamplerInfo info,
- std::optional<Tegra::Engines::SamplerDescriptor> sampler);
-
- /// Accesses a texture sampler.
- std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
-
- /// Accesses a texture sampler for a bindless texture.
- std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
- Node& index_var);
-
- /// Accesses an image.
- ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
-
- /// Access a bindless image sampler.
- ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
-
- /// Extracts a sequence of bits from a node
- Node BitfieldExtract(Node value, u32 offset, u32 bits);
-
-    /// Inserts a sequence of bits into a node
- Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
-
-    /// Marks the usage of an input or output attribute.
- void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
-
- /// Decodes VMNMX instruction and inserts its code into the passed basic block.
- void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
-
- void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components);
-
- void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components, bool ignore_mask = false);
- void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components, bool ignore_mask = false);
-
- Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
- bool is_array, bool is_aoffi,
- std::optional<Tegra::Shader::Register> bindless_reg);
-
- Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
- bool is_array);
-
- Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp,
- bool is_bindless);
-
- Node4 GetTldCode(Tegra::Shader::Instruction instr);
-
- Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- bool is_array);
-
- std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement(
- Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
- bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
-
- std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
-
- std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs);
-
- Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
- Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
- std::optional<Tegra::Shader::Register> bindless_reg);
-
- Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
- u64 byte_height);
-
- void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,
- Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
- Tegra::Shader::PredicateResultMode predicate_mode,
- Tegra::Shader::Pred predicate, bool sets_cc);
- void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
- Node op_c, Node imm_lut, bool sets_cc);
-
- std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
-
- std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
- s64 cursor);
-
- std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
- const OperationNode& operation,
- Node gpr, Node base_offset,
- Node tracked, const NodeBlock& code,
- s64 cursor);
-
- std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
-
- std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
- s64 cursor) const;
-
- std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
- Tegra::Shader::Instruction instr,
- bool is_read, bool is_write);
-
-    /// Registers new amending code and obtains the reference id.
- std::size_t DeclareAmend(Node new_amend);
-
- u32 NewCustomVariable();
-
- const ProgramCode& program_code;
- const u32 main_offset;
- const CompilerSettings settings;
- Registry& registry;
-
- bool decompiled{};
- bool disable_flow_stack{};
-
- u32 coverage_begin{};
- u32 coverage_end{};
-
- std::map<u32, NodeBlock> basic_blocks;
- NodeBlock global_code;
- ASTManager program_manager{true, true};
- std::vector<Node> amend_code;
- u32 num_custom_variables{};
-
- std::set<u32> used_registers;
- std::set<Tegra::Shader::Pred> used_predicates;
- std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
- std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
- std::map<u32, ConstBuffer> used_cbufs;
- std::list<SamplerEntry> used_samplers;
- std::list<ImageEntry> used_images;
- std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
- std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
- bool uses_layer{};
- bool uses_viewport_index{};
- bool uses_point_size{};
- bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
- bool uses_instance_id{};
- bool uses_vertex_id{};
- bool uses_legacy_varyings{};
- bool uses_y_negate{};
- bool uses_warps{};
- bool uses_indexed_samplers{};
-
- Tegra::Shader::Header header;
-};
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
deleted file mode 100644
index 6be3ea92b..000000000
--- a/src/video_core/shader/track.cpp
+++ /dev/null
@@ -1,236 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <utility>
-#include <variant>
-
-#include "common/common_types.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-namespace {
-
-std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
- OperationCode operation_code) {
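-    // Walks the block backwards from the cursor, descending into conditional bodies on the way.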
- for (; cursor >= 0; --cursor) {
- Node node = code.at(cursor);
-
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- if (operation->GetCode() == operation_code) {
- return {std::move(node), cursor};
- }
- }
-
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- const auto& conditional_code = conditional->GetCode();
- auto result = FindOperation(
- conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
- auto& found = result.first;
- if (found) {
- return {std::move(found), cursor};
- }
- }
- }
- return {};
-}
-
-std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
- if (operation.GetCode() != OperationCode::UAdd) {
- return std::nullopt;
- }
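-    // An indirect constant buffer read is expected to be encoded as UAdd(gpr, immediate);
-    // split it into its register and offset operands.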
- Node gpr;
- Node offset;
- ASSERT(operation.GetOperandsCount() == 2);
- for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) {
- Node operand = operation[i];
- if (std::holds_alternative<ImmediateNode>(*operand)) {
- offset = operation[i];
- } else if (std::holds_alternative<GprNode>(*operand)) {
- gpr = operation[i];
- }
- }
- if (offset && gpr) {
- return std::make_pair(gpr, offset);
- }
- return std::nullopt;
-}
-
-bool AmendNodeCv(std::size_t amend_index, Node node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- operation->SetAmendIndex(amend_index);
- return true;
- }
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- conditional->SetAmendIndex(amend_index);
- return true;
- }
- return false;
-}
-
-} // Anonymous namespace
-
-std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
- s64 cursor) {
- if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
- const u32 cbuf_index = cbuf->GetIndex();
-
- // Constant buffer found, test if it's an immediate
- const auto& offset = cbuf->GetOffset();
- if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
- return {tracked, track};
- }
- if (const auto operation = std::get_if<OperationNode>(&*offset)) {
- const u32 bound_buffer = registry.GetBoundBuffer();
- if (bound_buffer != cbuf_index) {
- return {};
- }
- if (const std::optional pair = DecoupleIndirectRead(*operation)) {
- auto [gpr, base_offset] = *pair;
- return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
- code, cursor);
- }
- }
- return {};
- }
- if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
- if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
- return {};
- }
-        // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
-        // register that it uses as operand
- const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
- if (!source) {
- return {};
- }
- return TrackBindlessSampler(source, code, new_cursor);
- }
- if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
- const OperationNode& op = *operation;
-
- const OperationCode opcode = operation->GetCode();
- if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
- ASSERT(op.GetOperandsCount() == 2);
- auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
- auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
- if (node_a && node_b) {
- auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
- std::pair{offset_a, offset_b});
- return {tracked, std::move(track)};
- }
- }
- std::size_t i = op.GetOperandsCount();
- while (i--) {
-            if (auto found = TrackBindlessSampler(op[i], code, cursor); std::get<0>(found)) {
- // Constant buffer found in operand.
- return found;
- }
- }
- return {};
- }
- if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
- const auto& conditional_code = conditional->GetCode();
- return TrackBindlessSampler(tracked, conditional_code,
- static_cast<s64>(conditional_code.size()));
- }
- return {};
-}
-
-std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
- const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
- const NodeBlock& code, s64 cursor) {
- const auto offset_imm = std::get<ImmediateNode>(*base_offset);
- const auto& gpu_driver = registry.AccessGuestDriverProfile();
- const u32 bindless_cv = NewCustomVariable();
- const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
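-    // The register holds a byte offset into the texture handles; dividing it by the driver's
-    // texture handler size yields the sampler index, stored in a fresh custom variable.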
- Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
-
- Node cv_node = GetCustomVariable(bindless_cv);
- Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
- const std::size_t amend_index = DeclareAmend(std::move(amend_op));
- AmendNodeCv(amend_index, code[cursor]);
-
- // TODO: Implement bindless index custom variable
- auto track =
- MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
- return {tracked, track};
-}
-
-std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
- s64 cursor) const {
- if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
- // Constant buffer found, test if it's an immediate
- const auto& offset = cbuf->GetOffset();
- if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- return {tracked, cbuf->GetIndex(), immediate->GetValue()};
- }
- return {};
- }
- if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
- if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
- return {};
- }
-        // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
-        // register that it uses as operand
- const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
- if (!source) {
- return {};
- }
- return TrackCbuf(source, code, new_cursor);
- }
- if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
- for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
- if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) {
- // Cbuf found in operand.
- return found;
- }
- }
- return {};
- }
- if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
- const auto& conditional_code = conditional->GetCode();
- return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
- }
- return {};
-}
-
-std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
-    // Reduce the cursor by one to avoid infinite loops when the instruction sets the same register
-    // that it uses as operand
- const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
- const auto& found = result.first;
- if (!found) {
- return std::nullopt;
- }
- if (const auto immediate = std::get_if<ImmediateNode>(&*found)) {
- return immediate->GetValue();
- }
- return std::nullopt;
-}
-
-std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
- s64 cursor) const {
- for (; cursor >= 0; --cursor) {
- const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
- if (!found_node) {
- return {};
- }
- const auto operation = std::get_if<OperationNode>(&*found_node);
- ASSERT(operation);
-
- const auto& target = (*operation)[0];
- if (const auto gpr_target = std::get_if<GprNode>(&*target)) {
- if (gpr_target->GetIndex() == tracked->GetIndex()) {
- return {(*operation)[1], new_cursor};
- }
- }
- }
- return {};
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
deleted file mode 100644
index 22a933761..000000000
--- a/src/video_core/shader/transform_feedback.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <unordered_map>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/transform_feedback.h"
-
-namespace VideoCommon::Shader {
-
-namespace {
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
-
-/// Attribute offsets that describe a vector
-constexpr std::array VECTORS = {
- 28, // gl_Position
- 32, // Generic 0
- 36, // Generic 1
- 40, // Generic 2
- 44, // Generic 3
- 48, // Generic 4
- 52, // Generic 5
- 56, // Generic 6
- 60, // Generic 7
- 64, // Generic 8
- 68, // Generic 9
- 72, // Generic 10
- 76, // Generic 11
- 80, // Generic 12
- 84, // Generic 13
- 88, // Generic 14
- 92, // Generic 15
- 96, // Generic 16
- 100, // Generic 17
- 104, // Generic 18
- 108, // Generic 19
- 112, // Generic 20
- 116, // Generic 21
- 120, // Generic 22
- 124, // Generic 23
- 128, // Generic 24
- 132, // Generic 25
- 136, // Generic 26
- 140, // Generic 27
- 144, // Generic 28
- 148, // Generic 29
- 152, // Generic 30
- 156, // Generic 31
- 160, // gl_FrontColor
- 164, // gl_FrontSecondaryColor
-    168, // gl_BackColor
-    172, // gl_BackSecondaryColor
- 192, // gl_TexCoord[0]
- 196, // gl_TexCoord[1]
- 200, // gl_TexCoord[2]
- 204, // gl_TexCoord[3]
- 208, // gl_TexCoord[4]
- 212, // gl_TexCoord[5]
- 216, // gl_TexCoord[6]
- 220, // gl_TexCoord[7]
-};
-} // namespace
-
-std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
-    std::unordered_map<u8, VaryingTFB> tfb;
-
- for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
- const auto& locations = info.tfb_varying_locs[buffer];
- const auto& layout = info.tfb_layouts[buffer];
- const std::size_t varying_count = layout.varying_count;
-
- std::size_t highest = 0;
-
- for (std::size_t offset = 0; offset < varying_count; ++offset) {
- const std::size_t base_offset = offset;
- const u8 location = locations[offset];
-
- VaryingTFB varying;
- varying.buffer = layout.stream;
- varying.stride = layout.stride;
- varying.offset = offset * sizeof(u32);
- varying.components = 1;
-
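-            // Consecutive locations that belong to the same vector attribute are merged into a
-            // single multi-component varying.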
- if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
- UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
-
- const u8 base_index = location / 4;
- while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
- ++offset;
- ++varying.components;
- }
- }
-
- [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
- UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
-
- highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
- }
-
- UNIMPLEMENTED_IF(highest != layout.stride);
- }
- return tfb;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
deleted file mode 100644
index 77d05f64c..000000000
--- a/src/video_core/shader/transform_feedback.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <unordered_map>
-
-#include "common/common_types.h"
-#include "video_core/shader/registry.h"
-
-namespace VideoCommon::Shader {
-
-struct VaryingTFB {
- std::size_t buffer;
- std::size_t stride;
- std::size_t offset;
- std::size_t components;
-};
-
-std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
new file mode 100644
index 000000000..78bf90c48
--- /dev/null
+++ b/src/video_core/shader_cache.cpp
@@ -0,0 +1,250 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+#include "common/assert.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/object_pool.h"
+#include "video_core/dirty_flags.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
+
+namespace VideoCommon {
+
+void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
+ std::scoped_lock lock{invalidation_mutex};
+ InvalidatePagesInRegion(addr, size);
+ RemovePendingShaders();
+}
+
+void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
+    std::scoped_lock lock{invalidation_mutex};
+ InvalidatePagesInRegion(addr, size);
+}
+
+void ShaderCache::SyncGuestHost() {
+ std::scoped_lock lock{invalidation_mutex};
+ RemovePendingShaders();
+}
+
+ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_)
+ : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
+ rasterizer{rasterizer_} {}
+
+bool ShaderCache::RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes) {
+ auto& dirty{maxwell3d.dirty.flags};
+ if (!dirty[VideoCommon::Dirty::Shaders]) {
+ return last_shaders_valid;
+ }
+ dirty[VideoCommon::Dirty::Shaders] = false;
+
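+    // Hash every enabled shader stage; disabled stages keep a unique hash of zero.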
+ const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+ for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
+ unique_hashes[index] = 0;
+ continue;
+ }
+ const auto& shader_config{maxwell3d.regs.shader_config[index]};
+ const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
+ const GPUVAddr shader_addr{base_addr + shader_config.offset};
+ const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+ if (!cpu_shader_addr) {
+ LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
+ last_shaders_valid = false;
+ return false;
+ }
+ const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
+ if (!shader_info) {
+ const u32 start_address{shader_config.offset};
+ GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
+ shader_info = MakeShaderInfo(env, *cpu_shader_addr);
+ }
+ shader_infos[index] = shader_info;
+ unique_hashes[index] = shader_info->unique_hash;
+ }
+ last_shaders_valid = true;
+ return true;
+}
+
+const ShaderInfo* ShaderCache::ComputeShader() {
+ const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+ const auto& qmd{kepler_compute.launch_description};
+ const GPUVAddr shader_addr{program_base + qmd.program_start};
+ const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+ if (!cpu_shader_addr) {
+ LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
+ return nullptr;
+ }
+ if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
+ return shader;
+ }
+ ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ return MakeShaderInfo(env, *cpu_shader_addr);
+}
+
+void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
+ const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
+ size_t env_index{};
+ const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+ for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
+ if (unique_hashes[index] == 0) {
+ continue;
+ }
+ const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
+ auto& env{result.envs[index]};
+ const u32 start_address{maxwell3d.regs.shader_config[index].offset};
+ env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
+ env.SetCachedSize(shader_infos[index]->size_bytes);
+ result.env_ptrs[env_index++] = &env;
+ }
+}
+
+ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
+ std::scoped_lock lock{lookup_mutex};
+
+ const auto it = lookup_cache.find(addr);
+ if (it == lookup_cache.end()) {
+ return nullptr;
+ }
+ return it->second->data;
+}
+
+void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) {
+ std::scoped_lock lock{invalidation_mutex, lookup_mutex};
+
+ const VAddr addr_end = addr + size;
+ Entry* const entry = NewEntry(addr, addr_end, data.get());
+
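+    // Index the entry under every page it touches so invalidation can find it quickly.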
+ const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
+ invalidation_cache[page].push_back(entry);
+ }
+
+ storage.push_back(std::move(data));
+
+ rasterizer.UpdatePagesCachedCount(addr, size, 1);
+}
+
+void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
+ const VAddr addr_end = addr + size;
+ const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
+ auto it = invalidation_cache.find(page);
+ if (it == invalidation_cache.end()) {
+ continue;
+ }
+ InvalidatePageEntries(it->second, addr, addr_end);
+ }
+}
+
+void ShaderCache::RemovePendingShaders() {
+ if (marked_for_removal.empty()) {
+ return;
+ }
+ // Remove duplicates
+ std::ranges::sort(marked_for_removal);
+ marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
+ marked_for_removal.end());
+
+ std::vector<ShaderInfo*> removed_shaders;
+ removed_shaders.reserve(marked_for_removal.size());
+
+ std::scoped_lock lock{lookup_mutex};
+
+ for (Entry* const entry : marked_for_removal) {
+ removed_shaders.push_back(entry->data);
+
+ const auto it = lookup_cache.find(entry->addr_start);
+ ASSERT(it != lookup_cache.end());
+ lookup_cache.erase(it);
+ }
+ marked_for_removal.clear();
+
+ if (!removed_shaders.empty()) {
+ RemoveShadersFromStorage(std::move(removed_shaders));
+ }
+}
+
+void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
+ size_t index = 0;
+ while (index < entries.size()) {
+ Entry* const entry = entries[index];
+ if (!entry->Overlaps(addr, addr_end)) {
+ ++index;
+ continue;
+ }
+
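+        // Removing the entry below also erases it from this vector, so the index is not advanced.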
+ UnmarkMemory(entry);
+ RemoveEntryFromInvalidationCache(entry);
+ marked_for_removal.push_back(entry);
+ }
+}
+
+void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) {
+ const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
+ for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
+ const auto entries_it = invalidation_cache.find(page);
+ ASSERT(entries_it != invalidation_cache.end());
+ std::vector<Entry*>& entries = entries_it->second;
+
+ const auto entry_it = std::ranges::find(entries, entry);
+ ASSERT(entry_it != entries.end());
+ entries.erase(entry_it);
+ }
+}
+
+void ShaderCache::UnmarkMemory(Entry* entry) {
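+    // Drop the cached-page reference this entry added when it was registered.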
+ if (!entry->is_memory_marked) {
+ return;
+ }
+ entry->is_memory_marked = false;
+
+ const VAddr addr = entry->addr_start;
+ const size_t size = entry->addr_end - addr;
+ rasterizer.UpdatePagesCachedCount(addr, size, -1);
+}
+
+void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) {
+ // Remove them from the cache
+ std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
+ return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end();
+ });
+}
+
+ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) {
+ auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
+ Entry* const entry_pointer = entry.get();
+
+ lookup_cache.emplace(addr, std::move(entry));
+ return entry_pointer;
+}
+
+const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
+ auto info = std::make_unique<ShaderInfo>();
+ if (const std::optional<u64> cached_hash{env.Analyze()}) {
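+        // Fast path: the environment has already analyzed the program and knows its hash and size.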
+ info->unique_hash = *cached_hash;
+ info->size_bytes = env.CachedSize();
+ } else {
+        // Slow path, rarely hit by commercial games
+ // Build a control flow graph to get the real shader size
+ Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
+ Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()};
+ info->unique_hash = env.CalculateHash();
+ info->size_bytes = env.ReadSize();
+ }
+ const size_t size_bytes{info->size_bytes};
+ const ShaderInfo* const result{info.get()};
+ Register(std::move(info), cpu_addr, size_bytes);
+ return result;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index 015a789d6..136fe294c 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -5,226 +5,147 @@
#pragma once
#include <algorithm>
+#include <array>
#include <memory>
#include <mutex>
+#include <span>
#include <unordered_map>
#include <utility>
#include <vector>
-#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/shader_environment.h"
+
+namespace Tegra {
+class MemoryManager;
+}
namespace VideoCommon {
-template <class T>
+class GenericEnvironment;
+
+struct ShaderInfo {
+ u64 unique_hash{};
+ size_t size_bytes{};
+};
+
class ShaderCache {
static constexpr u64 PAGE_BITS = 14;
static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
+ static constexpr size_t NUM_PROGRAMS = 6;
+
struct Entry {
VAddr addr_start;
VAddr addr_end;
- T* data;
+ ShaderInfo* data;
bool is_memory_marked = true;
- constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
+ bool Overlaps(VAddr start, VAddr end) const noexcept {
return start < addr_end && addr_start < end;
}
};
public:
- virtual ~ShaderCache() = default;
-
/// @brief Removes shaders inside a given region
/// @note Checks for ranges
/// @param addr Start address of the invalidation
/// @param size Number of bytes of the invalidation
- void InvalidateRegion(VAddr addr, std::size_t size) {
- std::scoped_lock lock{invalidation_mutex};
- InvalidatePagesInRegion(addr, size);
- RemovePendingShaders();
- }
+ void InvalidateRegion(VAddr addr, size_t size);
/// @brief Unmarks a memory region as cached and marks it for removal
/// @param addr Start address of the CPU write operation
/// @param size Number of bytes of the CPU write operation
- void OnCPUWrite(VAddr addr, std::size_t size) {
- std::lock_guard lock{invalidation_mutex};
- InvalidatePagesInRegion(addr, size);
- }
+ void OnCPUWrite(VAddr addr, size_t size);
/// @brief Flushes delayed removal operations
- void SyncGuestHost() {
- std::scoped_lock lock{invalidation_mutex};
- RemovePendingShaders();
- }
+ void SyncGuestHost();
- /// @brief Tries to obtain a cached shader starting in a given address
- /// @note Doesn't check for ranges, the given address has to be the start of the shader
-    /// @param addr Start address of the shader, the lookup is exact rather than range-based
- /// @return Pointer to a valid shader, nullptr when nothing is found
- T* TryGet(VAddr addr) const {
- std::scoped_lock lock{lookup_mutex};
+protected:
+ struct GraphicsEnvironments {
+ std::array<GraphicsEnvironment, NUM_PROGRAMS> envs;
+ std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs;
- const auto it = lookup_cache.find(addr);
- if (it == lookup_cache.end()) {
- return nullptr;
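+        // env_ptrs is filled from the front, so the used range ends at the first nullptr.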
+ std::span<Shader::Environment* const> Span() const noexcept {
+ return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
}
- return it->second->data;
- }
-
-protected:
- explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
+ };
- /// @brief Register in the cache a given entry
- /// @param data Shader to store in the cache
- /// @param addr Start address of the shader that will be registered
- /// @param size Size in bytes of the shader
- void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
- std::scoped_lock lock{invalidation_mutex, lookup_mutex};
+ explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_);
- const VAddr addr_end = addr + size;
- Entry* const entry = NewEntry(addr, addr_end, data.get());
+ /// @brief Update the hashes and information of shader stages
+    /// @param unique_hashes Output array written with the hash of each enabled stage
+    /// @return True on success, false on error
+ bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes);
- const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
- for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
- invalidation_cache[page].push_back(entry);
- }
+ /// @brief Returns information about the current compute shader
+ /// @return Pointer to a valid shader, nullptr on error
+ const ShaderInfo* ComputeShader();
- storage.push_back(std::move(data));
+ /// @brief Collect the current graphics environments
+ void GetGraphicsEnvironments(GraphicsEnvironments& result,
+ const std::array<u64, NUM_PROGRAMS>& unique_hashes);
- rasterizer.UpdatePagesCachedCount(addr, size, 1);
- }
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
- /// @brief Called when a shader is going to be removed
- /// @param shader Shader that will be removed
- /// @pre invalidation_cache is locked
- /// @pre lookup_mutex is locked
- virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
+ std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
+ bool last_shaders_valid = false;
private:
+ /// @brief Tries to obtain a cached shader starting in a given address
+    /// @note Doesn't check for ranges; the given address has to be the start of the shader
+    /// @param addr Start address of the shader; the lookup is not range-based
+ /// @return Pointer to a valid shader, nullptr when nothing is found
+ ShaderInfo* TryGet(VAddr addr) const;
+
+ /// @brief Register in the cache a given entry
+ /// @param data Shader to store in the cache
+ /// @param addr Start address of the shader that will be registered
+ /// @param size Size in bytes of the shader
+ void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size);
+
/// @brief Invalidate pages in a given region
/// @pre invalidation_mutex is locked
- void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
- const VAddr addr_end = addr + size;
- const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
- for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
- auto it = invalidation_cache.find(page);
- if (it == invalidation_cache.end()) {
- continue;
- }
- InvalidatePageEntries(it->second, addr, addr_end);
- }
- }
+ void InvalidatePagesInRegion(VAddr addr, size_t size);
/// @brief Remove shaders marked for deletion
/// @pre invalidation_mutex is locked
- void RemovePendingShaders() {
- if (marked_for_removal.empty()) {
- return;
- }
- // Remove duplicates
- std::sort(marked_for_removal.begin(), marked_for_removal.end());
- marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
- marked_for_removal.end());
-
- std::vector<T*> removed_shaders;
- removed_shaders.reserve(marked_for_removal.size());
-
- std::scoped_lock lock{lookup_mutex};
-
- for (Entry* const entry : marked_for_removal) {
- removed_shaders.push_back(entry->data);
-
- const auto it = lookup_cache.find(entry->addr_start);
- ASSERT(it != lookup_cache.end());
- lookup_cache.erase(it);
- }
- marked_for_removal.clear();
-
- if (!removed_shaders.empty()) {
- RemoveShadersFromStorage(std::move(removed_shaders));
- }
- }
+ void RemovePendingShaders();
/// @brief Invalidates entries in a given range for the passed page
/// @param entries Vector of entries in the page, it will be modified on overlaps
/// @param addr Start address of the invalidation
/// @param addr_end Non-inclusive end address of the invalidation
/// @pre invalidation_mutex is locked
- void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
- std::size_t index = 0;
- while (index < entries.size()) {
- Entry* const entry = entries[index];
- if (!entry->Overlaps(addr, addr_end)) {
- ++index;
- continue;
- }
-
- UnmarkMemory(entry);
- RemoveEntryFromInvalidationCache(entry);
- marked_for_removal.push_back(entry);
- }
- }
+ void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end);
/// @brief Removes all references to an entry in the invalidation cache
/// @param entry Entry to remove from the invalidation cache
/// @pre invalidation_mutex is locked
- void RemoveEntryFromInvalidationCache(const Entry* entry) {
- const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
- for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
- const auto entries_it = invalidation_cache.find(page);
- ASSERT(entries_it != invalidation_cache.end());
- std::vector<Entry*>& entries = entries_it->second;
-
- const auto entry_it = std::find(entries.begin(), entries.end(), entry);
- ASSERT(entry_it != entries.end());
- entries.erase(entry_it);
- }
- }
+ void RemoveEntryFromInvalidationCache(const Entry* entry);
/// @brief Unmarks an entry from the rasterizer cache
/// @param entry Entry to unmark from memory
- void UnmarkMemory(Entry* entry) {
- if (!entry->is_memory_marked) {
- return;
- }
- entry->is_memory_marked = false;
-
- const VAddr addr = entry->addr_start;
- const std::size_t size = entry->addr_end - addr;
- rasterizer.UpdatePagesCachedCount(addr, size, -1);
- }
+ void UnmarkMemory(Entry* entry);
/// @brief Removes a vector of shaders from a list
/// @param removed_shaders Shaders to be removed from the storage
/// @pre invalidation_mutex is locked
/// @pre lookup_mutex is locked
- void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
- // Notify removals
- for (T* const shader : removed_shaders) {
- OnShaderRemoval(shader);
- }
-
- // Remove them from the cache
- const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) {
- return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
- removed_shaders.end();
- };
- std::erase_if(storage, is_removed);
- }
+ void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders);
/// @brief Creates a new entry in the lookup cache and returns its pointer
/// @pre lookup_mutex is locked
- Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
- auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
- Entry* const entry_pointer = entry.get();
+ Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data);
- lookup_cache.emplace(addr, std::move(entry));
- return entry_pointer;
- }
+ /// @brief Create a new shader entry and register it
+ const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
VideoCore::RasterizerInterface& rasterizer;
@@ -233,7 +154,7 @@ private:
std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
- std::vector<std::unique_ptr<T>> storage;
+ std::vector<std::unique_ptr<ShaderInfo>> storage;
std::vector<Entry*> marked_for_removal;
};
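
For reference, the page-bucketing that both Register and InvalidatePagesInRegion rely on (visible in the removed inline bodies above) maps a byte range to every page it overlaps. A minimal standalone sketch, assuming 4 KiB pages since PAGE_BITS is defined outside this hunk:

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    constexpr std::uint64_t PAGE_BITS = 12; // assumption: 4 KiB pages
    constexpr std::uint64_t PAGE_SIZE = std::uint64_t{1} << PAGE_BITS;

    struct Entry; // stand-in for the cache's Entry type

    // Push `entry` into every page bucket overlapped by [addr, addr + size).
    void RegisterPages(std::unordered_map<std::uint64_t, std::vector<Entry*>>& buckets,
                       Entry* entry, std::uint64_t addr, std::uint64_t size) {
        const std::uint64_t addr_end = addr + size;
        // Round the end up so a range finishing mid-page still claims that page.
        const std::uint64_t page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
        for (std::uint64_t page = addr >> PAGE_BITS; page < page_end; ++page) {
            buckets[page].push_back(entry);
        }
    }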
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
new file mode 100644
index 000000000..8a4581c19
--- /dev/null
+++ b/src/video_core/shader_environment.cpp
@@ -0,0 +1,460 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <optional>
+#include <utility>
+
+#include "common/assert.h"
+#include "common/cityhash.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "common/fs/fs.h"
+#include "common/logging/log.h"
+#include "shader_recompiler/environment.h"
+#include "video_core/memory_manager.h"
+#include "video_core/shader_environment.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
+
+constexpr size_t INST_SIZE = sizeof(u64);
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+static u64 MakeCbufKey(u32 index, u32 offset) {
+ return (static_cast<u64>(index) << 32) | offset;
+}
+
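MakeCbufKey packs a constant-buffer slot and byte offset into a single u64 so cbuf_values can live in one flat hash map; the inverse split, shown here only for illustration:

    #include <cstdint>
    #include <utility>

    constexpr std::pair<std::uint32_t, std::uint32_t> SplitCbufKey(std::uint64_t key) {
        return {static_cast<std::uint32_t>(key >> 32), static_cast<std::uint32_t>(key)};
    }

    // (3 << 32) | 0x40 splits back into slot 3, offset 0x40.
    static_assert(SplitCbufKey((std::uint64_t{3} << 32) | 0x40).first == 3);
    static_assert(SplitCbufKey((std::uint64_t{3} << 32) | 0x40).second == 0x40);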
+static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
+ switch (entry.texture_type) {
+ case Tegra::Texture::TextureType::Texture1D:
+ return Shader::TextureType::Color1D;
+ case Tegra::Texture::TextureType::Texture2D:
+ case Tegra::Texture::TextureType::Texture2DNoMipmap:
+ return Shader::TextureType::Color2D;
+ case Tegra::Texture::TextureType::Texture3D:
+ return Shader::TextureType::Color3D;
+ case Tegra::Texture::TextureType::TextureCubemap:
+ return Shader::TextureType::ColorCube;
+ case Tegra::Texture::TextureType::Texture1DArray:
+ return Shader::TextureType::ColorArray1D;
+ case Tegra::Texture::TextureType::Texture2DArray:
+ return Shader::TextureType::ColorArray2D;
+ case Tegra::Texture::TextureType::Texture1DBuffer:
+ return Shader::TextureType::Buffer;
+ case Tegra::Texture::TextureType::TextureCubeArray:
+ return Shader::TextureType::ColorArrayCube;
+ default:
+ throw Shader::NotImplementedException("Unknown texture type");
+ }
+}
+
+GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
+ u32 start_address_)
+ : gpu_memory{&gpu_memory_}, program_base{program_base_} {
+ start_address = start_address_;
+}
+
+GenericEnvironment::~GenericEnvironment() = default;
+
+u32 GenericEnvironment::TextureBoundBuffer() const {
+ return texture_bound;
+}
+
+u32 GenericEnvironment::LocalMemorySize() const {
+ return local_memory_size;
+}
+
+u32 GenericEnvironment::SharedMemorySize() const {
+ return shared_memory_size;
+}
+
+std::array<u32, 3> GenericEnvironment::WorkgroupSize() const {
+ return workgroup_size;
+}
+
+u64 GenericEnvironment::ReadInstruction(u32 address) {
+ read_lowest = std::min(read_lowest, address);
+ read_highest = std::max(read_highest, address);
+
+ if (address >= cached_lowest && address < cached_highest) {
+ return code[(address - cached_lowest) / INST_SIZE];
+ }
+ has_unbound_instructions = true;
+ return gpu_memory->Read<u64>(program_base + address);
+}
+
+std::optional<u64> GenericEnvironment::Analyze() {
+ const std::optional<u64> size{TryFindSize()};
+ if (!size) {
+ return std::nullopt;
+ }
+ cached_lowest = start_address;
+ cached_highest = start_address + static_cast<u32>(*size);
+ return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size);
+}
+
+void GenericEnvironment::SetCachedSize(size_t size_bytes) {
+ cached_lowest = start_address;
+ cached_highest = start_address + static_cast<u32>(size_bytes);
+ code.resize(CachedSize());
+ gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
+}
+
+size_t GenericEnvironment::CachedSize() const noexcept {
+ return cached_highest - cached_lowest + INST_SIZE;
+}
+
+size_t GenericEnvironment::ReadSize() const noexcept {
+ return read_highest - read_lowest + INST_SIZE;
+}
+
+bool GenericEnvironment::CanBeSerialized() const noexcept {
+ return !has_unbound_instructions;
+}
+
+u64 GenericEnvironment::CalculateHash() const {
+ const size_t size{ReadSize()};
+ const auto data{std::make_unique<char[]>(size)};
+ gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
+ return Common::CityHash64(data.get(), size);
+}
+
+void GenericEnvironment::Serialize(std::ofstream& file) const {
+ const u64 code_size{static_cast<u64>(CachedSize())};
+ const u64 num_texture_types{static_cast<u64>(texture_types.size())};
+ const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
+
+ file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
+ .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
+ .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
+ .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
+ .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
+ .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
+ .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
+ .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
+ .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
+ .write(reinterpret_cast<const char*>(code.data()), code_size);
+ for (const auto [key, type] : texture_types) {
+ file.write(reinterpret_cast<const char*>(&key), sizeof(key))
+ .write(reinterpret_cast<const char*>(&type), sizeof(type));
+ }
+ for (const auto [key, type] : cbuf_values) {
+ file.write(reinterpret_cast<const char*>(&key), sizeof(key))
+ .write(reinterpret_cast<const char*>(&type), sizeof(type));
+ }
+ if (stage == Shader::Stage::Compute) {
+ file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
+ .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
+ } else {
+ file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
+ if (stage == Shader::Stage::Geometry) {
+ file.write(reinterpret_cast<const char*>(&gp_passthrough_mask),
+ sizeof(gp_passthrough_mask));
+ }
+ }
+}
+
+std::optional<u64> GenericEnvironment::TryFindSize() {
+ static constexpr size_t BLOCK_SIZE = 0x1000;
+ static constexpr size_t MAXIMUM_SIZE = 0x100000;
+
+ static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
+ static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
+
+ GPUVAddr guest_addr{program_base + start_address};
+ size_t offset{0};
+ size_t size{BLOCK_SIZE};
+ while (size <= MAXIMUM_SIZE) {
+ code.resize(size / INST_SIZE);
+ u64* const data = code.data() + offset / INST_SIZE;
+ gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
+ for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
+ const u64 inst = data[index / INST_SIZE];
+ if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
+ return offset + index;
+ }
+ }
+ guest_addr += BLOCK_SIZE;
+ size += BLOCK_SIZE;
+ offset += BLOCK_SIZE;
+ }
+ return std::nullopt;
+}
+
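TryFindSize above grows its buffer one 4 KiB block at a time and stops at the first self-branch instruction, which terminates Maxwell shader programs. The scan itself, reduced to an in-memory buffer (sentinel encodings copied from the constants above):

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <span>

    constexpr std::uint64_t SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
    constexpr std::uint64_t SELF_BRANCH_B = 0xE2400FFFFF07000FULL;

    // Byte offset of the terminating self-branch, or nullopt when absent.
    std::optional<std::size_t> FindShaderEnd(std::span<const std::uint64_t> code) {
        for (std::size_t i = 0; i < code.size(); ++i) {
            if (code[i] == SELF_BRANCH_A || code[i] == SELF_BRANCH_B) {
                return i * sizeof(std::uint64_t);
            }
        }
        return std::nullopt;
    }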
+Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
+ bool via_header_index, u32 raw) {
+ const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
+ const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
+ Tegra::Texture::TICEntry entry;
+ gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
+ const Shader::TextureType result{ConvertType(entry)};
+ texture_types.emplace(raw, result);
+ return result;
+}
+
+GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::MemoryManager& gpu_memory_,
+ Maxwell::ShaderProgram program, GPUVAddr program_base_,
+ u32 start_address_)
+ : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
+ gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
+ gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask;
+ switch (program) {
+ case Maxwell::ShaderProgram::VertexA:
+ stage = Shader::Stage::VertexA;
+ stage_index = 0;
+ break;
+ case Maxwell::ShaderProgram::VertexB:
+ stage = Shader::Stage::VertexB;
+ stage_index = 0;
+ break;
+ case Maxwell::ShaderProgram::TesselationControl:
+ stage = Shader::Stage::TessellationControl;
+ stage_index = 1;
+ break;
+ case Maxwell::ShaderProgram::TesselationEval:
+ stage = Shader::Stage::TessellationEval;
+ stage_index = 2;
+ break;
+ case Maxwell::ShaderProgram::Geometry:
+ stage = Shader::Stage::Geometry;
+ stage_index = 3;
+ break;
+ case Maxwell::ShaderProgram::Fragment:
+ stage = Shader::Stage::Fragment;
+ stage_index = 4;
+ break;
+ default:
+ UNREACHABLE_MSG("Invalid program={}", program);
+ break;
+ }
+ const u64 local_size{sph.LocalMemorySize()};
+ ASSERT(local_size <= std::numeric_limits<u32>::max());
+ local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
+ texture_bound = maxwell3d->regs.tex_cb_index;
+}
+
+u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
+ const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
+ ASSERT(cbuf.enabled);
+ u32 value{};
+ if (cbuf_offset < cbuf.size) {
+ value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
+ }
+ cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
+ return value;
+}
+
+Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
+ const auto& regs{maxwell3d->regs};
+ const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
+ return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
+}
+
+ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
+ u32 start_address_)
+ : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
+ &kepler_compute_} {
+ const auto& qmd{kepler_compute->launch_description};
+ stage = Shader::Stage::Compute;
+ local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc;
+ texture_bound = kepler_compute->regs.tex_cb_index;
+ shared_memory_size = qmd.shared_alloc;
+ workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
+}
+
+u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
+ const auto& qmd{kepler_compute->launch_description};
+ ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
+ const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
+ u32 value{};
+ if (cbuf_offset < cbuf.size) {
+ value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
+ }
+ cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
+ return value;
+}
+
+Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
+ const auto& regs{kepler_compute->regs};
+ const auto& qmd{kepler_compute->launch_description};
+ return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
+}
+
+void FileEnvironment::Deserialize(std::ifstream& file) {
+ u64 code_size{};
+ u64 num_texture_types{};
+ u64 num_cbuf_values{};
+ file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
+ .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
+ .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
+ .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
+ .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
+ .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
+ .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
+ .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
+ .read(reinterpret_cast<char*>(&stage), sizeof(stage));
+ code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
+ file.read(reinterpret_cast<char*>(code.get()), code_size);
+ for (size_t i = 0; i < num_texture_types; ++i) {
+ u32 key;
+ Shader::TextureType type;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key))
+ .read(reinterpret_cast<char*>(&type), sizeof(type));
+ texture_types.emplace(key, type);
+ }
+ for (size_t i = 0; i < num_cbuf_values; ++i) {
+ u64 key;
+ u32 value;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key))
+ .read(reinterpret_cast<char*>(&value), sizeof(value));
+ cbuf_values.emplace(key, value);
+ }
+ if (stage == Shader::Stage::Compute) {
+ file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
+ .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
+ } else {
+ file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
+ if (stage == Shader::Stage::Geometry) {
+ file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
+ }
+ }
+}
+
+u64 FileEnvironment::ReadInstruction(u32 address) {
+ if (address < read_lowest || address > read_highest) {
+ throw Shader::LogicError("Out of bounds address {}", address);
+ }
+ return code[(address - read_lowest) / sizeof(u64)];
+}
+
+u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
+ const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
+ if (it == cbuf_values.end()) {
+ throw Shader::LogicError("Uncached read texture type");
+ }
+ return it->second;
+}
+
+Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
+ const auto it{texture_types.find(handle)};
+ if (it == texture_types.end()) {
+ throw Shader::LogicError("Uncached read texture type");
+ }
+ return it->second;
+}
+
+u32 FileEnvironment::LocalMemorySize() const {
+ return local_memory_size;
+}
+
+u32 FileEnvironment::SharedMemorySize() const {
+ return shared_memory_size;
+}
+
+u32 FileEnvironment::TextureBoundBuffer() const {
+ return texture_bound;
+}
+
+std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
+ return workgroup_size;
+}
+
+void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
+ const std::filesystem::path& filename, u32 cache_version) try {
+ std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
+    file.exceptions(std::ofstream::failbit);
+ if (!file.is_open()) {
+ LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}",
+ Common::FS::PathToUTF8String(filename));
+ return;
+ }
+ if (file.tellp() == 0) {
+ // Write header
+ file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
+ .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version));
+ }
+ if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
+ return;
+ }
+ const u32 num_envs{static_cast<u32>(envs.size())};
+ file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
+ for (const GenericEnvironment* const env : envs) {
+ env->Serialize(file);
+ }
+ file.write(key.data(), key.size_bytes());
+
+} catch (const std::ios_base::failure& e) {
+ LOG_ERROR(Common_Filesystem, "{}", e.what());
+ if (!Common::FS::RemoveFile(filename)) {
+ LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
+ Common::FS::PathToUTF8String(filename));
+ }
+}
+
+void LoadPipelines(
+ std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version,
+ Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
+ Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try {
+ std::ifstream file(filename, std::ios::binary | std::ios::ate);
+ if (!file.is_open()) {
+ return;
+ }
+ file.exceptions(std::ifstream::failbit);
+ const auto end{file.tellg()};
+ file.seekg(0, std::ios::beg);
+
+ std::array<char, 8> magic_number;
+ u32 cache_version;
+ file.read(magic_number.data(), magic_number.size())
+ .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
+ if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) {
+ file.close();
+ if (Common::FS::RemoveFile(filename)) {
+ if (magic_number != MAGIC_NUMBER) {
+ LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file");
+ }
+ if (cache_version != expected_cache_version) {
+ LOG_INFO(Common_Filesystem, "Deleting old pipeline cache");
+ }
+ } else {
+ LOG_ERROR(Common_Filesystem,
+ "Invalid pipeline cache file and failed to delete it in \"{}\"",
+ Common::FS::PathToUTF8String(filename));
+ }
+ return;
+ }
+ while (file.tellg() != end) {
+ if (stop_loading.stop_requested()) {
+ return;
+ }
+ u32 num_envs{};
+ file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
+ std::vector<FileEnvironment> envs(num_envs);
+ for (FileEnvironment& env : envs) {
+ env.Deserialize(file);
+ }
+ if (envs.front().ShaderStage() == Shader::Stage::Compute) {
+ load_compute(file, std::move(envs.front()));
+ } else {
+ load_graphics(file, std::move(envs));
+ }
+ }
+
+} catch (const std::ios_base::failure& e) {
+ LOG_ERROR(Common_Filesystem, "{}", e.what());
+ if (!Common::FS::RemoveFile(filename)) {
+ LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
+ Common::FS::PathToUTF8String(filename));
+ }
+}
+
+} // namespace VideoCommon
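
Read together, SerializePipeline and LoadPipelines define an append-only container: an 8-byte 'yuzucach' magic plus a u32 version, then repeated records of a u32 environment count, the serialized environments, and finally the raw pipeline key. A minimal sketch that validates only the header, under that layout:

    #include <array>
    #include <cstdint>
    #include <fstream>

    constexpr std::array<char, 8> MAGIC{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};

    // True when `path` starts with the expected magic and cache version.
    bool ValidateCacheHeader(const char* path, std::uint32_t expected_version) {
        std::ifstream file(path, std::ios::binary);
        std::array<char, 8> magic{};
        std::uint32_t version{};
        file.read(magic.data(), magic.size())
            .read(reinterpret_cast<char*>(&version), sizeof(version));
        return file.good() && magic == MAGIC && version == expected_version;
    }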
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
new file mode 100644
index 000000000..2079979db
--- /dev/null
+++ b/src/video_core/shader_environment.h
@@ -0,0 +1,183 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <filesystem>
+#include <iosfwd>
+#include <limits>
+#include <memory>
+#include <optional>
+#include <span>
+#include <type_traits>
+#include <unordered_map>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/unique_function.h"
+#include "shader_recompiler/environment.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace VideoCommon {
+
+class GenericEnvironment : public Shader::Environment {
+public:
+ explicit GenericEnvironment() = default;
+ explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
+ u32 start_address_);
+
+ ~GenericEnvironment() override;
+
+ [[nodiscard]] u32 TextureBoundBuffer() const final;
+
+ [[nodiscard]] u32 LocalMemorySize() const final;
+
+ [[nodiscard]] u32 SharedMemorySize() const final;
+
+ [[nodiscard]] std::array<u32, 3> WorkgroupSize() const final;
+
+ [[nodiscard]] u64 ReadInstruction(u32 address) final;
+
+ [[nodiscard]] std::optional<u64> Analyze();
+
+ void SetCachedSize(size_t size_bytes);
+
+ [[nodiscard]] size_t CachedSize() const noexcept;
+
+ [[nodiscard]] size_t ReadSize() const noexcept;
+
+ [[nodiscard]] bool CanBeSerialized() const noexcept;
+
+ [[nodiscard]] u64 CalculateHash() const;
+
+ void Serialize(std::ofstream& file) const;
+
+protected:
+ std::optional<u64> TryFindSize();
+
+ Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
+ u32 raw);
+
+ Tegra::MemoryManager* gpu_memory{};
+ GPUVAddr program_base{};
+
+ std::vector<u64> code;
+ std::unordered_map<u32, Shader::TextureType> texture_types;
+ std::unordered_map<u64, u32> cbuf_values;
+
+ u32 local_memory_size{};
+ u32 texture_bound{};
+ u32 shared_memory_size{};
+ std::array<u32, 3> workgroup_size{};
+
+ u32 read_lowest = std::numeric_limits<u32>::max();
+ u32 read_highest = 0;
+
+ u32 cached_lowest = std::numeric_limits<u32>::max();
+ u32 cached_highest = 0;
+
+ bool has_unbound_instructions = false;
+};
+
+class GraphicsEnvironment final : public GenericEnvironment {
+public:
+ explicit GraphicsEnvironment() = default;
+ explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::Maxwell3D::Regs::ShaderProgram program,
+ GPUVAddr program_base_, u32 start_address_);
+
+ ~GraphicsEnvironment() override = default;
+
+ u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
+
+ Shader::TextureType ReadTextureType(u32 handle) override;
+
+private:
+ Tegra::Engines::Maxwell3D* maxwell3d{};
+ size_t stage_index{};
+};
+
+class ComputeEnvironment final : public GenericEnvironment {
+public:
+ explicit ComputeEnvironment() = default;
+ explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
+ u32 start_address_);
+
+ ~ComputeEnvironment() override = default;
+
+ u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
+
+ Shader::TextureType ReadTextureType(u32 handle) override;
+
+private:
+ Tegra::Engines::KeplerCompute* kepler_compute{};
+};
+
+class FileEnvironment final : public Shader::Environment {
+public:
+ FileEnvironment() = default;
+ ~FileEnvironment() override = default;
+
+ FileEnvironment& operator=(FileEnvironment&&) noexcept = default;
+ FileEnvironment(FileEnvironment&&) noexcept = default;
+
+ FileEnvironment& operator=(const FileEnvironment&) = delete;
+ FileEnvironment(const FileEnvironment&) = delete;
+
+ void Deserialize(std::ifstream& file);
+
+ [[nodiscard]] u64 ReadInstruction(u32 address) override;
+
+ [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
+
+ [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override;
+
+ [[nodiscard]] u32 LocalMemorySize() const override;
+
+ [[nodiscard]] u32 SharedMemorySize() const override;
+
+ [[nodiscard]] u32 TextureBoundBuffer() const override;
+
+ [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
+
+private:
+ std::unique_ptr<u64[]> code;
+ std::unordered_map<u32, Shader::TextureType> texture_types;
+ std::unordered_map<u64, u32> cbuf_values;
+ std::array<u32, 3> workgroup_size{};
+ u32 local_memory_size{};
+ u32 shared_memory_size{};
+ u32 texture_bound{};
+ u32 read_lowest{};
+ u32 read_highest{};
+};
+
+void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
+ const std::filesystem::path& filename, u32 cache_version);
+
+template <typename Key, typename Envs>
+void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename,
+ u32 cache_version) {
+ static_assert(std::is_trivially_copyable_v<Key>);
+ static_assert(std::has_unique_object_representations_v<Key>);
+ SerializePipeline(std::span(reinterpret_cast<const char*>(&key), sizeof(key)),
+ std::span(envs.data(), envs.size()), filename, cache_version);
+}
+
+void LoadPipelines(
+ std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version,
+ Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
+ Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics);
+
+} // namespace VideoCommon
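
The templated SerializePipeline overload byte-reinterprets the key, and its two static_asserts are what make that safe: has_unique_object_representations rejects any key type with padding bytes, whose values would be indeterminate on disk. A standalone sketch of the same guard with a hypothetical key type:

    #include <cstdint>
    #include <span>
    #include <type_traits>

    struct PipelineKey { // hypothetical key, not yuzu's real type
        std::uint64_t shader_hash;
        std::uint32_t state_bits;
        std::uint32_t filler; // keeps the struct padding-free
    };

    template <typename Key>
    std::span<const char> KeyBytes(const Key& key) {
        static_assert(std::is_trivially_copyable_v<Key>);
        static_assert(std::has_unique_object_representations_v<Key>);
        return std::span(reinterpret_cast<const char*>(&key), sizeof(key));
    }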
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp
index 693e47158..dc6995b46 100644
--- a/src/video_core/shader_notify.cpp
+++ b/src/video_core/shader_notify.cpp
@@ -2,42 +2,35 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <mutex>
+#include <atomic>
+#include <chrono>
+#include <optional>
+
#include "video_core/shader_notify.h"
using namespace std::chrono_literals;
namespace VideoCore {
-namespace {
-constexpr auto UPDATE_TICK = 32ms;
-}
-
-ShaderNotify::ShaderNotify() = default;
-ShaderNotify::~ShaderNotify() = default;
-std::size_t ShaderNotify::GetShadersBuilding() {
- const auto now = std::chrono::high_resolution_clock::now();
- const auto diff = now - last_update;
- if (diff > UPDATE_TICK) {
- std::shared_lock lock(mutex);
- last_updated_count = accurate_count;
+const auto TIME_TO_STOP_REPORTING = 2s;
+
+int ShaderNotify::ShadersBuilding() noexcept {
+ const int now_complete = num_complete.load(std::memory_order::relaxed);
+ const int now_building = num_building.load(std::memory_order::relaxed);
+ if (now_complete == now_building) {
+ const auto now = std::chrono::high_resolution_clock::now();
+ if (completed && num_complete == num_when_completed) {
+ if (now - complete_time > TIME_TO_STOP_REPORTING) {
+ report_base = now_complete;
+ completed = false;
+ }
+ } else {
+ completed = true;
+ num_when_completed = num_complete;
+ complete_time = now;
+ }
}
- return last_updated_count;
-}
-
-std::size_t ShaderNotify::GetShadersBuildingAccurate() {
- std::shared_lock lock{mutex};
- return accurate_count;
-}
-
-void ShaderNotify::MarkShaderComplete() {
- std::unique_lock lock{mutex};
- accurate_count--;
-}
-
-void ShaderNotify::MarkSharderBuilding() {
- std::unique_lock lock{mutex};
- accurate_count++;
+ return now_building - report_base;
}
} // namespace VideoCore
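
The rewrite drops the shared_mutex in favor of two relaxed atomic counters: builder threads increment num_building before compiling and num_complete afterwards, and the presenting thread derives the in-flight count by subtraction. A reduced usage sketch, assuming the class shape from this diff:

    #include <atomic>

    class MiniNotify { // reduced stand-in for VideoCore::ShaderNotify
    public:
        void MarkShaderBuilding() noexcept { ++num_building; }
        void MarkShaderComplete() noexcept { ++num_complete; }

        int InFlight() const noexcept {
            return num_building.load(std::memory_order_relaxed) -
                   num_complete.load(std::memory_order_relaxed);
        }

    private:
        std::atomic_int num_building{};
        std::atomic_int num_complete{};
    };

    // Worker thread: notify.MarkShaderBuilding(); Build(); notify.MarkShaderComplete();
    // UI thread: poll notify.InFlight() each frame and show it when nonzero.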
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h
index a9c92d179..ad363bfb5 100644
--- a/src/video_core/shader_notify.h
+++ b/src/video_core/shader_notify.h
@@ -4,26 +4,30 @@
#pragma once
+#include <atomic>
#include <chrono>
-#include <shared_mutex>
-#include "common/common_types.h"
+#include <optional>
namespace VideoCore {
class ShaderNotify {
public:
- ShaderNotify();
- ~ShaderNotify();
+ [[nodiscard]] int ShadersBuilding() noexcept;
- std::size_t GetShadersBuilding();
- std::size_t GetShadersBuildingAccurate();
+ void MarkShaderComplete() noexcept {
+ ++num_complete;
+ }
- void MarkShaderComplete();
- void MarkSharderBuilding();
+ void MarkShaderBuilding() noexcept {
+ ++num_building;
+ }
private:
- std::size_t last_updated_count{};
- std::size_t accurate_count{};
- std::shared_mutex mutex;
- std::chrono::high_resolution_clock::time_point last_update{};
+ std::atomic_int num_building{};
+ std::atomic_int num_complete{};
+ int report_base{};
+
+ bool completed{};
+ int num_when_completed{};
+ std::chrono::high_resolution_clock::time_point complete_time;
};
} // namespace VideoCore
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index d10ba4ccd..249cc4d0f 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) {
return "Invalid";
}
-std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
+std::string Name(const ImageViewBase& image_view) {
const u32 width = image_view.size.width;
const u32 height = image_view.size.height;
const u32 depth = image_view.size.depth;
@@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t
const u32 num_layers = image_view.range.extent.layers;
const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
- switch (type.value_or(image_view.type)) {
+ switch (image_view.type) {
case ImageViewType::e1D:
return fmt::format("ImageView 1D {}{}", width, level);
case ImageViewType::e2D:
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index a48413983..c6cf0583f 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -255,8 +255,7 @@ struct RenderTargets;
[[nodiscard]] std::string Name(const ImageBase& image);
-[[nodiscard]] std::string Name(const ImageViewBase& image_view,
- std::optional<ImageViewType> type = std::nullopt);
+[[nodiscard]] std::string Name(const ImageViewBase& image_view);
[[nodiscard]] std::string Name(const RenderTargets& render_targets);
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index e8d632f9e..450becbeb 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -36,6 +36,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
}
}
+ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info)
+ : format{info.format}, type{ImageViewType::Buffer}, size{
+ .width = info.size.width,
+ .height = 1,
+ .depth = 1,
+ } {
+ ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer");
+}
+
ImageViewBase::ImageViewBase(const NullImageParams&) {}
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
index 73954167e..903f715c5 100644
--- a/src/video_core/texture_cache/image_view_base.h
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -27,6 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
struct ImageViewBase {
explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
ImageId image_id);
+ explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info);
explicit ImageViewBase(const NullImageParams&);
[[nodiscard]] bool IsBuffer() const noexcept {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 01de2d498..f34c9d9ca 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -117,6 +117,9 @@ public:
/// Return a reference to the given image view id
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
+ /// Mark an image as modified from the GPU
+ void MarkModification(ImageId id) noexcept;
+
/// Fill image_view_ids with the graphics images in indices
void FillGraphicsImageViews(std::span<const u32> indices,
std::span<ImageViewId> image_view_ids);
@@ -527,6 +530,11 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
}
template <class P>
+void TextureCache<P>::MarkModification(ImageId id) noexcept {
+ MarkModification(slot_images[id]);
+}
+
+template <class P>
void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
std::span<ImageViewId> image_view_ids) {
FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
@@ -540,13 +548,13 @@ void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
template <class P>
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
- [[unlikely]] if (index > graphics_sampler_table.Limit()) {
- LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
+ if (index > graphics_sampler_table.Limit()) {
+ LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return &slot_samplers[NULL_SAMPLER_ID];
}
const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
SamplerId& id = graphics_sampler_ids[index];
- [[unlikely]] if (is_new) {
+ if (is_new) {
id = FindSampler(descriptor);
}
return &slot_samplers[id];
@@ -554,13 +562,13 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
template <class P>
typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
- [[unlikely]] if (index > compute_sampler_table.Limit()) {
- LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
+ if (index > compute_sampler_table.Limit()) {
+ LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return &slot_samplers[NULL_SAMPLER_ID];
}
const auto [descriptor, is_new] = compute_sampler_table.Read(index);
SamplerId& id = compute_sampler_ids[index];
- [[unlikely]] if (is_new) {
+ if (is_new) {
id = FindSampler(descriptor);
}
return &slot_samplers[id];
@@ -599,6 +607,12 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
using namespace VideoCommon::Dirty;
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::RenderTargets]) {
+ for (size_t index = 0; index < NUM_RT; ++index) {
+ ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+ PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
+ }
+ const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
+ PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
return;
}
flags[Dirty::RenderTargets] = false;
@@ -655,7 +669,7 @@ ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids,
u32 index) {
if (index > table.Limit()) {
- LOG_ERROR(HW_GPU, "Invalid image view index={}", index);
+ LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
return NULL_IMAGE_VIEW_ID;
}
const auto [descriptor, is_new] = table.Read(index);
@@ -962,9 +976,6 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
ConvertImage(unswizzled_data, image.info, mapped_span, copies);
image.UploadMemory(staging, copies);
- } else if (image.info.type == ImageType::Buffer) {
- const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
- image.UploadMemory(staging, copies);
} else {
const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
image.UploadMemory(staging, copies);
@@ -987,7 +998,12 @@ ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
template <class P>
ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
const ImageInfo info(config);
- const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride;
+ if (info.type == ImageType::Buffer) {
+ const ImageViewInfo view_info(config, 0);
+ return slot_image_views.insert(runtime, info, view_info, config.Address());
+ }
+ const u32 layer_offset = config.BaseLayer() * info.layer_stride;
+ const GPUVAddr image_gpu_addr = config.Address() - layer_offset;
const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
if (!image_id) {
return NULL_IMAGE_VIEW_ID;
@@ -1795,6 +1811,9 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi
return;
}
const ImageViewBase& image_view = slot_image_views[image_view_id];
+ if (image_view.IsBuffer()) {
+ return;
+ }
PrepareImage(image_view.image_id, is_modification, invalidate);
}
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index 9fbdc1ac6..47a11cb2f 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -133,8 +133,8 @@ struct BufferImageCopy {
};
struct BufferCopy {
- size_t src_offset;
- size_t dst_offset;
+ u64 src_offset;
+ u64 dst_offset;
size_t size;
};
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index c872517b8..59cf2f561 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -169,23 +169,6 @@ template <u32 GOB_EXTENT>
return Common::DivCeil(AdjustMipSize(size, level), block_size);
}
-[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) {
- switch (num_samples) {
- case 1:
- return {1, 1};
- case 2:
- return {2, 1};
- case 4:
- return {2, 2};
- case 8:
- return {4, 2};
- case 16:
- return {4, 4};
- }
- UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
- return {1, 1};
-}
-
[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) {
return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
}
@@ -283,14 +266,13 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
- u32 num_samples, u32 tile_width_spacing) {
- const auto [samples_x, samples_y] = Samples(num_samples);
+ u32 tile_width_spacing) {
const u32 bytes_per_block = BytesPerBlock(format);
return {
.size =
{
- .width = size.width * samples_x,
- .height = size.height * samples_y,
+ .width = size.width,
+ .height = size.height,
.depth = size.depth,
},
.block = block,
@@ -301,14 +283,12 @@ template <u32 GOB_EXTENT>
}
[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
- return MakeLevelInfo(info.format, info.size, info.block, info.num_samples,
- info.tile_width_spacing);
+ return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing);
}
[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
- u32 num_samples, u32 tile_width_spacing,
- u32 level) {
- const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing);
+ u32 tile_width_spacing, u32 level) {
+ const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing);
u32 offset = 0;
for (u32 current_level = 0; current_level < level; ++current_level) {
offset += CalculateLevelSize(info, current_level);
@@ -645,8 +625,8 @@ u32 CalculateLayerStride(const ImageInfo& info) noexcept {
u32 CalculateLayerSize(const ImageInfo& info) noexcept {
ASSERT(info.type != ImageType::Linear);
- return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples,
- info.tile_width_spacing, info.resources.levels);
+ return CalculateLevelOffset(info.format, info.size, info.block, info.tile_width_spacing,
+ info.resources.levels);
}
LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
@@ -1195,37 +1175,37 @@ static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2
0x7f8000);
static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
-static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 1, 0, 7) ==
+static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) ==
0x2afc00);
-static_assert(CalculateLevelOffset(PixelFormat::ASTC_2D_12X12_UNORM, {8192, 4096, 1}, {0, 2, 0}, 1,
- 0, 12) == 0x50d200);
-
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 0) == 0);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 1) == 0x400000);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 2) == 0x500000);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 3) == 0x540000);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 4) == 0x550000);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 5) == 0x554000);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 6) == 0x555000);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 7) == 0x555400);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 8) == 0x555600);
-static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 1, 0,
- 9) == 0x555800);
+static_assert(CalculateLevelOffset(PixelFormat::ASTC_2D_12X12_UNORM, {8192, 4096, 1}, {0, 2, 0}, 0,
+ 12) == 0x50d200);
+
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 0) ==
+ 0);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 1) ==
+ 0x400000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 2) ==
+ 0x500000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 3) ==
+ 0x540000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 4) ==
+ 0x550000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 5) ==
+ 0x554000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 6) ==
+ 0x555000);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 7) ==
+ 0x555400);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 8) ==
+ 0x555600);
+static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 9) ==
+ 0x555800);
constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height,
u32 tile_width_spacing, u32 level) {
const Extent3D size{width, height, 1};
const Extent3D block{0, block_height, 0};
- const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level);
+ const u32 offset = CalculateLevelOffset(format, size, block, tile_width_spacing, level);
return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing);
}
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c1d14335e..1a9399455 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -154,6 +154,15 @@ union TextureHandle {
};
static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
+[[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) {
+ if (via_header_index) {
+ return {raw, raw};
+ } else {
+ const Tegra::Texture::TextureHandle handle{raw};
+ return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id};
+ }
+}
+
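TexturePair either reuses one raw index for both descriptor tables (via_header_index) or splits a packed TextureHandle into its tic_id/tsc_id bitfields. A sketch of the split with an assumed 20/12-bit packing; the real field widths are defined by the TextureHandle union earlier in this header:

    #include <cstdint>
    #include <utility>

    // Assumed packing (illustrative only): low 20 bits = TIC id, high 12 = TSC id.
    std::pair<std::uint32_t, std::uint32_t> SplitHandle(std::uint32_t raw,
                                                        bool via_header_index) {
        if (via_header_index) {
            return {raw, raw}; // one index addresses both tables
        }
        return {raw & 0xFFFFFu, raw >> 20};
    }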
struct TICEntry {
union {
struct {
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
new file mode 100644
index 000000000..ba26ac3f1
--- /dev/null
+++ b/src/video_core/transform_feedback.cpp
@@ -0,0 +1,99 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/transform_feedback.h"
+
+namespace VideoCommon {
+
+std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+ const TransformFeedbackState& state) {
+ static constexpr std::array VECTORS{
+ 28, // gl_Position
+ 32, // Generic 0
+ 36, // Generic 1
+ 40, // Generic 2
+ 44, // Generic 3
+ 48, // Generic 4
+ 52, // Generic 5
+ 56, // Generic 6
+ 60, // Generic 7
+ 64, // Generic 8
+ 68, // Generic 9
+ 72, // Generic 10
+ 76, // Generic 11
+ 80, // Generic 12
+ 84, // Generic 13
+ 88, // Generic 14
+ 92, // Generic 15
+ 96, // Generic 16
+ 100, // Generic 17
+ 104, // Generic 18
+ 108, // Generic 19
+ 112, // Generic 20
+ 116, // Generic 21
+ 120, // Generic 22
+ 124, // Generic 23
+ 128, // Generic 24
+ 132, // Generic 25
+ 136, // Generic 26
+ 140, // Generic 27
+ 144, // Generic 28
+ 148, // Generic 29
+ 152, // Generic 30
+ 156, // Generic 31
+ 160, // gl_FrontColor
+ 164, // gl_FrontSecondaryColor
+ 160, // gl_BackColor
+ 164, // gl_BackSecondaryColor
+ 192, // gl_TexCoord[0]
+ 196, // gl_TexCoord[1]
+ 200, // gl_TexCoord[2]
+ 204, // gl_TexCoord[3]
+ 208, // gl_TexCoord[4]
+ 212, // gl_TexCoord[5]
+ 216, // gl_TexCoord[6]
+ 220, // gl_TexCoord[7]
+ };
+ std::vector<Shader::TransformFeedbackVarying> xfb(256);
+ for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
+ const auto& locations = state.varyings[buffer];
+ const auto& layout = state.layouts[buffer];
+ const u32 varying_count = layout.varying_count;
+ u32 highest = 0;
+ for (u32 offset = 0; offset < varying_count; ++offset) {
+ const u32 base_offset = offset;
+ const u8 location = locations[offset];
+
+ UNIMPLEMENTED_IF_MSG(layout.stream != 0, "Stream is not zero: {}", layout.stream);
+ Shader::TransformFeedbackVarying varying{
+ .buffer = static_cast<u32>(buffer),
+ .stride = layout.stride,
+ .offset = offset * 4,
+ .components = 1,
+ };
+ if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) {
+ UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
+
+ const u8 base_index = location / 4;
+ while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
+ ++offset;
+ ++varying.components;
+ }
+ }
+ xfb[location] = varying;
+ highest = std::max(highest, (base_offset + varying.components) * 4);
+ }
+ UNIMPLEMENTED_IF(highest != layout.stride);
+ }
+ return xfb;
+}
+
+} // namespace VideoCommon
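
MakeTransformFeedbackVaryings folds consecutive buffer offsets whose locations fall in the same vec4 attribute (location / 4) into a single varying with components up to 4, e.g. locations 32..35 become one 4-component record for generic attribute 0. The coalescing step in isolation, without the VECTORS whitelist or stride validation:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Varying {
        std::uint32_t offset;     // byte offset within the feedback buffer
        std::uint32_t components; // 1..4 consecutive scalars
    };

    // Merge runs of locations that share location / 4 (the same vec4 attribute).
    std::vector<Varying> Coalesce(const std::vector<std::uint8_t>& locations) {
        std::vector<Varying> out;
        for (std::size_t i = 0; i < locations.size();) {
            Varying v{static_cast<std::uint32_t>(i) * 4, 1};
            while (i + v.components < locations.size() &&
                   locations[i] / 4 == locations[i + v.components] / 4) {
                ++v.components;
            }
            i += v.components;
            out.push_back(v);
        }
        return out;
    }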
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
new file mode 100644
index 000000000..8f6946d65
--- /dev/null
+++ b/src/video_core/transform_feedback.h
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <vector>
+
+#include "common/common_types.h"
+#include "shader_recompiler/runtime_info.h"
+#include "video_core/engines/maxwell_3d.h"
+
+namespace VideoCommon {
+
+struct TransformFeedbackState {
+ struct Layout {
+ u32 stream;
+ u32 varying_count;
+ u32 stride;
+ };
+ std::array<Layout, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> layouts;
+ std::array<std::array<u8, 128>, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
+ varyings;
+};
+
+std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+ const TransformFeedbackState& state);
+
+} // namespace VideoCommon
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
index 758c038ba..fdd1a5081 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
@@ -73,12 +73,11 @@ NsightAftermathTracker::~NsightAftermathTracker() {
}
}
-void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
+void NsightAftermathTracker::SaveShader(std::span<const u32> spirv) const {
if (!initialized) {
return;
}
-
- std::vector<u32> spirv_copy = spirv;
+ std::vector<u32> spirv_copy(spirv.begin(), spirv.end());
GFSDK_Aftermath_SpirvCode shader;
shader.pData = spirv_copy.data();
shader.size = static_cast<u32>(spirv_copy.size() * 4);
@@ -100,7 +99,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
return;
}
- if (file.Write(spirv) != spirv.size()) {
+ if (file.WriteSpan(spirv) != spirv.size()) {
LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash);
return;
}
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
index 4fe2b14d9..eae1891dd 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
@@ -6,6 +6,7 @@
#include <filesystem>
#include <mutex>
+#include <span>
#include <string>
#include <vector>
@@ -33,7 +34,7 @@ public:
NsightAftermathTracker(NsightAftermathTracker&&) = delete;
NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
- void SaveShader(const std::vector<u32>& spirv) const;
+ void SaveShader(std::span<const u32> spirv) const;
private:
#ifdef HAS_NSIGHT_AFTERMATH
@@ -61,21 +62,21 @@ private:
bool initialized = false;
Common::DynamicLibrary dl;
- PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps;
- PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps;
- PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier;
- PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv;
- PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder;
- PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder;
- PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON;
- PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON;
+ PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps{};
+ PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps{};
+ PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier{};
+ PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv{};
+ PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder{};
+ PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder{};
+ PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON{};
+ PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON{};
#endif
};
#ifndef HAS_NSIGHT_AFTERMATH
inline NsightAftermathTracker::NsightAftermathTracker() = default;
inline NsightAftermathTracker::~NsightAftermathTracker() = default;
-inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {}
+inline void NsightAftermathTracker::SaveShader(std::span<const u32>) const {}
#endif
} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index f214510da..44afdc1cd 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <bitset>
#include <chrono>
#include <optional>
@@ -33,6 +34,12 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{
};
} // namespace Alternatives
+enum class NvidiaArchitecture {
+ AmpereOrNewer,
+ Turing,
+ VoltaOrOlder,
+};
+
constexpr std::array REQUIRED_EXTENSIONS{
VK_KHR_MAINTENANCE1_EXTENSION_NAME,
VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
@@ -43,11 +50,14 @@ constexpr std::array REQUIRED_EXTENSIONS{
VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
+ VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
+ VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
+ VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME,
#ifdef _WIN32
VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
#endif
@@ -112,6 +122,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
VK_FORMAT_R16G16_SFLOAT,
VK_FORMAT_R16G16_SINT,
VK_FORMAT_R16_UNORM,
+ VK_FORMAT_R16_SNORM,
VK_FORMAT_R16_UINT,
VK_FORMAT_R8G8B8A8_SRGB,
VK_FORMAT_R8G8_UNORM,
@@ -191,15 +202,47 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
return format_properties;
}
+std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) {
+ const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
+ std::vector<std::string> supported_extensions;
+ supported_extensions.reserve(extensions.size());
+ for (const auto& extension : extensions) {
+ supported_extensions.emplace_back(extension.extensionName);
+ }
+ return supported_extensions;
+}
+
+NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
+ std::span<const std::string> exts) {
+ if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) {
+ VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{};
+ shading_rate_props.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;
+ VkPhysicalDeviceProperties2KHR physical_properties{};
+ physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+ physical_properties.pNext = &shading_rate_props;
+ physical.GetProperties2KHR(physical_properties);
+ if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) {
+ // Only Ampere and newer support this feature
+ return NvidiaArchitecture::AmpereOrNewer;
+ }
+ }
+ if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) {
+ return NvidiaArchitecture::Turing;
+ }
+ return NvidiaArchitecture::VoltaOrOlder;
+}
} // Anonymous namespace
Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface,
const vk::InstanceDispatch& dld_)
: instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
- format_properties{GetFormatProperties(physical)} {
+ supported_extensions{GetSupportedExtensions(physical)},
+ format_properties(GetFormatProperties(physical)) {
CheckSuitability(surface != nullptr);
SetupFamilies(surface);
SetupFeatures();
+ SetupProperties();
const auto queue_cis = GetDeviceQueueCreateInfos();
const std::vector extensions = LoadExtensions(surface != nullptr);
@@ -214,16 +257,16 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
.independentBlend = true,
.geometryShader = true,
.tessellationShader = true,
- .sampleRateShading = false,
- .dualSrcBlend = false,
+ .sampleRateShading = true,
+ .dualSrcBlend = true,
.logicOp = false,
.multiDrawIndirect = false,
.drawIndirectFirstInstance = false,
.depthClamp = true,
.depthBiasClamp = true,
- .fillModeNonSolid = false,
- .depthBounds = false,
- .wideLines = false,
+ .fillModeNonSolid = true,
+ .depthBounds = is_depth_bounds_supported,
+ .wideLines = true,
.largePoints = true,
.alphaToOne = false,
.multiViewport = true,
@@ -245,11 +288,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
.shaderSampledImageArrayDynamicIndexing = false,
.shaderStorageBufferArrayDynamicIndexing = false,
.shaderStorageImageArrayDynamicIndexing = false,
- .shaderClipDistance = false,
- .shaderCullDistance = false,
- .shaderFloat64 = false,
- .shaderInt64 = false,
- .shaderInt16 = false,
+ .shaderClipDistance = true,
+ .shaderCullDistance = true,
+ .shaderFloat64 = is_shader_float64_supported,
+ .shaderInt64 = is_shader_int64_supported,
+ .shaderInt16 = is_shader_int16_supported,
.shaderResourceResidency = false,
.shaderResourceMinLod = false,
.sparseBinding = false,
@@ -278,7 +321,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
.pNext = nullptr,
- .storageBuffer16BitAccess = false,
+ .storageBuffer16BitAccess = true,
.uniformAndStorageBuffer16BitAccess = true,
.storagePushConstant16 = false,
.storageInputOutput16 = false,
@@ -310,6 +353,21 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
};
SetNext(next, host_query_reset);
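+    // Variable pointers are mandatory features (see CheckSuitability), so both
+    // flags can be enabled unconditionally.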
+ VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR,
+ .pNext = nullptr,
+ .variablePointersStorageBuffer = VK_TRUE,
+ .variablePointers = VK_TRUE,
+ };
+ SetNext(next, variable_pointers);
+
+ VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT,
+ .pNext = nullptr,
+ .shaderDemoteToHelperInvocation = true,
+ };
+ SetNext(next, demote);
+
VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
if (is_float16_supported) {
float16_int8 = {
@@ -327,6 +385,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles");
}
+ if (!nv_viewport_array2) {
+ LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks");
+ }
+
+ if (!nv_geometry_shader_passthrough) {
+ LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders");
+ }
+
VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
if (khr_uniform_buffer_standard_layout) {
std430_layout = {
@@ -389,12 +455,83 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
}
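+    // Only rectangular and smooth lines are requested; the Bresenham and
+    // stippled variants are left disabled.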
+ VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster;
+ if (ext_line_rasterization) {
+ line_raster = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT,
+ .pNext = nullptr,
+ .rectangularLines = VK_TRUE,
+ .bresenhamLines = VK_FALSE,
+ .smoothLines = VK_TRUE,
+ .stippledRectangularLines = VK_FALSE,
+ .stippledBresenhamLines = VK_FALSE,
+ .stippledSmoothLines = VK_FALSE,
+ };
+ SetNext(next, line_raster);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines");
+ }
+
+ if (!ext_conservative_rasterization) {
+ LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization");
+ }
+
+ VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex;
+ if (ext_provoking_vertex) {
+ provoking_vertex = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT,
+ .pNext = nullptr,
+ .provokingVertexLast = VK_TRUE,
+ .transformFeedbackPreservesProvokingVertex = VK_TRUE,
+ };
+ SetNext(next, provoking_vertex);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last");
+ }
+
+ VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic;
+ if (ext_vertex_input_dynamic_state) {
+ vertex_input_dynamic = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT,
+ .pNext = nullptr,
+ .vertexInputDynamicState = VK_TRUE,
+ };
+ SetNext(next, vertex_input_dynamic);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state");
+ }
+
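+    // LoadExtensions only sets ext_shader_atomic_int64 when both buffer and
+    // shared int64 atomics are reported, so both can be enabled together.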
+ VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64;
+ if (ext_shader_atomic_int64) {
+ atomic_int64 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR,
+ .pNext = nullptr,
+ .shaderBufferInt64Atomics = VK_TRUE,
+ .shaderSharedInt64Atomics = VK_TRUE,
+ };
+ SetNext(next, atomic_int64);
+ }
+
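+    // LoadExtensions only sets khr_workgroup_memory_explicit_layout when all
+    // four layout features are reported, so they are enabled together.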
+ VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout;
+ if (khr_workgroup_memory_explicit_layout) {
+ workgroup_layout = {
+ .sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR,
+ .pNext = nullptr,
+ .workgroupMemoryExplicitLayout = VK_TRUE,
+ .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE,
+ .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE,
+ .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE,
+ };
+ SetNext(next, workgroup_layout);
+ }
+
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
- if (nv_device_diagnostics_config) {
+ if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) {
nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>();
diagnostics_nv = {
@@ -412,11 +549,33 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
CollectTelemetryParameters();
CollectToolingInfo();
+ if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
+ const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
+ switch (arch) {
+ case NvidiaArchitecture::AmpereOrNewer:
+ LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math");
+ is_float16_supported = false;
+ break;
+ case NvidiaArchitecture::Turing:
+ break;
+ case NvidiaArchitecture::VoltaOrOlder:
+ LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor");
+ khr_push_descriptor = false;
+ break;
+ }
+ }
if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
- LOG_WARNING(
- Render_Vulkan,
- "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu");
- ext_extended_dynamic_state = false;
+        // Mask out the top three variant bits of the driver version before comparing
+        const u32 version = (properties.driverVersion << 3) >> 3;
+ if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) {
+ LOG_WARNING(Render_Vulkan,
+ "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state");
+ ext_extended_dynamic_state = false;
+ }
+ }
+ if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+ LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
+ ext_vertex_input_dynamic_state = false;
}
if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
// Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
@@ -426,8 +585,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
-
- use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
}
Device::~Device() = default;
@@ -471,7 +628,7 @@ void Device::ReportLoss() const {
std::this_thread::sleep_for(std::chrono::seconds{15});
}
-void Device::SaveShader(const std::vector<u32>& spirv) const {
+void Device::SaveShader(std::span<const u32> spirv) const {
if (nsight_aftermath_tracker) {
nsight_aftermath_tracker->SaveShader(spirv);
}
@@ -597,10 +754,20 @@ void Device::CheckSuitability(bool requires_swapchain) const {
throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
}
}
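+    // Chain the feature structs through pNext so the features2 query fills all
+    // of them at once.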
+ VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{};
+ demote.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;
+ demote.pNext = nullptr;
+
+ VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{};
+ variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR;
+ variable_pointers.pNext = &demote;
+
VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
+ robustness2.pNext = &variable_pointers;
- VkPhysicalDeviceFeatures2 features2{};
+ VkPhysicalDeviceFeatures2KHR features2{};
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = &robustness2;
@@ -610,7 +777,6 @@ void Device::CheckSuitability(bool requires_swapchain) const {
const std::array feature_report{
std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
- std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
std::make_pair(features.imageCubeArray, "imageCubeArray"),
std::make_pair(features.independentBlend, "independentBlend"),
std::make_pair(features.depthClamp, "depthClamp"),
@@ -618,13 +784,23 @@ void Device::CheckSuitability(bool requires_swapchain) const {
std::make_pair(features.largePoints, "largePoints"),
std::make_pair(features.multiViewport, "multiViewport"),
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
+ std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"),
+ std::make_pair(features.wideLines, "wideLines"),
std::make_pair(features.geometryShader, "geometryShader"),
std::make_pair(features.tessellationShader, "tessellationShader"),
+ std::make_pair(features.sampleRateShading, "sampleRateShading"),
+ std::make_pair(features.dualSrcBlend, "dualSrcBlend"),
std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
"shaderStorageImageWriteWithoutFormat"),
+ std::make_pair(features.shaderClipDistance, "shaderClipDistance"),
+ std::make_pair(features.shaderCullDistance, "shaderCullDistance"),
+ std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"),
+ std::make_pair(variable_pointers.variablePointers, "variablePointers"),
+ std::make_pair(variable_pointers.variablePointersStorageBuffer,
+ "variablePointersStorageBuffer"),
std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
@@ -647,14 +823,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
}
bool has_khr_shader_float16_int8{};
+ bool has_khr_workgroup_memory_explicit_layout{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{};
bool has_ext_extended_dynamic_state{};
- for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
+ bool has_ext_shader_atomic_int64{};
+ bool has_ext_provoking_vertex{};
+ bool has_ext_vertex_input_dynamic_state{};
+ bool has_ext_line_rasterization{};
+ for (const std::string& extension : supported_extensions) {
const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
bool push) {
- if (extension.extensionName != std::string_view(name)) {
+ if (extension != name) {
return;
}
if (push) {
@@ -665,8 +846,13 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
}
};
test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
+ test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true);
+ test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME,
+ true);
test(khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
+ test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
+ test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true);
test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
@@ -675,16 +861,25 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
true);
test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
+ test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME,
+ true);
test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
- if (Settings::values.renderer_debug) {
+ test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false);
+ test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME,
+ false);
+ test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
+ test(has_khr_workgroup_memory_explicit_layout,
+ VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
+ test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
+ if (Settings::values.enable_nsight_aftermath) {
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
true);
}
}
- VkPhysicalDeviceFeatures2KHR features;
+ VkPhysicalDeviceFeatures2KHR features{};
features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
VkPhysicalDeviceProperties2KHR physical_properties;
@@ -722,10 +917,49 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
subgroup_properties.maxSubgroupSize >= GuestWarpSize) {
extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages;
+ ext_subgroup_size_control = true;
}
} else {
is_warp_potentially_bigger = true;
}
+ if (has_ext_provoking_vertex) {
+ VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex;
+ provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT;
+ provoking_vertex.pNext = nullptr;
+ features.pNext = &provoking_vertex;
+ physical.GetFeatures2KHR(features);
+
+ if (provoking_vertex.provokingVertexLast &&
+ provoking_vertex.transformFeedbackPreservesProvokingVertex) {
+ extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
+ ext_provoking_vertex = true;
+ }
+ }
+ if (has_ext_vertex_input_dynamic_state) {
+ VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input;
+ vertex_input.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT;
+ vertex_input.pNext = nullptr;
+ features.pNext = &vertex_input;
+ physical.GetFeatures2KHR(features);
+
+ if (vertex_input.vertexInputDynamicState) {
+ extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
+ ext_vertex_input_dynamic_state = true;
+ }
+ }
+ if (has_ext_shader_atomic_int64) {
+        VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64;
+        atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR;
+ atomic_int64.pNext = nullptr;
+ features.pNext = &atomic_int64;
+ physical.GetFeatures2KHR(features);
+
+ if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) {
+ extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
+ ext_shader_atomic_int64 = true;
+ }
+ }
if (has_ext_transform_feedback) {
VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
@@ -760,17 +994,55 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
}
}
if (has_ext_extended_dynamic_state) {
- VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
- dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
- dynamic_state.pNext = nullptr;
- features.pNext = &dynamic_state;
+ VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state;
+ extended_dynamic_state.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
+ extended_dynamic_state.pNext = nullptr;
+ features.pNext = &extended_dynamic_state;
physical.GetFeatures2KHR(features);
- if (dynamic_state.extendedDynamicState) {
+ if (extended_dynamic_state.extendedDynamicState) {
extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
ext_extended_dynamic_state = true;
}
}
+ if (has_ext_line_rasterization) {
+ VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster;
+ line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT;
+ line_raster.pNext = nullptr;
+ features.pNext = &line_raster;
+ physical.GetFeatures2KHR(features);
+ if (line_raster.rectangularLines && line_raster.smoothLines) {
+ extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME);
+ ext_line_rasterization = true;
+ }
+ }
+ if (has_khr_workgroup_memory_explicit_layout) {
+ VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout;
+ layout.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
+ layout.pNext = nullptr;
+ features.pNext = &layout;
+ physical.GetFeatures2KHR(features);
+
+ if (layout.workgroupMemoryExplicitLayout &&
+ layout.workgroupMemoryExplicitLayout8BitAccess &&
+ layout.workgroupMemoryExplicitLayout16BitAccess &&
+ layout.workgroupMemoryExplicitLayoutScalarBlockLayout) {
+ extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
+ khr_workgroup_memory_explicit_layout = true;
+ }
+ }
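+    // Record the device's push descriptor limit; it is exposed through
+    // MaxPushDescriptors().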
+ if (khr_push_descriptor) {
+ VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
+ push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
+ push_descriptor.pNext = nullptr;
+
+ physical_properties.pNext = &push_descriptor;
+ physical.GetProperties2KHR(physical_properties);
+
+ max_push_descriptors = push_descriptor.maxPushDescriptors;
+ }
return extensions;
}
@@ -806,11 +1078,25 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
}
void Device::SetupFeatures() {
- const auto supported_features{physical.GetFeatures()};
- is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
- is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample;
+ const VkPhysicalDeviceFeatures features{physical.GetFeatures()};
+ is_depth_bounds_supported = features.depthBounds;
+ is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat;
+ is_shader_float64_supported = features.shaderFloat64;
+ is_shader_int64_supported = features.shaderInt64;
+ is_shader_int16_supported = features.shaderInt16;
+ is_shader_storage_image_multisample = features.shaderStorageImageMultisample;
is_blit_depth_stencil_supported = TestDepthStencilBlits();
- is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
+ is_optimal_astc_supported = IsOptimalAstcSupported(features);
+}
+
+void Device::SetupProperties() {
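+    // Query the float control properties once and cache them for
+    // FloatControlProperties().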
+ float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
+
+ VkPhysicalDeviceProperties2KHR properties2{};
+ properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+ properties2.pNext = &float_controls;
+
+ physical.GetProperties2KHR(properties2);
}
void Device::CollectTelemetryParameters() {
@@ -832,12 +1118,6 @@ void Device::CollectTelemetryParameters() {
driver_id = driver.driverID;
vendor_name = driver.driverName;
-
- const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
- reported_extensions.reserve(std::size(extensions));
- for (const auto& extension : extensions) {
- reported_extensions.emplace_back(extension.extensionName);
- }
}
void Device::CollectPhysicalMemoryInfo() {
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 96c0f8c60..df394e384 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -4,6 +4,7 @@
#pragma once
+#include <span>
#include <string>
#include <string_view>
#include <unordered_map>
@@ -43,7 +44,7 @@ public:
void ReportLoss() const;
/// Reports a shader to Nsight Aftermath.
- void SaveShader(const std::vector<u32>& spirv) const;
+ void SaveShader(std::span<const u32> spirv) const;
/// Returns the name of the VkDriverId reported from Vulkan.
std::string GetDriverName() const;
@@ -128,6 +129,11 @@ public:
return properties.limits.maxComputeSharedMemorySize;
}
+ /// Returns float control properties of the device.
+ const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
+ return float_controls;
+ }
+
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
@@ -148,11 +154,31 @@ public:
return guest_warp_stages & stage;
}
+ /// Returns the maximum number of push descriptors.
+ u32 MaxPushDescriptors() const {
+ return max_push_descriptors;
+ }
+
/// Returns true if formatless image load is supported.
bool IsFormatlessImageLoadSupported() const {
return is_formatless_image_load_supported;
}
+ /// Returns true if shader int64 is supported.
+ bool IsShaderInt64Supported() const {
+ return is_shader_int64_supported;
+ }
+
+ /// Returns true if shader int16 is supported.
+ bool IsShaderInt16Supported() const {
+ return is_shader_int16_supported;
+ }
+
+    /// Returns true if depth bounds is supported.
+ bool IsDepthBoundsSupported() const {
+ return is_depth_bounds_supported;
+ }
+
/// Returns true when blitting from and to depth stencil images is supported.
bool IsBlitDepthStencilSupported() const {
return is_blit_depth_stencil_supported;
@@ -163,11 +189,36 @@ public:
return nv_viewport_swizzle;
}
- /// Returns true if the device supports VK_EXT_scalar_block_layout.
+ /// Returns true if the device supports VK_NV_viewport_array2.
+ bool IsNvViewportArray2Supported() const {
+ return nv_viewport_array2;
+ }
+
+ /// Returns true if the device supports VK_NV_geometry_shader_passthrough.
+ bool IsNvGeometryShaderPassthroughSupported() const {
+ return nv_geometry_shader_passthrough;
+ }
+
+ /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
}
+ /// Returns true if the device supports VK_KHR_spirv_1_4.
+ bool IsKhrSpirv1_4Supported() const {
+ return khr_spirv_1_4;
+ }
+
+ /// Returns true if the device supports VK_KHR_push_descriptor.
+ bool IsKhrPushDescriptorSupported() const {
+ return khr_push_descriptor;
+ }
+
+ /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
+ bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
+ return khr_workgroup_memory_explicit_layout;
+ }
+
/// Returns true if the device supports VK_EXT_index_type_uint8.
bool IsExtIndexTypeUint8Supported() const {
return ext_index_type_uint8;
@@ -188,6 +239,11 @@ public:
return ext_shader_viewport_index_layer;
}
+ /// Returns true if the device supports VK_EXT_subgroup_size_control.
+ bool IsExtSubgroupSizeControlSupported() const {
+ return ext_subgroup_size_control;
+ }
+
/// Returns true if the device supports VK_EXT_transform_feedback.
bool IsExtTransformFeedbackSupported() const {
return ext_transform_feedback;
@@ -203,11 +259,36 @@ public:
return ext_extended_dynamic_state;
}
+ /// Returns true if the device supports VK_EXT_line_rasterization.
+ bool IsExtLineRasterizationSupported() const {
+ return ext_line_rasterization;
+ }
+
+ /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state.
+ bool IsExtVertexInputDynamicStateSupported() const {
+ return ext_vertex_input_dynamic_state;
+ }
+
/// Returns true if the device supports VK_EXT_shader_stencil_export.
bool IsExtShaderStencilExportSupported() const {
return ext_shader_stencil_export;
}
+ /// Returns true if the device supports VK_EXT_conservative_rasterization.
+ bool IsExtConservativeRasterizationSupported() const {
+ return ext_conservative_rasterization;
+ }
+
+ /// Returns true if the device supports VK_EXT_provoking_vertex.
+ bool IsExtProvokingVertexSupported() const {
+ return ext_provoking_vertex;
+ }
+
+ /// Returns true if the device supports VK_KHR_shader_atomic_int64.
+ bool IsExtShaderAtomicInt64Supported() const {
+ return ext_shader_atomic_int64;
+ }
+
/// Returns true when a known debugging tool is attached.
bool HasDebuggingToolAttached() const {
return has_renderdoc || has_nsight_graphics;
@@ -220,12 +301,7 @@ public:
/// Returns the list of available extensions.
const std::vector<std::string>& GetAvailableExtensions() const {
- return reported_extensions;
- }
-
- /// Returns true if the setting for async shader compilation is enabled.
- bool UseAsynchronousShaders() const {
- return use_asynchronous_shaders;
+ return supported_extensions;
}
u64 GetDeviceLocalMemory() const {
@@ -245,6 +321,9 @@ private:
/// Sets up device features.
void SetupFeatures();
+ /// Sets up device properties.
+ void SetupProperties();
+
/// Collects telemetry information from the device.
void CollectTelemetryParameters();
@@ -267,46 +346,60 @@ private:
bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const;
- VkInstance instance; ///< Vulkan instance.
- vk::DeviceDispatch dld; ///< Device function pointers.
- vk::PhysicalDevice physical; ///< Physical device.
- VkPhysicalDeviceProperties properties; ///< Device properties.
- vk::Device logical; ///< Logical device.
- vk::Queue graphics_queue; ///< Main graphics queue.
- vk::Queue present_queue; ///< Main present queue.
- u32 instance_version{}; ///< Vulkan onstance version.
+ VkInstance instance; ///< Vulkan instance.
+ vk::DeviceDispatch dld; ///< Device function pointers.
+ vk::PhysicalDevice physical; ///< Physical device.
+ VkPhysicalDeviceProperties properties; ///< Device properties.
+ VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties.
+ vk::Device logical; ///< Logical device.
+ vk::Queue graphics_queue; ///< Main graphics queue.
+ vk::Queue present_queue; ///< Main present queue.
+    u32 instance_version{};                     ///< Vulkan instance version.
u32 graphics_family{}; ///< Main graphics queue family index.
u32 present_family{}; ///< Main present queue family index.
VkDriverIdKHR driver_id{}; ///< Driver ID.
VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
+ u32 max_push_descriptors{}; ///< Maximum number of push descriptors
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetics.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
+ bool is_depth_bounds_supported{}; ///< Support for depth bounds.
+ bool is_shader_float64_supported{}; ///< Support for float64.
+ bool is_shader_int64_supported{}; ///< Support for int64.
+ bool is_shader_int16_supported{}; ///< Support for int16.
bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
- bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
- bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
- bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
- bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
- bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
- bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
- bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
- bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
- bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
- bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
- bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
- bool has_renderdoc{}; ///< Has RenderDoc attached
- bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
-
- // Asynchronous Graphics Pipeline setting
- bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
+ bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2.
+ bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough.
+    bool khr_uniform_buffer_standard_layout{}; ///< Support for VK_KHR_uniform_buffer_standard_layout.
+ bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
+ bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
+    bool khr_push_descriptor{};                ///< Support for VK_KHR_push_descriptor.
+ bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
+ bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
+ bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
+ bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
+ bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
+ bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control.
+ bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
+ bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
+ bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
+ bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization.
+ bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state.
+ bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
+ bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64.
+ bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization.
+ bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex.
+ bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
+ bool has_renderdoc{}; ///< Has RenderDoc attached
+ bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
// Telemetry parameters
- std::string vendor_name; ///< Device's driver name.
- std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
+ std::string vendor_name; ///< Device's driver name.
+    std::vector<std::string> supported_extensions; ///< Supported Vulkan extensions.
/// Format properties dictionary.
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 2aa0ffbe6..bbf0fccae 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdFillBuffer);
X(vkCmdPipelineBarrier);
X(vkCmdPushConstants);
+ X(vkCmdPushDescriptorSetWithTemplateKHR);
X(vkCmdSetBlendConstants);
X(vkCmdSetDepthBias);
X(vkCmdSetDepthBounds);
@@ -120,9 +121,11 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdSetDepthTestEnableEXT);
X(vkCmdSetDepthWriteEnableEXT);
X(vkCmdSetFrontFaceEXT);
+ X(vkCmdSetLineWidth);
X(vkCmdSetPrimitiveTopologyEXT);
X(vkCmdSetStencilOpEXT);
X(vkCmdSetStencilTestEnableEXT);
+ X(vkCmdSetVertexInputEXT);
X(vkCmdResolveImage);
X(vkCreateBuffer);
X(vkCreateBufferView);
@@ -311,8 +314,6 @@ const char* ToString(VkResult result) noexcept {
return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT";
case VkResult::VK_ERROR_UNKNOWN:
return "VK_ERROR_UNKNOWN";
- case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR:
- return "VK_ERROR_INCOMPATIBLE_VERSION_KHR";
case VkResult::VK_THREAD_IDLE_KHR:
return "VK_THREAD_IDLE_KHR";
case VkResult::VK_THREAD_DONE_KHR:
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 3e36d356a..d76bb4324 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkBeginCommandBuffer vkBeginCommandBuffer{};
PFN_vkBindBufferMemory vkBindBufferMemory{};
PFN_vkBindImageMemory vkBindImageMemory{};
+ PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
PFN_vkCmdBeginQuery vkCmdBeginQuery{};
PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{};
PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{};
- PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{};
PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{};
PFN_vkCmdBindPipeline vkCmdBindPipeline{};
PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{};
PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{};
+ PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
PFN_vkCmdBlitImage vkCmdBlitImage{};
PFN_vkCmdClearAttachments vkCmdClearAttachments{};
PFN_vkCmdCopyBuffer vkCmdCopyBuffer{};
@@ -211,34 +212,36 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkCmdDispatch vkCmdDispatch{};
PFN_vkCmdDraw vkCmdDraw{};
PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
+ PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
PFN_vkCmdEndQuery vkCmdEndQuery{};
PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{};
- PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
PFN_vkCmdFillBuffer vkCmdFillBuffer{};
PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{};
PFN_vkCmdPushConstants vkCmdPushConstants{};
+ PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{};
+ PFN_vkCmdResolveImage vkCmdResolveImage{};
PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{};
+ PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
PFN_vkCmdSetDepthBias vkCmdSetDepthBias{};
PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{};
- PFN_vkCmdSetEvent vkCmdSetEvent{};
- PFN_vkCmdSetScissor vkCmdSetScissor{};
- PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
- PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
- PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
- PFN_vkCmdSetViewport vkCmdSetViewport{};
- PFN_vkCmdWaitEvents vkCmdWaitEvents{};
- PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
- PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{};
PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{};
PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{};
PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{};
+ PFN_vkCmdSetEvent vkCmdSetEvent{};
PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{};
+ PFN_vkCmdSetLineWidth vkCmdSetLineWidth{};
PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{};
+ PFN_vkCmdSetScissor vkCmdSetScissor{};
+ PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{};
+ PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{};
- PFN_vkCmdResolveImage vkCmdResolveImage{};
+ PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
+ PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{};
+ PFN_vkCmdSetViewport vkCmdSetViewport{};
+ PFN_vkCmdWaitEvents vkCmdWaitEvents{};
PFN_vkCreateBuffer vkCreateBuffer{};
PFN_vkCreateBufferView vkCreateBufferView{};
PFN_vkCreateCommandPool vkCreateCommandPool{};
@@ -989,6 +992,12 @@ public:
dynamic_offsets.size(), dynamic_offsets.data());
}
+ void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template,
+ VkPipelineLayout layout, u32 set,
+ const void* data) const noexcept {
+ dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data);
+ }
+
void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept {
dld->vkCmdBindPipeline(handle, bind_point, pipeline);
}
@@ -1190,6 +1199,10 @@ public:
dld->vkCmdSetFrontFaceEXT(handle, front_face);
}
+ void SetLineWidth(float line_width) const noexcept {
+ dld->vkCmdSetLineWidth(handle, line_width);
+ }
+
void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept {
dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology);
}
@@ -1203,6 +1216,13 @@ public:
dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
}
+ void SetVertexInputEXT(
+ vk::Span<VkVertexInputBindingDescription2EXT> bindings,
+ vk::Span<VkVertexInputAttributeDescription2EXT> attributes) const noexcept {
+ dld->vkCmdSetVertexInputEXT(handle, bindings.size(), bindings.data(), attributes.size(),
+ attributes.data());
+ }
+
void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
const VkDeviceSize* offsets,
const VkDeviceSize* sizes) const noexcept {