diff options
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- | src/video_core/engines/const_buffer_engine_interface.h | 119 | ||||
-rw-r--r-- | src/video_core/engines/fermi_2d.h | 12 | ||||
-rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 30 | ||||
-rw-r--r-- | src/video_core/engines/kepler_compute.h | 39 | ||||
-rw-r--r-- | src/video_core/engines/kepler_memory.h | 4 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 128 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 140 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_dma.h | 10 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 56 | ||||
-rw-r--r-- | src/video_core/engines/shader_header.h | 50 |
10 files changed, 368 insertions, 220 deletions
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h new file mode 100644 index 000000000..ac27b6cbe --- /dev/null +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -0,0 +1,119 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <type_traits> +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/textures/texture.h" + +namespace Tegra::Engines { + +enum class ShaderType : u32 { + Vertex = 0, + TesselationControl = 1, + TesselationEval = 2, + Geometry = 3, + Fragment = 4, + Compute = 5, +}; + +struct SamplerDescriptor { + union { + BitField<0, 20, Tegra::Shader::TextureType> texture_type; + BitField<20, 1, u32> is_array; + BitField<21, 1, u32> is_buffer; + BitField<22, 1, u32> is_shadow; + u32 raw{}; + }; + + bool operator==(const SamplerDescriptor& rhs) const noexcept { + return raw == rhs.raw; + } + + bool operator!=(const SamplerDescriptor& rhs) const noexcept { + return !operator==(rhs); + } + + static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { + SamplerDescriptor result; + switch (tic_texture_type) { + case Tegra::Texture::TextureType::Texture1D: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture2D: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture3D: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::TextureCubemap: + result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture1DArray: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture2DArray: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture1DBuffer: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(0); + result.is_buffer.Assign(1); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture2DNoMipmap: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::TextureCubeArray: + result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + default: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + } +}; +static_assert(std::is_trivially_copyable_v<SamplerDescriptor>); + +class ConstBufferEngineInterface { +public: + virtual ~ConstBufferEngineInterface() = default; + virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; + virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; + virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const = 0; + virtual u32 GetBoundBuffer() const = 0; +}; + +} // namespace Tegra::Engines diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 0901cf2fa..dba342c70 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -99,19 +99,19 @@ public: union { struct { - INSERT_PADDING_WORDS(0x80); + INSERT_UNION_PADDING_WORDS(0x80); Surface dst; - INSERT_PADDING_WORDS(2); + INSERT_UNION_PADDING_WORDS(2); Surface src; - INSERT_PADDING_WORDS(0x15); + INSERT_UNION_PADDING_WORDS(0x15); Operation operation; - INSERT_PADDING_WORDS(0x177); + INSERT_UNION_PADDING_WORDS(0x177); union { u32 raw; @@ -119,7 +119,7 @@ public: BitField<4, 1, Filter> filter; } blit_control; - INSERT_PADDING_WORDS(0x8); + INSERT_UNION_PADDING_WORDS(0x8); u32 blit_dst_x; u32 blit_dst_y; @@ -130,7 +130,7 @@ public: u64 blit_src_x; u64 blit_src_y; - INSERT_PADDING_WORDS(0x21); + INSERT_UNION_PADDING_WORDS(0x21); }; std::array<u32, NUM_REGS> reg_array; }; diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 63d449135..3a39aeabe 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -50,7 +50,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { } } -Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { +Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const { const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value(); ASSERT(cbuf_mask[regs.tex_cb_index]); @@ -61,22 +61,38 @@ Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) co ASSERT(address < texinfo.Address() + texinfo.size); const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)}; - return GetTextureInfo(tex_handle, offset); + return GetTextureInfo(tex_handle); } -Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHandle tex_handle, - std::size_t offset) const { - return Texture::FullTextureInfo{static_cast<u32>(offset), GetTICEntry(tex_handle.tic_id), - GetTSCEntry(tex_handle.tsc_id)}; +Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const { + return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; } -u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const { +u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { + ASSERT(stage == ShaderType::Compute); const auto& buffer = launch_description.const_buffer_config[const_buffer]; u32 result; std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); return result; } +SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { + return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); +} + +SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const { + ASSERT(stage == ShaderType::Compute); + const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; + const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; + const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); + SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); + result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + return result; +} + void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 90cf650d2..5259d92bd 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,6 +10,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" @@ -37,7 +38,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final { +class KeplerCompute final : public ConstBufferEngineInterface { public: explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); @@ -50,7 +51,7 @@ public: union { struct { - INSERT_PADDING_WORDS(0x60); + INSERT_UNION_PADDING_WORDS(0x60); Upload::Registers upload; @@ -62,7 +63,7 @@ public: u32 data_upload; - INSERT_PADDING_WORDS(0x3F); + INSERT_UNION_PADDING_WORDS(0x3F); struct { u32 address; @@ -71,11 +72,11 @@ public: } } launch_desc_loc; - INSERT_PADDING_WORDS(0x1); + INSERT_UNION_PADDING_WORDS(0x1); u32 launch; - INSERT_PADDING_WORDS(0x4A7); + INSERT_UNION_PADDING_WORDS(0x4A7); struct { u32 address_high; @@ -87,7 +88,7 @@ public: } } tsc; - INSERT_PADDING_WORDS(0x3); + INSERT_UNION_PADDING_WORDS(0x3); struct { u32 address_high; @@ -99,7 +100,7 @@ public: } } tic; - INSERT_PADDING_WORDS(0x22); + INSERT_UNION_PADDING_WORDS(0x22); struct { u32 address_high; @@ -110,11 +111,11 @@ public: } } code_loc; - INSERT_PADDING_WORDS(0x3FE); + INSERT_UNION_PADDING_WORDS(0x3FE); u32 tex_cb_index; - INSERT_PADDING_WORDS(0x374); + INSERT_UNION_PADDING_WORDS(0x374); }; std::array<u32, NUM_REGS> reg_array; }; @@ -178,7 +179,7 @@ public: }; INSERT_PADDING_WORDS(0x11); - } launch_description; + } launch_description{}; struct { u32 write_offset = 0; @@ -195,13 +196,21 @@ public: /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); - Tegra::Texture::FullTextureInfo GetTexture(std::size_t offset) const; + Texture::FullTextureInfo GetTexture(std::size_t offset) const; - /// Given a Texture Handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, - std::size_t offset) const; + /// Given a texture handle, returns the TSC and TIC entries. + Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; - u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const; + u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; + + SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; + + SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const override; + + u32 GetBoundBuffer() const override { + return regs.tex_cb_index; + } private: Core::System& system; diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index e0e25c321..396fb6e86 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -45,7 +45,7 @@ public: union { struct { - INSERT_PADDING_WORDS(0x60); + INSERT_UNION_PADDING_WORDS(0x60); Upload::Registers upload; @@ -57,7 +57,7 @@ public: u32 data; - INSERT_PADDING_WORDS(0x11); + INSERT_UNION_PADDING_WORDS(0x11); }; std::array<u32, NUM_REGS> reg_array; }; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7802fd808..2bed6cb38 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -98,10 +98,10 @@ void Maxwell3D::InitializeRegisterDefaults() { mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; } -#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) +#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name)) void Maxwell3D::InitDirtySettings() { - const auto set_block = [this](const u32 start, const u32 range, const u8 position) { + const auto set_block = [this](std::size_t start, std::size_t range, u8 position) { const auto start_itr = dirty_pointers.begin() + start; const auto end_itr = start_itr + range; std::fill(start_itr, end_itr, position); @@ -112,10 +112,10 @@ void Maxwell3D::InitDirtySettings() { constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; - u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); + u8 rt_dirty_reg = DIRTY_REGS_POS(render_target); for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { set_block(rt_reg, registers_per_rt, rt_dirty_reg); - rt_dirty_reg++; + ++rt_dirty_reg; } constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; @@ -129,35 +129,35 @@ void Maxwell3D::InitDirtySettings() { constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; - u32 va_reg = DIRTY_REGS_POS(vertex_array); - u32 vi_reg = DIRTY_REGS_POS(vertex_instance); + u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array); + u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; vertex_reg += vertex_array_size) { - set_block(vertex_reg, 3, va_reg); + set_block(vertex_reg, 3, va_dirty_reg); // The divisor concerns vertex array instances - dirty_pointers[vertex_reg + 3] = vi_reg; - va_reg++; - vi_reg++; + dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg; + ++va_dirty_reg; + ++vi_dirty_reg; } constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; - va_reg = DIRTY_REGS_POS(vertex_array); + va_dirty_reg = DIRTY_REGS_POS(vertex_array); for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; vertex_reg += vertex_limit_size) { - set_block(vertex_reg, vertex_limit_size, va_reg); - va_reg++; + set_block(vertex_reg, vertex_limit_size, va_dirty_reg); + va_dirty_reg++; } constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); constexpr u32 vertex_instance_size = sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); constexpr u32 vertex_instance_end = vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; - vi_reg = DIRTY_REGS_POS(vertex_instance); + vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; vertex_reg += vertex_instance_size) { - set_block(vertex_reg, vertex_instance_size, vi_reg); - vi_reg++; + set_block(vertex_reg, vertex_instance_size, vi_dirty_reg); + vi_dirty_reg++; } set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), DIRTY_REGS_POS(vertex_attrib_format)); @@ -171,7 +171,7 @@ void Maxwell3D::InitDirtySettings() { // State // Viewport - constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); + constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport); constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); set_block(viewport_start, viewport_size, viewport_dirty_reg); @@ -198,7 +198,7 @@ void Maxwell3D::InitDirtySettings() { set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); // Depth Test - constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); + constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; @@ -223,12 +223,12 @@ void Maxwell3D::InitDirtySettings() { dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; // Color Mask - constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); + constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), color_mask_dirty_reg); // Blend State - constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); + constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), blend_state_dirty_reg); dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; @@ -237,12 +237,12 @@ void Maxwell3D::InitDirtySettings() { blend_state_dirty_reg); // Scissor State - constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); + constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), scissor_test_dirty_reg); // Polygon Offset - constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); + constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; @@ -251,7 +251,7 @@ void Maxwell3D::InitDirtySettings() { dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; // Depth bounds - constexpr u32 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values); + constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values); dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg; } @@ -478,7 +478,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { } void Maxwell3D::FlushMMEInlineDraw() { - LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), + LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); @@ -760,61 +760,8 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { return tsc_entry; } -std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const { - std::vector<Texture::FullTextureInfo> textures; - - auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)]; - auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index]; - ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); - - GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size; - - // Offset into the texture constbuffer where the texture info begins. - static constexpr std::size_t TextureInfoOffset = 0x20; - - for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; - current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { - - const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)}; - - Texture::FullTextureInfo tex_info{}; - // TODO(Subv): Use the shader to determine which textures are actually accessed. - tex_info.index = - static_cast<u32>(current_texture - tex_info_buffer.address - TextureInfoOffset) / - sizeof(Texture::TextureHandle); - - // Load the TIC data. - auto tic_entry = GetTICEntry(tex_handle.tic_id); - // TODO(Subv): Workaround for BitField's move constructor being deleted. - std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry)); - - // Load the TSC data - auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); - // TODO(Subv): Workaround for BitField's move constructor being deleted. - std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry)); - - textures.push_back(tex_info); - } - - return textures; -} - -Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle, - std::size_t offset) const { - Texture::FullTextureInfo tex_info{}; - tex_info.index = static_cast<u32>(offset); - - // Load the TIC data. - auto tic_entry = GetTICEntry(tex_handle.tic_id); - // TODO(Subv): Workaround for BitField's move constructor being deleted. - std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry)); - - // Load the TSC data - auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); - // TODO(Subv): Workaround for BitField's move constructor being deleted. - std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry)); - - return tex_info; +Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const { + return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; } Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, @@ -830,7 +777,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; - return GetTextureInfo(tex_handle, offset); + return GetTextureInfo(tex_handle); } u32 Maxwell3D::GetRegisterValue(u32 method) const { @@ -846,7 +793,8 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer.Clear(); } -u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { +u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { + ASSERT(stage != ShaderType::Compute); const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; const auto& buffer = shader_stage.const_buffers[const_buffer]; u32 result; @@ -854,4 +802,22 @@ u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u6 return result; } +SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { + return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); +} + +SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const { + ASSERT(stage != ShaderType::Compute); + const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; + const auto& tex_info_buffer = shader.const_buffers[const_buffer]; + const GPUVAddr tex_info_address = tex_info_buffer.address + offset; + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; + const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); + SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); + result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + return result; +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index e3f1047d5..1aa7c274f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -15,6 +15,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" +#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" @@ -44,7 +45,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final { +class Maxwell3D final : public ConstBufferEngineInterface { public: explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); @@ -495,7 +496,7 @@ public: Equation equation_a; Factor factor_source_a; Factor factor_dest_a; - INSERT_PADDING_WORDS(1); + INSERT_UNION_PADDING_WORDS(1); }; struct RenderTargetConfig { @@ -516,7 +517,7 @@ public: }; u32 layer_stride; u32 base_layer; - INSERT_PADDING_WORDS(7); + INSERT_UNION_PADDING_WORDS(7); GPUVAddr Address() const { return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | @@ -541,7 +542,7 @@ public: f32 translate_x; f32 translate_y; f32 translate_z; - INSERT_PADDING_WORDS(2); + INSERT_UNION_PADDING_WORDS(2); Common::Rectangle<s32> GetRect() const { return { @@ -605,7 +606,7 @@ public: union { struct { - INSERT_PADDING_WORDS(0x45); + INSERT_UNION_PADDING_WORDS(0x45); struct { u32 upload_address; @@ -614,7 +615,7 @@ public: u32 bind; } macros; - INSERT_PADDING_WORDS(0x17); + INSERT_UNION_PADDING_WORDS(0x17); Upload::Registers upload; struct { @@ -625,7 +626,7 @@ public: u32 data_upload; - INSERT_PADDING_WORDS(0x44); + INSERT_UNION_PADDING_WORDS(0x44); struct { union { @@ -635,11 +636,11 @@ public: }; } sync_info; - INSERT_PADDING_WORDS(0x11E); + INSERT_UNION_PADDING_WORDS(0x11E); u32 tfb_enabled; - INSERT_PADDING_WORDS(0x2E); + INSERT_UNION_PADDING_WORDS(0x2E); std::array<RenderTargetConfig, NumRenderTargets> rt; @@ -647,49 +648,49 @@ public: std::array<ViewPort, NumViewports> viewports; - INSERT_PADDING_WORDS(0x1D); + INSERT_UNION_PADDING_WORDS(0x1D); struct { u32 first; u32 count; } vertex_buffer; - INSERT_PADDING_WORDS(1); + INSERT_UNION_PADDING_WORDS(1); float clear_color[4]; float clear_depth; - INSERT_PADDING_WORDS(0x3); + INSERT_UNION_PADDING_WORDS(0x3); s32 clear_stencil; - INSERT_PADDING_WORDS(0x7); + INSERT_UNION_PADDING_WORDS(0x7); u32 polygon_offset_point_enable; u32 polygon_offset_line_enable; u32 polygon_offset_fill_enable; - INSERT_PADDING_WORDS(0xD); + INSERT_UNION_PADDING_WORDS(0xD); std::array<ScissorTest, NumViewports> scissor_test; - INSERT_PADDING_WORDS(0x15); + INSERT_UNION_PADDING_WORDS(0x15); s32 stencil_back_func_ref; u32 stencil_back_mask; u32 stencil_back_func_mask; - INSERT_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0xC); u32 color_mask_common; - INSERT_PADDING_WORDS(0x6); + INSERT_UNION_PADDING_WORDS(0x6); u32 rt_separate_frag_data; f32 depth_bounds[2]; - INSERT_PADDING_WORDS(0xA); + INSERT_UNION_PADDING_WORDS(0xA); struct { u32 address_high; @@ -709,7 +710,7 @@ public: } } zeta; - INSERT_PADDING_WORDS(0x41); + INSERT_UNION_PADDING_WORDS(0x41); union { BitField<0, 4, u32> stencil; @@ -718,11 +719,11 @@ public: BitField<12, 4, u32> viewport; } clear_flags; - INSERT_PADDING_WORDS(0x19); + INSERT_UNION_PADDING_WORDS(0x19); std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; - INSERT_PADDING_WORDS(0xF); + INSERT_UNION_PADDING_WORDS(0xF); struct { union { @@ -745,16 +746,16 @@ public: } } rt_control; - INSERT_PADDING_WORDS(0x2); + INSERT_UNION_PADDING_WORDS(0x2); u32 zeta_width; u32 zeta_height; - INSERT_PADDING_WORDS(0x27); + INSERT_UNION_PADDING_WORDS(0x27); u32 depth_test_enable; - INSERT_PADDING_WORDS(0x5); + INSERT_UNION_PADDING_WORDS(0x5); u32 independent_blend_enable; @@ -762,7 +763,7 @@ public: u32 alpha_test_enabled; - INSERT_PADDING_WORDS(0x6); + INSERT_UNION_PADDING_WORDS(0x6); u32 d3d_cull_mode; @@ -776,7 +777,7 @@ public: float b; float a; } blend_color; - INSERT_PADDING_WORDS(0x4); + INSERT_UNION_PADDING_WORDS(0x4); struct { u32 separate_alpha; @@ -785,7 +786,7 @@ public: Blend::Factor factor_dest_rgb; Blend::Equation equation_a; Blend::Factor factor_source_a; - INSERT_PADDING_WORDS(1); + INSERT_UNION_PADDING_WORDS(1); Blend::Factor factor_dest_a; u32 enable_common; @@ -801,7 +802,7 @@ public: u32 stencil_front_func_mask; u32 stencil_front_mask; - INSERT_PADDING_WORDS(0x2); + INSERT_UNION_PADDING_WORDS(0x2); u32 frag_color_clamp; @@ -810,12 +811,12 @@ public: BitField<4, 1, u32> triangle_rast_flip; } screen_y_control; - INSERT_PADDING_WORDS(0x21); + INSERT_UNION_PADDING_WORDS(0x21); u32 vb_element_base; u32 vb_base_instance; - INSERT_PADDING_WORDS(0x35); + INSERT_UNION_PADDING_WORDS(0x35); union { BitField<0, 1, u32> c0; @@ -828,11 +829,11 @@ public: BitField<7, 1, u32> c7; } clip_distance_enabled; - INSERT_PADDING_WORDS(0x1); + INSERT_UNION_PADDING_WORDS(0x1); float point_size; - INSERT_PADDING_WORDS(0x7); + INSERT_UNION_PADDING_WORDS(0x7); u32 zeta_enable; @@ -841,7 +842,7 @@ public: BitField<4, 1, u32> alpha_to_one; } multisample_control; - INSERT_PADDING_WORDS(0x4); + INSERT_UNION_PADDING_WORDS(0x4); struct { u32 address_high; @@ -865,11 +866,11 @@ public: } } tsc; - INSERT_PADDING_WORDS(0x1); + INSERT_UNION_PADDING_WORDS(0x1); float polygon_offset_factor; - INSERT_PADDING_WORDS(0x1); + INSERT_UNION_PADDING_WORDS(0x1); struct { u32 tic_address_high; @@ -882,7 +883,7 @@ public: } } tic; - INSERT_PADDING_WORDS(0x5); + INSERT_UNION_PADDING_WORDS(0x5); u32 stencil_two_side_enable; StencilOp stencil_back_op_fail; @@ -890,13 +891,13 @@ public: StencilOp stencil_back_op_zpass; ComparisonOp stencil_back_func_func; - INSERT_PADDING_WORDS(0x4); + INSERT_UNION_PADDING_WORDS(0x4); u32 framebuffer_srgb; float polygon_offset_units; - INSERT_PADDING_WORDS(0x11); + INSERT_UNION_PADDING_WORDS(0x11); union { BitField<2, 1, u32> coord_origin; @@ -912,7 +913,7 @@ public: (static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low); } } code_address; - INSERT_PADDING_WORDS(1); + INSERT_UNION_PADDING_WORDS(1); struct { u32 vertex_end_gl; @@ -924,14 +925,14 @@ public: }; } draw; - INSERT_PADDING_WORDS(0xA); + INSERT_UNION_PADDING_WORDS(0xA); struct { u32 enabled; u32 index; } primitive_restart; - INSERT_PADDING_WORDS(0x5F); + INSERT_UNION_PADDING_WORDS(0x5F); struct { u32 start_addr_high; @@ -972,9 +973,9 @@ public: } } index_array; - INSERT_PADDING_WORDS(0x7); + INSERT_UNION_PADDING_WORDS(0x7); - INSERT_PADDING_WORDS(0x1F); + INSERT_UNION_PADDING_WORDS(0x1F); float polygon_offset_clamp; @@ -988,17 +989,17 @@ public: } } instanced_arrays; - INSERT_PADDING_WORDS(0x6); + INSERT_UNION_PADDING_WORDS(0x6); Cull cull; u32 pixel_center_integer; - INSERT_PADDING_WORDS(0x1); + INSERT_UNION_PADDING_WORDS(0x1); u32 viewport_transform_enabled; - INSERT_PADDING_WORDS(0x3); + INSERT_UNION_PADDING_WORDS(0x3); union { BitField<0, 1, u32> depth_range_0_1; @@ -1006,13 +1007,13 @@ public: BitField<4, 1, u32> depth_clamp_far; } view_volume_clip_control; - INSERT_PADDING_WORDS(0x21); + INSERT_UNION_PADDING_WORDS(0x21); struct { u32 enable; LogicOperation operation; } logic_op; - INSERT_PADDING_WORDS(0x1); + INSERT_UNION_PADDING_WORDS(0x1); union { u32 raw; @@ -1025,9 +1026,9 @@ public: BitField<6, 4, u32> RT; BitField<10, 11, u32> layer; } clear_buffers; - INSERT_PADDING_WORDS(0xB); + INSERT_UNION_PADDING_WORDS(0xB); std::array<ColorMask, NumRenderTargets> color_mask; - INSERT_PADDING_WORDS(0x38); + INSERT_UNION_PADDING_WORDS(0x38); struct { u32 query_address_high; @@ -1049,7 +1050,7 @@ public: } } query; - INSERT_PADDING_WORDS(0x3C); + INSERT_UNION_PADDING_WORDS(0x3C); struct { union { @@ -1089,10 +1090,10 @@ public: BitField<4, 4, ShaderProgram> program; }; u32 offset; - INSERT_PADDING_WORDS(14); + INSERT_UNION_PADDING_WORDS(14); } shader_config[MaxShaderProgram]; - INSERT_PADDING_WORDS(0x60); + INSERT_UNION_PADDING_WORDS(0x60); u32 firmware[0x20]; @@ -1109,7 +1110,7 @@ public: } } const_buffer; - INSERT_PADDING_WORDS(0x10); + INSERT_UNION_PADDING_WORDS(0x10); struct { union { @@ -1117,14 +1118,14 @@ public: BitField<0, 1, u32> valid; BitField<4, 5, u32> index; }; - INSERT_PADDING_WORDS(7); + INSERT_UNION_PADDING_WORDS(7); } cb_bind[MaxShaderStage]; - INSERT_PADDING_WORDS(0x56); + INSERT_UNION_PADDING_WORDS(0x56); u32 tex_cb_index; - INSERT_PADDING_WORDS(0x395); + INSERT_UNION_PADDING_WORDS(0x395); struct { /// Compressed address of a buffer that holds information about bound SSBOs. @@ -1136,14 +1137,14 @@ public: } } ssbo_info; - INSERT_PADDING_WORDS(0x11); + INSERT_UNION_PADDING_WORDS(0x11); struct { u32 address[MaxShaderStage]; u32 size[MaxShaderStage]; } tex_info_buffers; - INSERT_PADDING_WORDS(0xCC); + INSERT_UNION_PADDING_WORDS(0xCC); }; std::array<u32, NUM_REGS> reg_array; }; @@ -1165,6 +1166,8 @@ public: struct DirtyRegs { static constexpr std::size_t NUM_REGS = 256; + static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max()); + union { struct { bool null_dirty; @@ -1247,17 +1250,22 @@ public: void FlushMMEInlineDraw(); - /// Given a Texture Handle, returns the TSC and TIC entries. - Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, - std::size_t offset) const; - - /// Returns a list of enabled textures for the specified shader stage. - std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; + /// Given a texture handle, returns the TSC and TIC entries. + Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; - u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; + u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; + + SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; + + SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const override; + + u32 GetBoundBuffer() const override { + return regs.tex_cb_index; + } /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 93808a9bb..4f40d1d1f 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -94,7 +94,7 @@ public: union { struct { - INSERT_PADDING_WORDS(0xC0); + INSERT_UNION_PADDING_WORDS(0xC0); struct { union { @@ -112,7 +112,7 @@ public: }; } exec; - INSERT_PADDING_WORDS(0x3F); + INSERT_UNION_PADDING_WORDS(0x3F); struct { u32 address_high; @@ -139,7 +139,7 @@ public: u32 x_count; u32 y_count; - INSERT_PADDING_WORDS(0xB8); + INSERT_UNION_PADDING_WORDS(0xB8); u32 const0; u32 const1; @@ -162,11 +162,11 @@ public: Parameters dst_params; - INSERT_PADDING_WORDS(1); + INSERT_UNION_PADDING_WORDS(1); Parameters src_params; - INSERT_PADDING_WORDS(0x13); + INSERT_UNION_PADDING_WORDS(0x13); }; std::array<u32, NUM_REGS> reg_array; }; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7a6355ce2..8f6bc76eb 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -574,7 +574,7 @@ enum class ShuffleOperation : u64 { }; union Instruction { - Instruction& operator=(const Instruction& instr) { + constexpr Instruction& operator=(const Instruction& instr) { value = instr.value; return *this; } @@ -1238,6 +1238,32 @@ union Instruction { } tld4; union { + BitField<35, 1, u64> ndv_flag; + BitField<49, 1, u64> nodep_flag; + BitField<50, 1, u64> dc_flag; + BitField<33, 2, u64> info; + BitField<37, 2, u64> component; + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::NDV: + return ndv_flag != 0; + case TextureMiscMode::NODEP: + return nodep_flag != 0; + case TextureMiscMode::DC: + return dc_flag != 0; + case TextureMiscMode::AOFFI: + return info == 1; + case TextureMiscMode::PTP: + return info == 2; + default: + break; + } + return false; + } + } tld4_b; + + union { BitField<49, 1, u64> nodep_flag; BitField<50, 1, u64> dc_flag; BitField<51, 1, u64> aoffi_flag; @@ -1590,7 +1616,8 @@ public: TEXS, // Texture Fetch with scalar/non-vec4 source/destinations TLD, // Texture Load TLDS, // Texture Load with scalar/non-vec4 source/destinations - TLD4, // Texture Load 4 + TLD4, // Texture Gather 4 + TLD4_B, // Texture Gather 4 Bindless TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations TMML_B, // Texture Mip Map Level TMML, // Texture Mip Map Level @@ -1760,22 +1787,22 @@ public: class Matcher { public: - Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type) + constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type) : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {} - const char* GetName() const { + constexpr const char* GetName() const { return name; } - u16 GetMask() const { + constexpr u16 GetMask() const { return mask; } - Id GetId() const { + constexpr Id GetId() const { return id; } - Type GetType() const { + constexpr Type GetType() const { return type; } @@ -1784,7 +1811,7 @@ public: * @param instruction The instruction to test * @returns true if the given instruction matches. */ - bool Matches(u16 instruction) const { + constexpr bool Matches(u16 instruction) const { return (instruction & mask) == expected; } @@ -1818,7 +1845,7 @@ private: * A '0' in a bitstring indicates that a zero must be present at that bit position. * A '1' in a bitstring indicates that a one must be present at that bit position. */ - static auto GetMaskAndExpect(const char* const bitstring) { + static constexpr auto GetMaskAndExpect(const char* const bitstring) { u16 mask = 0, expect = 0; for (std::size_t i = 0; i < opcode_bitsize; i++) { const std::size_t bit_position = opcode_bitsize - i - 1; @@ -1835,15 +1862,15 @@ private: break; } } - return std::make_tuple(mask, expect); + return std::make_pair(mask, expect); } public: /// Creates a matcher that can match and parse instructions based on bitstring. - static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type, - const char* const name) { - const auto mask_expect = GetMaskAndExpect(bitstring); - return Matcher(name, std::get<0>(mask_expect), std::get<1>(mask_expect), op, type); + static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type, + const char* const name) { + const auto [mask, expected] = GetMaskAndExpect(bitstring); + return Matcher(name, mask, expected, op, type); } }; @@ -1881,6 +1908,7 @@ private: INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), + INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index e86a7f04a..bc80661d8 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -38,37 +38,37 @@ struct Header { BitField<26, 1, u32> does_load_or_store; BitField<27, 1, u32> does_fp64; BitField<28, 4, u32> stream_out_mask; - } common0; + } common0{}; union { BitField<0, 24, u32> shader_local_memory_low_size; BitField<24, 8, u32> per_patch_attribute_count; - } common1; + } common1{}; union { BitField<0, 24, u32> shader_local_memory_high_size; BitField<24, 8, u32> threads_per_input_primitive; - } common2; + } common2{}; union { BitField<0, 24, u32> shader_local_memory_crs_size; BitField<24, 4, OutputTopology> output_topology; BitField<28, 4, u32> reserved; - } common3; + } common3{}; union { BitField<0, 12, u32> max_output_vertices; BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. BitField<24, 4, u32> reserved; BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders. - } common4; + } common4{}; union { struct { - INSERT_PADDING_BYTES(3); // ImapSystemValuesA - INSERT_PADDING_BYTES(1); // ImapSystemValuesB - INSERT_PADDING_BYTES(16); // ImapGenericVector[32] - INSERT_PADDING_BYTES(2); // ImapColor + INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA + INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB + INSERT_UNION_PADDING_BYTES(16); // ImapGenericVector[32] + INSERT_UNION_PADDING_BYTES(2); // ImapColor union { BitField<0, 8, u16> clip_distances; BitField<8, 1, u16> point_sprite_s; @@ -79,20 +79,20 @@ struct Header { BitField<14, 1, u16> instance_id; BitField<15, 1, u16> vertex_id; }; - INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES(1); // ImapReserved - INSERT_PADDING_BYTES(3); // OmapSystemValuesA - INSERT_PADDING_BYTES(1); // OmapSystemValuesB - INSERT_PADDING_BYTES(16); // OmapGenericVector[32] - INSERT_PADDING_BYTES(2); // OmapColor - INSERT_PADDING_BYTES(2); // OmapSystemValuesC - INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10] - INSERT_PADDING_BYTES(1); // OmapReserved + INSERT_UNION_PADDING_BYTES(5); // ImapFixedFncTexture[10] + INSERT_UNION_PADDING_BYTES(1); // ImapReserved + INSERT_UNION_PADDING_BYTES(3); // OmapSystemValuesA + INSERT_UNION_PADDING_BYTES(1); // OmapSystemValuesB + INSERT_UNION_PADDING_BYTES(16); // OmapGenericVector[32] + INSERT_UNION_PADDING_BYTES(2); // OmapColor + INSERT_UNION_PADDING_BYTES(2); // OmapSystemValuesC + INSERT_UNION_PADDING_BYTES(5); // OmapFixedFncTexture[10] + INSERT_UNION_PADDING_BYTES(1); // OmapReserved } vtg; struct { - INSERT_PADDING_BYTES(3); // ImapSystemValuesA - INSERT_PADDING_BYTES(1); // ImapSystemValuesB + INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA + INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB union { BitField<0, 2, AttributeUse> x; BitField<2, 2, AttributeUse> y; @@ -100,10 +100,10 @@ struct Header { BitField<6, 2, AttributeUse> z; u8 raw; } imap_generic_vector[32]; - INSERT_PADDING_BYTES(2); // ImapColor - INSERT_PADDING_BYTES(2); // ImapSystemValuesC - INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES(2); // ImapReserved + INSERT_UNION_PADDING_BYTES(2); // ImapColor + INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC + INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10] + INSERT_UNION_PADDING_BYTES(2); // ImapReserved struct { u32 target; union { @@ -139,6 +139,8 @@ struct Header { return result; } } ps; + + std::array<u32, 0xF> raw{}; }; u64 GetLocalMemorySize() const { |