diff options
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- | src/video_core/engines/const_buffer_engine_interface.h | 119 | ||||
-rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 20 | ||||
-rw-r--r-- | src/video_core/engines/kepler_compute.h | 14 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 68 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 16 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 26 |
6 files changed, 220 insertions, 43 deletions
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h new file mode 100644 index 000000000..ac27b6cbe --- /dev/null +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -0,0 +1,119 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <type_traits> +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/textures/texture.h" + +namespace Tegra::Engines { + +enum class ShaderType : u32 { + Vertex = 0, + TesselationControl = 1, + TesselationEval = 2, + Geometry = 3, + Fragment = 4, + Compute = 5, +}; + +struct SamplerDescriptor { + union { + BitField<0, 20, Tegra::Shader::TextureType> texture_type; + BitField<20, 1, u32> is_array; + BitField<21, 1, u32> is_buffer; + BitField<22, 1, u32> is_shadow; + u32 raw{}; + }; + + bool operator==(const SamplerDescriptor& rhs) const noexcept { + return raw == rhs.raw; + } + + bool operator!=(const SamplerDescriptor& rhs) const noexcept { + return !operator==(rhs); + } + + static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { + SamplerDescriptor result; + switch (tic_texture_type) { + case Tegra::Texture::TextureType::Texture1D: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture2D: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture3D: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::TextureCubemap: + result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture1DArray: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture2DArray: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture1DBuffer: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); + result.is_array.Assign(0); + result.is_buffer.Assign(1); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::Texture2DNoMipmap: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + case Tegra::Texture::TextureType::TextureCubeArray: + result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); + result.is_array.Assign(1); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + default: + result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); + result.is_array.Assign(0); + result.is_buffer.Assign(0); + result.is_shadow.Assign(0); + return result; + } + } +}; +static_assert(std::is_trivially_copyable_v<SamplerDescriptor>); + +class ConstBufferEngineInterface { +public: + virtual ~ConstBufferEngineInterface() = default; + virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; + virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; + virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const = 0; + virtual u32 GetBoundBuffer() const = 0; +}; + +} // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 63d449135..91adef360 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -70,13 +70,31 @@ Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHan GetTSCEntry(tex_handle.tsc_id)}; } -u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const { +u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { + ASSERT(stage == ShaderType::Compute); const auto& buffer = launch_description.const_buffer_config[const_buffer]; u32 result; std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); return result; } +SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { + return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); +} + +SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const { + ASSERT(stage == ShaderType::Compute); + const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; + const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; + const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); + SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); + result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + return result; +} + void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 90cf650d2..8e7182727 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,6 +10,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" @@ -37,7 +38,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final { +class KeplerCompute final : public ConstBufferEngineInterface { public: explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); @@ -201,7 +202,16 @@ public: Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, std::size_t offset) const; - u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const; + u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; + + SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; + + SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const override; + + u32 GetBoundBuffer() const override { + return regs.tex_cb_index; + } private: Core::System& system; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 59976943a..514ed93fa 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -98,11 +98,10 @@ void Maxwell3D::InitializeRegisterDefaults() { mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; } -#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) +#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name)) void Maxwell3D::InitDirtySettings() { - const auto set_block = [this](const std::size_t start, const std::size_t range, - const u8 position) { + const auto set_block = [this](std::size_t start, std::size_t range, u8 position) { const auto start_itr = dirty_pointers.begin() + start; const auto end_itr = start_itr + range; std::fill(start_itr, end_itr, position); @@ -113,10 +112,10 @@ void Maxwell3D::InitDirtySettings() { constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; - u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); + u8 rt_dirty_reg = DIRTY_REGS_POS(render_target); for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { set_block(rt_reg, registers_per_rt, rt_dirty_reg); - rt_dirty_reg++; + ++rt_dirty_reg; } constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; @@ -130,35 +129,35 @@ void Maxwell3D::InitDirtySettings() { constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; - u32 va_reg = DIRTY_REGS_POS(vertex_array); - u32 vi_reg = DIRTY_REGS_POS(vertex_instance); + u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array); + u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; vertex_reg += vertex_array_size) { - set_block(vertex_reg, 3, va_reg); + set_block(vertex_reg, 3, va_dirty_reg); // The divisor concerns vertex array instances - dirty_pointers[vertex_reg + 3] = vi_reg; - va_reg++; - vi_reg++; + dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg; + ++va_dirty_reg; + ++vi_dirty_reg; } constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; - va_reg = DIRTY_REGS_POS(vertex_array); + va_dirty_reg = DIRTY_REGS_POS(vertex_array); for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; vertex_reg += vertex_limit_size) { - set_block(vertex_reg, vertex_limit_size, va_reg); - va_reg++; + set_block(vertex_reg, vertex_limit_size, va_dirty_reg); + va_dirty_reg++; } constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); constexpr u32 vertex_instance_size = sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); constexpr u32 vertex_instance_end = vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; - vi_reg = DIRTY_REGS_POS(vertex_instance); + vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; vertex_reg += vertex_instance_size) { - set_block(vertex_reg, vertex_instance_size, vi_reg); - vi_reg++; + set_block(vertex_reg, vertex_instance_size, vi_dirty_reg); + vi_dirty_reg++; } set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), DIRTY_REGS_POS(vertex_attrib_format)); @@ -172,7 +171,7 @@ void Maxwell3D::InitDirtySettings() { // State // Viewport - constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); + constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport); constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); set_block(viewport_start, viewport_size, viewport_dirty_reg); @@ -199,7 +198,7 @@ void Maxwell3D::InitDirtySettings() { set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); // Depth Test - constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); + constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; @@ -224,12 +223,12 @@ void Maxwell3D::InitDirtySettings() { dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; // Color Mask - constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); + constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), color_mask_dirty_reg); // Blend State - constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); + constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), blend_state_dirty_reg); dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; @@ -238,12 +237,12 @@ void Maxwell3D::InitDirtySettings() { blend_state_dirty_reg); // Scissor State - constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); + constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), scissor_test_dirty_reg); // Polygon Offset - constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); + constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; @@ -252,7 +251,7 @@ void Maxwell3D::InitDirtySettings() { dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; // Depth bounds - constexpr u32 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values); + constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values); dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg; dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg; } @@ -847,7 +846,8 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer.Clear(); } -u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { +u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { + ASSERT(stage != ShaderType::Compute); const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; const auto& buffer = shader_stage.const_buffers[const_buffer]; u32 result; @@ -855,4 +855,22 @@ u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u6 return result; } +SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { + return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); +} + +SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const { + ASSERT(stage != ShaderType::Compute); + const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; + const auto& tex_info_buffer = shader.const_buffers[const_buffer]; + const GPUVAddr tex_info_address = tex_info_buffer.address + offset; + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; + const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); + SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); + result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); + return result; +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index e3f1047d5..987ad77b2 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -15,6 +15,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" +#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" @@ -44,7 +45,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final { +class Maxwell3D final : public ConstBufferEngineInterface { public: explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); @@ -1165,6 +1166,8 @@ public: struct DirtyRegs { static constexpr std::size_t NUM_REGS = 256; + static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max()); + union { struct { bool null_dirty; @@ -1257,7 +1260,16 @@ public: /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; - u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; + u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; + + SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; + + SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, + u64 offset) const override; + + u32 GetBoundBuffer() const override { + return regs.tex_cb_index; + } /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7a6355ce2..d3d05a866 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -574,7 +574,7 @@ enum class ShuffleOperation : u64 { }; union Instruction { - Instruction& operator=(const Instruction& instr) { + constexpr Instruction& operator=(const Instruction& instr) { value = instr.value; return *this; } @@ -1760,22 +1760,22 @@ public: class Matcher { public: - Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type) + constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type) : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {} - const char* GetName() const { + constexpr const char* GetName() const { return name; } - u16 GetMask() const { + constexpr u16 GetMask() const { return mask; } - Id GetId() const { + constexpr Id GetId() const { return id; } - Type GetType() const { + constexpr Type GetType() const { return type; } @@ -1784,7 +1784,7 @@ public: * @param instruction The instruction to test * @returns true if the given instruction matches. */ - bool Matches(u16 instruction) const { + constexpr bool Matches(u16 instruction) const { return (instruction & mask) == expected; } @@ -1818,7 +1818,7 @@ private: * A '0' in a bitstring indicates that a zero must be present at that bit position. * A '1' in a bitstring indicates that a one must be present at that bit position. */ - static auto GetMaskAndExpect(const char* const bitstring) { + static constexpr auto GetMaskAndExpect(const char* const bitstring) { u16 mask = 0, expect = 0; for (std::size_t i = 0; i < opcode_bitsize; i++) { const std::size_t bit_position = opcode_bitsize - i - 1; @@ -1835,15 +1835,15 @@ private: break; } } - return std::make_tuple(mask, expect); + return std::make_pair(mask, expect); } public: /// Creates a matcher that can match and parse instructions based on bitstring. - static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type, - const char* const name) { - const auto mask_expect = GetMaskAndExpect(bitstring); - return Matcher(name, std::get<0>(mask_expect), std::get<1>(mask_expect), op, type); + static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type, + const char* const name) { + const auto [mask, expected] = GetMaskAndExpect(bitstring); + return Matcher(name, mask, expected, op, type); } }; |