diff options
Diffstat (limited to 'src/video_core')
22 files changed, 611 insertions, 327 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 114bed20d..1e31a2900 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -46,6 +46,8 @@ add_library(video_core STATIC renderer_opengl/gl_rasterizer_cache.h renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_resource_manager.h + renderer_opengl/gl_sampler_cache.cpp + renderer_opengl/gl_sampler_cache.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h renderer_opengl/gl_shader_decompiler.cpp @@ -67,6 +69,8 @@ add_library(video_core STATIC renderer_opengl/renderer_opengl.h renderer_opengl/utils.cpp renderer_opengl/utils.h + sampler_cache.cpp + sampler_cache.h shader/decode/arithmetic.cpp shader/decode/arithmetic_immediate.cpp shader/decode/bfe.cpp diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 74403eed4..b198793bc 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -482,19 +482,8 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt return textures; } -Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, - std::size_t offset) const { - auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; - auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; - ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); - - const GPUVAddr tex_info_address = - tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); - - ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); - - const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; - +Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle, + std::size_t offset) const { Texture::FullTextureInfo tex_info{}; tex_info.index = static_cast<u32>(offset); @@ -511,6 +500,22 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, return tex_info; } +Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, + std::size_t offset) const { + const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; + const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; + ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); + + const GPUVAddr tex_info_address = + tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); + + ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); + + const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; + + return GetTextureInfo(tex_handle, offset); +} + u32 Maxwell3D::GetRegisterValue(u32 method) const { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); return regs.reg_array[method]; @@ -524,4 +529,12 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer.Clear(); } +u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { + const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; + const auto& buffer = shader_stage.const_buffers[const_buffer]; + u32 result; + std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32)); + return result; +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 321af3297..cc2424d38 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1131,12 +1131,18 @@ public: /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); + /// Given a Texture Handle, returns the TSC and TIC entries. + Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, + std::size_t offset) const; + /// Returns a list of enabled textures for the specified shader stage. std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; + u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. using MacroMemory = std::array<u32, 0x40000>; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 2e1e96c81..fce9733b9 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -387,6 +387,20 @@ enum class IpaSampleMode : u64 { Offset = 2, }; +enum class LmemLoadCacheManagement : u64 { + Default = 0, + LU = 1, + CI = 2, + CV = 3, +}; + +enum class LmemStoreCacheManagement : u64 { + Default = 0, + CG = 1, + CS = 2, + WT = 3, +}; + struct IpaMode { IpaInterpMode interpolation_mode; IpaSampleMode sampling_mode; @@ -782,7 +796,7 @@ union Instruction { } ld_l; union { - BitField<44, 2, u64> unknown; + BitField<44, 2, LmemStoreCacheManagement> cache_management; } st_l; union { @@ -792,6 +806,12 @@ union Instruction { } ldg; union { + BitField<48, 3, UniformType> type; + BitField<46, 2, u64> cache_mode; + BitField<20, 24, s64> immediate_offset; + } stg; + + union { BitField<0, 3, u64> pred0; BitField<3, 3, u64> pred3; BitField<7, 1, u64> abs_a; @@ -967,6 +987,38 @@ union Instruction { } tex; union { + BitField<28, 1, u64> array; + BitField<29, 2, TextureType> texture_type; + BitField<31, 4, u64> component_mask; + BitField<49, 1, u64> nodep_flag; + BitField<50, 1, u64> dc_flag; + BitField<36, 1, u64> aoffi_flag; + BitField<37, 3, TextureProcessMode> process_mode; + + bool IsComponentEnabled(std::size_t component) const { + return ((1ULL << component) & component_mask) != 0; + } + + TextureProcessMode GetTextureProcessMode() const { + return process_mode; + } + + bool UsesMiscMode(TextureMiscMode mode) const { + switch (mode) { + case TextureMiscMode::DC: + return dc_flag != 0; + case TextureMiscMode::NODEP: + return nodep_flag != 0; + case TextureMiscMode::AOFFI: + return aoffi_flag != 0; + default: + break; + } + return false; + } + } tex_b; + + union { BitField<22, 6, TextureQueryType> query_type; BitField<31, 4, u64> component_mask; BitField<49, 1, u64> nodep_flag; @@ -1312,7 +1364,9 @@ public: LDG, // Load from global memory STG, // Store in global memory TEX, + TEX_B, // Texture Load Bindless TXQ, // Texture Query + TXQ_B, // Texture Query Bindless TEXS, // Texture Fetch with scalar/non-vec4 source/destinations TLDS, // Texture Load with scalar/non-vec4 source/destinations TLD4, // Texture Load 4 @@ -1580,7 +1634,9 @@ private: INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), + INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), + INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index 8d9ee81f1..ea4a593af 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -14,28 +14,28 @@ namespace OpenGL { -CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} { +CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, + max_size{max_size} { buffer.Create(); - // Bind and unbind the buffer so it gets allocated by the driver - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); } -void CachedGlobalRegion::Reload(u32 size_) { - constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize); +CachedGlobalRegion::~CachedGlobalRegion() = default; +void CachedGlobalRegion::Reload(u32 size_) { size = size_; if (size > max_size) { size = max_size; - LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_, + LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, max_size); } + glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); +} - // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); +void CachedGlobalRegion::Flush() { + LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); + glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr); } GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { @@ -46,14 +46,16 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, return search->second; } -GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size, - u8* host_ptr) { +GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, + u32 size) { GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; if (!region) { // No reserved surface available, create a new one and reserve it auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr); - region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr); + const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; + ASSERT(cpu_addr); + + region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size); ReserveGlobalRegion(region); } region->Reload(size); @@ -65,7 +67,11 @@ void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { } GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) - : RasterizerCache{rasterizer} {} + : RasterizerCache{rasterizer} { + GLint max_ssbo_size_; + glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); + max_ssbo_size = static_cast<u32>(max_ssbo_size_); +} GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( const GLShader::GlobalMemoryEntry& global_region, @@ -73,7 +79,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( auto& gpu{Core::System::GetInstance().GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()}; const auto actual_addr{memory_manager.Read<u64>(addr)}; @@ -85,7 +91,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( if (!region) { // No global region found - create a new one - region = GetUncachedGlobalRegion(actual_addr, size, host_ptr); + region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); Register(region); } diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index 5a21ab66f..196e6e278 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h @@ -19,7 +19,7 @@ namespace OpenGL { namespace GLShader { class GlobalMemoryEntry; -} // namespace GLShader +} class RasterizerOpenGL; class CachedGlobalRegion; @@ -27,7 +27,8 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; class CachedGlobalRegion final : public RasterizerCacheObject { public: - explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr); + explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); + ~CachedGlobalRegion(); VAddr GetCpuAddr() const override { return cpu_addr; @@ -45,14 +46,14 @@ public: /// Reloads the global region from guest memory void Reload(u32 size_); - // TODO(Rodrigo): When global memory is written (STG), implement flushing - void Flush() override { - UNIMPLEMENTED(); - } + void Flush() override; private: VAddr cpu_addr{}; + u8* host_ptr{}; u32 size{}; + u32 max_size{}; + OGLBuffer buffer; }; @@ -66,10 +67,11 @@ public: private: GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; - GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr); + GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); void ReserveGlobalRegion(GlobalRegion region); std::unordered_map<CacheAddr, GlobalRegion> reserve; + u32 max_ssbo_size{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d250d5cbb..6034dc489 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -101,12 +101,6 @@ struct FramebufferCacheKey { RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system}, screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { - // Create sampler objects - for (std::size_t i = 0; i < texture_samplers.size(); ++i) { - texture_samplers[i].Create(); - state.texture_units[i].sampler = texture_samplers[i].sampler.handle; - } - OpenGLState::ApplyDefaultState(); shader_program_manager = std::make_unique<GLShader::ProgramManager>(); @@ -582,9 +576,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( } void RasterizerOpenGL::Clear() { - const auto prev_state{state}; - SCOPE_EXIT({ prev_state.Apply(); }); - const auto& regs = system.GPU().Maxwell3D().regs; bool use_color{}; bool use_depth{}; @@ -656,7 +647,10 @@ void RasterizerOpenGL::Clear() { clear_state.EmulateViewportWithScissor(); } - clear_state.Apply(); + clear_state.ApplyColorMask(); + clear_state.ApplyDepth(); + clear_state.ApplyStencilTest(); + clear_state.ApplyViewport(); if (use_color) { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); @@ -756,6 +750,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { return; } res_cache.FlushRegion(addr, size); + global_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -812,92 +807,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::SamplerInfo::Create() { - sampler.Create(); - mag_filter = Tegra::Texture::TextureFilter::Linear; - min_filter = Tegra::Texture::TextureFilter::Linear; - wrap_u = Tegra::Texture::WrapMode::Wrap; - wrap_v = Tegra::Texture::WrapMode::Wrap; - wrap_p = Tegra::Texture::WrapMode::Wrap; - use_depth_compare = false; - depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; - - // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR - glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); - - // Other attributes have correct defaults -} - -void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { - const GLuint sampler_id = sampler.handle; - if (mag_filter != config.mag_filter) { - mag_filter = config.mag_filter; - glSamplerParameteri( - sampler_id, GL_TEXTURE_MAG_FILTER, - MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); - } - if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) { - min_filter = config.min_filter; - mipmap_filter = config.mipmap_filter; - glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, - MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter)); - } - - if (wrap_u != config.wrap_u) { - wrap_u = config.wrap_u; - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); - } - if (wrap_v != config.wrap_v) { - wrap_v = config.wrap_v; - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); - } - if (wrap_p != config.wrap_p) { - wrap_p = config.wrap_p; - glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); - } - - if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) { - use_depth_compare = enabled; - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, - use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); - } - - if (depth_compare_func != config.depth_compare_func) { - depth_compare_func = config.depth_compare_func; - glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, - MaxwellToGL::DepthCompareFunc(depth_compare_func)); - } - - if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) { - border_color = new_border_color; - glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data()); - } - - if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) { - max_anisotropic = anisotropic; - if (GLAD_GL_ARB_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); - } else if (GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); - } - } - - if (const float min = config.GetMinLod(); min_lod != min) { - min_lod = min; - glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod); - } - if (const float max = config.GetMaxLod(); max_lod != max) { - max_lod = max; - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod); - } - - if (const float bias = config.GetLodBias(); lod_bias != bias) { - lod_bias = bias; - glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias); - } -} - void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, GLuint program_handle, BaseBindings base_bindings) { @@ -953,6 +862,9 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry{entries[bindpoint]}; const auto& region{global_cache.GetGlobalRegion(entry, stage)}; + if (entry.IsWritten()) { + region->MarkAsModified(true, global_cache); + } bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, static_cast<GLsizeiptr>(region->GetSizeInBytes())); } @@ -970,10 +882,18 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + Tegra::Texture::FullTextureInfo texture; + if (entry.IsBindless()) { + const auto cbuf = entry.GetBindlessCBuf(); + Tegra::Texture::TextureHandle tex_handle; + tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); + texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); + } else { + texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + } const u32 current_bindpoint = base_bindings.sampler + bindpoint; - texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); + state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) { state.texture_units[current_bindpoint].texture = diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index e4c64ae71..a0e056142 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -25,6 +25,7 @@ #include "video_core/renderer_opengl/gl_primitive_assembler.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" @@ -71,39 +72,7 @@ public: static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - static constexpr std::size_t MaxGlobalMemorySize = 0x10000; - static_assert(MaxGlobalMemorySize % sizeof(float) == 0, - "The maximum size of a global memory must be a multiple of the size of float"); - private: - class SamplerInfo { - public: - OGLSampler sampler; - - /// Creates the sampler object, initializing its state so that it's in sync with the - /// SamplerInfo struct. - void Create(); - /// Syncs the sampler object with the config, updating any necessary state. - void SyncWithConfig(const Tegra::Texture::TSCEntry& info); - - private: - Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; - Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; - Tegra::Texture::TextureMipmapFilter mipmap_filter = - Tegra::Texture::TextureMipmapFilter::None; - Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; - Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; - Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; - bool use_depth_compare = false; - Tegra::Texture::DepthCompareFunc depth_compare_func = - Tegra::Texture::DepthCompareFunc::Always; - GLvec4 border_color = {}; - float min_lod = 0.0f; - float max_lod = 16.0f; - float lod_bias = 0.0f; - float max_anisotropic = 1.0f; - }; - struct FramebufferConfigState { bool using_color_fb{}; bool using_depth_fb{}; @@ -208,6 +177,7 @@ private: RasterizerCacheOpenGL res_cache; ShaderCacheOpenGL shader_cache; GlobalRegionCacheOpenGL global_cache; + SamplerCacheOpenGL sampler_cache; Core::System& system; @@ -223,8 +193,6 @@ private: FramebufferConfigState current_framebuffer_config_state; std::pair<bool, bool> current_depth_stencil_usage{}; - std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers; - static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; PrimitiveAssembler primitive_assembler{buffer_cache}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index f2ffc4710..7a68b8738 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -281,10 +281,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); params.width = config.width; - if (!params.is_tiled) { - const u32 bpp = params.GetFormatBpp() / 8; - params.pitch = config.width * bpp; - } + params.pitch = config.pitch; params.height = config.height; params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp new file mode 100644 index 000000000..3ded5ecea --- /dev/null +++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp @@ -0,0 +1,52 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/logging/log.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_sampler_cache.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" + +namespace OpenGL { + +SamplerCacheOpenGL::SamplerCacheOpenGL() = default; + +SamplerCacheOpenGL::~SamplerCacheOpenGL() = default; + +OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { + OGLSampler sampler; + sampler.Create(); + + const GLuint sampler_id{sampler.handle}; + glSamplerParameteri( + sampler_id, GL_TEXTURE_MAG_FILTER, + MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None)); + glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, + MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p)); + glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, + tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); + glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, + MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func)); + glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data()); + glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod()); + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod()); + glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias()); + if (GLAD_GL_ARB_texture_filter_anisotropic) { + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); + } else if (GLAD_GL_EXT_texture_filter_anisotropic) { + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); + } else if (tsc.GetMaxAnisotropy() != 1) { + LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); + } + + return sampler; +} + +GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const { + return sampler.handle; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h new file mode 100644 index 000000000..defbc2d81 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_sampler_cache.h @@ -0,0 +1,25 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <glad/glad.h> + +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/sampler_cache.h" + +namespace OpenGL { + +class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> { +public: + explicit SamplerCacheOpenGL(); + ~SamplerCacheOpenGL(); + +protected: + OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; + + GLuint ToSamplerType(const OGLSampler& sampler) const; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 28e490b3c..445048daf 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -45,8 +45,6 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>; enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); -constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = - static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); class ShaderWriter { public: @@ -208,8 +206,10 @@ public: for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); } - for (const auto& gmem : ir.GetGlobalMemoryBases()) { - entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset); + for (const auto& gmem_pair : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem_pair; + entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, + usage.is_read, usage.is_written); } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -380,12 +380,22 @@ private: } void DeclareGlobalMemory() { - for (const auto& entry : ir.GetGlobalMemoryBases()) { + for (const auto& gmem : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem; + + // Since we don't know how the shader will use the shader, hint the driver to disable as + // much optimizations as possible + std::string qualifier = "coherent volatile"; + if (usage.is_read && !usage.is_written) + qualifier += " readonly"; + else if (usage.is_written && !usage.is_read) + qualifier += " writeonly"; + const std::string binding = - fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset); - code.AddLine("layout (std430, binding = " + binding + ") buffer " + - GetGlobalMemoryBlock(entry) + " {"); - code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];"); + fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset); + code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " + + GetGlobalMemoryBlock(base) + " {"); + code.AddLine(" float " + GetGlobalMemory(base) + "[];"); code.AddLine("};"); code.AddNewLine(); } @@ -868,6 +878,12 @@ private: } else if (const auto lmem = std::get_if<LmemNode>(dest)) { target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]"; + } else if (const auto gmem = std::get_if<GmemNode>(dest)) { + const std::string real = Visit(gmem->GetRealAddress()); + const std::string base = Visit(gmem->GetBaseAddress()); + const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; + target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); + } else { UNREACHABLE_MSG("Assign called without a proper target"); } @@ -1621,9 +1637,7 @@ private: std::string GetCommonDeclarations() { const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); - const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + - "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" + "#define ftoi floatBitsToInt\n" "#define ftou floatBitsToUint\n" "#define itof intBitsToFloat\n" diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 4e04ab2f8..74032d237 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -39,8 +39,9 @@ private: class GlobalMemoryEntry { public: - explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset) - : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {} + explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, bool is_written) + : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{ + is_written} {} u32 GetCbufIndex() const { return cbuf_index; @@ -50,14 +51,25 @@ public: return cbuf_offset; } + bool IsRead() const { + return is_read; + } + + bool IsWritten() const { + return is_written; + } + private: u32 cbuf_index{}; u32 cbuf_offset{}; + bool is_read{}; + bool is_written{}; }; struct ShaderEntries { std::vector<ConstBufferEntry> const_buffers; std::vector<SamplerEntry> samplers; + std::vector<SamplerEntry> bindless_samplers; std::vector<GlobalMemoryEntry> global_memory_entries; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; std::size_t shader_length{}; @@ -68,4 +80,4 @@ std::string GetCommonDeclarations(); ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage, const std::string& suffix); -} // namespace OpenGL::GLShader
\ No newline at end of file +} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 8a43eb157..53752b38d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -319,16 +319,19 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn u32 type{}; u8 is_array{}; u8 is_shadow{}; + u8 is_bindless{}; if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) || file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) || file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) || file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) || - file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) { + file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) || + file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) { return {}; } - entry.entries.samplers.emplace_back( - static_cast<std::size_t>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0); + entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset), + static_cast<std::size_t>(index), + static_cast<Tegra::Shader::TextureType>(type), + is_array != 0, is_shadow != 0, is_bindless != 0); } u32 global_memory_count{}; @@ -337,11 +340,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn for (u32 i = 0; i < global_memory_count; ++i) { u32 cbuf_index{}; u32 cbuf_offset{}; + u8 is_read{}; + u8 is_written{}; if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) || - file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) { + file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) || + file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) || + file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) { return {}; } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset); + entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0, + is_written != 0); } for (auto& clip_distance : entry.entries.clip_distances) { @@ -388,7 +396,8 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 || file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 || file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 || - file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) { + file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 || + file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) { return false; } } @@ -397,7 +406,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu return false; for (const auto& gmem : entries.global_memory_entries) { if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 || - file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) { + file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 || + file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 || + file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) { return false; } } diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index ed3178f09..801826d3d 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -7,7 +7,6 @@ #include <unordered_map> #include "common/assert.h" -#include "common/cityhash.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" @@ -28,39 +27,20 @@ static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> } } -std::size_t SamplerCacheKey::Hash() const { - static_assert(sizeof(raw) % sizeof(u64) == 0); - return static_cast<std::size_t>( - Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64))); -} - -bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { - return raw == rhs.raw; -} - VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {} VKSamplerCache::~VKSamplerCache() = default; -vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) { - const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); - auto& sampler = entry->second; - if (is_cache_miss) { - sampler = CreateSampler(tsc); - } - return *sampler; -} - -UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) { - const float max_anisotropy = tsc.GetMaxAnisotropy(); - const bool has_anisotropy = max_anisotropy > 1.0f; +UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { + const float max_anisotropy{tsc.GetMaxAnisotropy()}; + const bool has_anisotropy{max_anisotropy > 1.0f}; - const auto border_color = tsc.GetBorderColor(); - const auto vk_border_color = TryConvertBorderColor(border_color); + const auto border_color{tsc.GetBorderColor()}; + const auto vk_border_color{TryConvertBorderColor(border_color)}; UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}", border_color[0], border_color[1], border_color[2], border_color[3]); - constexpr bool unnormalized_coords = false; + constexpr bool unnormalized_coords{false}; const vk::SamplerCreateInfo sampler_ci( {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), @@ -73,9 +53,13 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), unnormalized_coords); - const auto& dld = device.GetDispatchLoader(); - const auto dev = device.GetLogical(); + const auto& dld{device.GetDispatchLoader()}; + const auto dev{device.GetLogical()}; return dev.createSamplerUnique(sampler_ci, nullptr, dld); } +vk::Sampler VKSamplerCache::ToSamplerType(const UniqueSampler& sampler) const { + return *sampler; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h index c6394dc87..771b05c73 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h @@ -8,49 +8,25 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/declarations.h" +#include "video_core/sampler_cache.h" #include "video_core/textures/texture.h" namespace Vulkan { class VKDevice; -struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { - std::size_t Hash() const; - - bool operator==(const SamplerCacheKey& rhs) const; - - bool operator!=(const SamplerCacheKey& rhs) const { - return !operator==(rhs); - } -}; - -} // namespace Vulkan - -namespace std { - -template <> -struct hash<Vulkan::SamplerCacheKey> { - std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace Vulkan { - -class VKSamplerCache { +class VKSamplerCache final : public VideoCommon::SamplerCache<vk::Sampler, UniqueSampler> { public: explicit VKSamplerCache(const VKDevice& device); ~VKSamplerCache(); - vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc); +protected: + UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; -private: - UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc); + vk::Sampler ToSamplerType(const UniqueSampler& sampler) const; +private: const VKDevice& device; - std::unordered_map<SamplerCacheKey, UniqueSampler> cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index e0a6f5e87..25500f9a3 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -191,8 +191,9 @@ public: for (const auto& cbuf : ir.GetConstantBuffers()) { entries.const_buffers.emplace_back(cbuf.second, cbuf.first); } - for (const auto& gmem : ir.GetGlobalMemoryBases()) { - entries.global_buffers.emplace_back(gmem.cbuf_index, gmem.cbuf_offset); + for (const auto& gmem_pair : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem_pair; + entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset); } for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); @@ -225,7 +226,7 @@ private: return current_binding; }; const_buffers_base_binding = Allocate(ir.GetConstantBuffers().size()); - global_buffers_base_binding = Allocate(ir.GetGlobalMemoryBases().size()); + global_buffers_base_binding = Allocate(ir.GetGlobalMemory().size()); samplers_base_binding = Allocate(ir.GetSamplers().size()); ASSERT_MSG(binding_iterator - binding_base < STAGE_BINDING_STRIDE, @@ -390,14 +391,15 @@ private: void DeclareGlobalBuffers() { u32 binding = global_buffers_base_binding; - for (const auto& entry : ir.GetGlobalMemoryBases()) { + for (const auto& entry : ir.GetGlobalMemory()) { + const auto [base, usage] = entry; const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); AddGlobalVariable( - Name(id, fmt::format("gmem_{}_{}", entry.cbuf_index, entry.cbuf_offset))); + Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset))); Decorate(id, spv::Decoration::Binding, binding++); Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - global_buffers.emplace(entry, id); + global_buffers.emplace(base, id); } } diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp new file mode 100644 index 000000000..53c7ef12d --- /dev/null +++ b/src/video_core/sampler_cache.cpp @@ -0,0 +1,21 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/cityhash.h" +#include "common/common_types.h" +#include "video_core/sampler_cache.h" + +namespace VideoCommon { + +std::size_t SamplerCacheKey::Hash() const { + static_assert(sizeof(raw) % sizeof(u64) == 0); + return static_cast<std::size_t>( + Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64))); +} + +bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const { + return raw == rhs.raw; +} + +} // namespace VideoCommon diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h new file mode 100644 index 000000000..cbe3ad071 --- /dev/null +++ b/src/video_core/sampler_cache.h @@ -0,0 +1,60 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <unordered_map> + +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +struct SamplerCacheKey final : public Tegra::Texture::TSCEntry { + std::size_t Hash() const; + + bool operator==(const SamplerCacheKey& rhs) const; + + bool operator!=(const SamplerCacheKey& rhs) const { + return !operator==(rhs); + } +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash<VideoCommon::SamplerCacheKey> { + std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std + +namespace VideoCommon { + +template <typename SamplerType, typename SamplerStorageType> +class SamplerCache { +public: + SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) { + const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc}); + auto& sampler = entry->second; + if (is_cache_miss) { + sampler = CreateSampler(tsc); + } + return ToSamplerType(sampler); + } + +protected: + virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0; + + virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0; + +private: + std::unordered_map<SamplerCacheKey, SamplerStorageType> cache; +}; + +} // namespace VideoCommon
\ No newline at end of file diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ea3c71eed..ea1092db1 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -18,6 +19,23 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +namespace { +u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { + switch (uniform_type) { + case Tegra::Shader::UniformType::Single: + return 1; + case Tegra::Shader::UniformType::Double: + return 2; + case Tegra::Shader::UniformType::Quad: + case Tegra::Shader::UniformType::UnsignedQuad: + return 4; + default: + UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); + return 1; + } +} +} // namespace + u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -85,8 +103,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::LD_L: { - UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", - static_cast<u32>(instr.ld_l.unknown.Value())); + LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", + static_cast<u64>(instr.ld_l.unknown.Value())); const auto GetLmem = [&](s32 offset) { ASSERT(offset % 4 == 0); @@ -126,45 +144,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::LDG: { - const u32 count = [&]() { - switch (instr.ldg.type) { - case Tegra::Shader::UniformType::Single: - return 1; - case Tegra::Shader::UniformType::Double: - return 2; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 4; - default: - UNIMPLEMENTED_MSG("Unimplemented LDG size!"); - return 1; - } - }(); - - const Node addr_register = GetRegister(instr.gpr8); - const Node base_address = - TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - const auto cbuf = std::get_if<CbufNode>(base_address); - ASSERT(cbuf != nullptr); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); - ASSERT(cbuf_offset_imm != nullptr); - const auto cbuf_offset = cbuf_offset_imm->GetValue(); - - bb.push_back(Comment( - fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); - - const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; - used_global_memory_bases.insert(descriptor); - - const Node immediate_offset = - Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value())); - const Node base_real_address = - Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register); + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), + static_cast<u32>(instr.ldg.immediate_offset.Value()), false); + const u32 count = GetUniformTypeElementsCount(instr.ldg.type); for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); SetTemporal(bb, i, gmem); @@ -174,6 +162,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::STG: { + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), + static_cast<u32>(instr.stg.immediate_offset.Value()), true); + + // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} + SetTemporal(bb, 0, real_address_base); + + const u32 count = GetUniformTypeElementsCount(instr.stg.type); + for (u32 i = 0; i < count; ++i) { + SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); + } + for (u32 i = 0; i < count; ++i) { + const Node it_offset = Immediate(i * 4); + const Node real_address = + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + + bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); + } + break; + } case OpCode::Id::ST_A: { UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, "Indirect attribute loads are not supported"); @@ -205,8 +215,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::ST_L: { - UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", - static_cast<u32>(instr.st_l.unknown.Value())); + LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", + static_cast<u64>(instr.st_l.cache_management.Value())); const auto GetLmemAddr = [&](s32 offset) { ASSERT(offset % 4 == 0); @@ -236,4 +246,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return pc; } +std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, + Node addr_register, + u32 immediate_offset, + bool is_write) { + const Node base_address{ + TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; + const auto cbuf = std::get_if<CbufNode>(base_address); + ASSERT(cbuf != nullptr); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + ASSERT(cbuf_offset_imm != nullptr); + const auto cbuf_offset = cbuf_offset_imm->GetValue(); + + bb.push_back( + Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); + + const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; + const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); + auto& usage = entry->second; + if (is_write) { + usage.is_written = true; + } else { + usage.is_read = true; + } + + const auto real_address = + Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); + + return {real_address, base_address, descriptor}; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index a775b402b..fa65ac9a9 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -40,7 +40,7 @@ static std::size_t GetCoordCount(TextureType texture_type) { u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - + bool is_bindless = false; switch (opcode->get().GetId()) { case OpCode::Id::TEX: { if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { @@ -54,7 +54,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto process_mode = instr.tex.GetTextureProcessMode(); WriteTexInstructionFloat( bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi)); + GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); + break; + } + case OpCode::Id::TEX_B: { + UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + + if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); + } + + const TextureType texture_type{instr.tex_b.texture_type}; + const bool is_array = instr.tex_b.array != 0; + const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); + const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.tex_b.GetTextureProcessMode(); + WriteTexInstructionFloat(bb, instr, + GetTexCode(instr, texture_type, process_mode, depth_compare, + is_array, is_aoffi, {instr.gpr20})); break; } case OpCode::Id::TEXS: { @@ -134,6 +152,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { WriteTexsInstructionFloat(bb, instr, values); break; } + case OpCode::Id::TXQ_B: + is_bindless = true; + [[fallthrough]]; case OpCode::Id::TXQ: { if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); @@ -143,7 +164,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + is_bindless + ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, + false) + : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); u32 indexer = 0; switch (instr.txq.query_type) { @@ -154,7 +178,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); + Operation(OperationCode::TextureQueryDimensions, meta, + GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); SetTemporal(bb, indexer++, value); } for (u32 i = 0; i < indexer; ++i) { @@ -168,6 +193,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::TMML_B: + is_bindless = true; + [[fallthrough]]; case OpCode::Id::TMML: { UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), "NDV is not implemented"); @@ -178,7 +206,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = is_bindless + ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) + : GetSampler(instr.sampler, texture_type, is_array, false); std::vector<Node> coords; @@ -199,17 +229,19 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { coords.push_back(GetRegister(instr.gpr8.Value() + 1)); texture_type = TextureType::Texture2D; } - + u32 indexer = 0; for (u32 element = 0; element < 2; ++element) { + if (!instr.tmml.IsComponentEnabled(element)) { + continue; + } auto params = coords; MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); - SetTemporal(bb, element, value); + SetTemporal(bb, indexer++, value); } - for (u32 element = 0; element < 2; ++element) { - SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); } - break; } case OpCode::Id::TLDS: { @@ -254,6 +286,34 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu return *used_samplers.emplace(entry).first; } +const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, + bool is_array, bool is_shadow) { + const Node sampler_register = GetRegister(reg); + const Node base_sampler = + TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); + const auto cbuf = std::get_if<CbufNode>(base_sampler); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + ASSERT(cbuf_offset_imm != nullptr); + const auto cbuf_offset = cbuf_offset_imm->GetValue(); + const auto cbuf_index = cbuf->GetIndex(); + const u64 cbuf_key = (cbuf_index << 32) | cbuf_offset; + + // If this sampler has already been used, return the existing mapping. + const auto itr = + std::find_if(used_samplers.begin(), used_samplers.end(), + [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; }); + if (itr != used_samplers.end()) { + ASSERT(itr->GetType() == type && itr->IsArray() == is_array && + itr->IsShadow() == is_shadow); + return *itr; + } + + // Otherwise create a new mapping for this sampler + const std::size_t next_index = used_samplers.size(); + const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow}; + return *used_samplers.emplace(entry).first; +} + void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { u32 dest_elem = 0; for (u32 elem = 0; elem < 4; ++elem) { @@ -326,22 +386,27 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, std::vector<Node> coords, Node array, Node depth_compare, u32 bias_offset, - std::vector<Node> aoffi) { + std::vector<Node> aoffi, + std::optional<Tegra::Shader::Register> bindless_reg) { const bool is_array = array; const bool is_shadow = depth_compare; + const bool is_bindless = bindless_reg.has_value(); UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); + const auto& sampler = is_bindless + ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) + : GetSampler(instr.sampler, texture_type, is_array, is_shadow); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || process_mode == TextureProcessMode::LLA; - // LOD selection (either via bias or explicit textureLod) not supported in GL for - // sampler2DArrayShadow and samplerCubeArrayShadow. + // LOD selection (either via bias or explicit textureLod) not + // supported in GL for sampler2DArrayShadow and + // samplerCubeArrayShadow. const bool gl_lod_supported = !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); @@ -359,8 +424,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, lod = Immediate(0.0f); break; case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register indexed by the gpr20 - // field with an offset depending on the usage of the other registers + // If present, lod or bias are always stored in the register + // indexed by the gpr20 field with an offset depending on the + // usage of the other registers bias = GetRegister(instr.gpr20.Value() + bias_offset); break; case TextureProcessMode::LL: @@ -384,11 +450,18 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi) { + bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { const bool lod_bias_enabled{ (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; + const bool is_bindless = bindless_reg.has_value(); + u64 parameter_register = instr.gpr20.Value(); + if (is_bindless) { + ++parameter_register; + } + + const u32 bias_lod_offset = (is_bindless ? 1 : 0); if (lod_bias_enabled) { ++parameter_register; } @@ -423,7 +496,8 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, dc = GetRegister(parameter_register++); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, + aoffi, bindless_reg); } Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, @@ -459,7 +533,8 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, dc = GetRegister(depth_register); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, + {}); } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 4888998d3..57af8b10f 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -196,9 +196,23 @@ enum class ExitMethod { class Sampler { public: + // Use this constructor for bounded Samplers explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, bool is_array, bool is_shadow) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{false} {} + + // Use this constructor for bindless Samplers + explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, + Tegra::Shader::TextureType type, bool is_array, bool is_shadow) + : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, + is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} + + // Use this only for serialization/deserialization + explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow, bool is_bindless) + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{is_bindless} {} std::size_t GetOffset() const { return offset; @@ -220,6 +234,14 @@ public: return is_shadow; } + bool IsBindless() const { + return is_bindless; + } + + std::pair<u32, u32> GetBindlessCBuf() const { + return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; + } + bool operator<(const Sampler& rhs) const { return std::tie(offset, index, type, is_array, is_shadow) < std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow); @@ -231,8 +253,9 @@ private: std::size_t offset{}; std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. + bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. }; class ConstBuffer { @@ -276,6 +299,11 @@ struct GlobalMemoryBase { } }; +struct GlobalMemoryUsage { + bool is_read{}; + bool is_written{}; +}; + struct MetaArithmetic { bool precise{}; }; @@ -578,8 +606,8 @@ public: return used_clip_distances; } - const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const { - return used_global_memory_bases; + const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const { + return used_global_memory; } std::size_t GetLength() const { @@ -730,6 +758,11 @@ private: const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + // Accesses a texture sampler for a bindless texture. + const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, + Tegra::Shader::TextureType type, bool is_array, + bool is_shadow); + /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -743,7 +776,8 @@ private: Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, bool is_aoffi); + bool is_array, bool is_aoffi, + std::optional<Tegra::Shader::Register> bindless_reg); Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, @@ -763,7 +797,8 @@ private: Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi); + Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, + std::optional<Tegra::Shader::Register> bindless_reg); Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height); @@ -781,6 +816,11 @@ private: std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); + std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb, + Node addr_register, + u32 immediate_offset, + bool is_write); + template <typename... T> Node Operation(OperationCode code, const T*... operands) { return StoreNode(OperationNode(code, operands...)); @@ -834,7 +874,7 @@ private: std::map<u32, ConstBuffer> used_cbufs; std::set<Sampler> used_samplers; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; - std::set<GlobalMemoryBase> used_global_memory_bases; + std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; Tegra::Shader::Header header; }; |