summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl/gl_shader_cache.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp359
1 files changed, 212 insertions, 147 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index ac8a9e6b7..42ca3b1bd 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -23,13 +23,13 @@ namespace OpenGL {
using VideoCommon::Shader::ProgramCode;
-// One UBO is always reserved for emulation values
-constexpr u32 RESERVED_UBOS = 1;
+// One UBO is always reserved for emulation values on staged shaders
+constexpr u32 STAGE_RESERVED_UBOS = 1;
struct UnspecializedShader {
std::string code;
GLShader::ShaderEntries entries;
- Maxwell::ShaderProgram program_type;
+ ProgramType program_type;
};
namespace {
@@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
}
/// Gets the shader type from a Maxwell program type
-constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
+constexpr GLenum GetShaderType(ProgramType program_type) {
switch (program_type) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
+ case ProgramType::VertexA:
+ case ProgramType::VertexB:
return GL_VERTEX_SHADER;
- case Maxwell::ShaderProgram::Geometry:
+ case ProgramType::Geometry:
return GL_GEOMETRY_SHADER;
- case Maxwell::ShaderProgram::Fragment:
+ case ProgramType::Fragment:
return GL_FRAGMENT_SHADER;
+ case ProgramType::Compute:
+ return GL_COMPUTE_SHADER;
default:
return GL_NONE;
}
@@ -100,18 +102,44 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen
}
}
+ProgramType GetProgramType(Maxwell::ShaderProgram program) {
+ switch (program) {
+ case Maxwell::ShaderProgram::VertexA:
+ return ProgramType::VertexA;
+ case Maxwell::ShaderProgram::VertexB:
+ return ProgramType::VertexB;
+ case Maxwell::ShaderProgram::TesselationControl:
+ return ProgramType::TessellationControl;
+ case Maxwell::ShaderProgram::TesselationEval:
+ return ProgramType::TessellationEval;
+ case Maxwell::ShaderProgram::Geometry:
+ return ProgramType::Geometry;
+ case Maxwell::ShaderProgram::Fragment:
+ return ProgramType::Fragment;
+ }
+ UNREACHABLE();
+ return {};
+}
+
/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
constexpr std::size_t start_offset = 10;
+ // This is the encoded version of BRA that jumps to itself. All Nvidia
+ // shaders end with one.
+ constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
+ constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
std::size_t offset = start_offset;
std::size_t size = start_offset * sizeof(u64);
while (offset < program.size()) {
const u64 instruction = program[offset];
if (!IsSchedInstruction(offset, start_offset)) {
- if (instruction == 0 || (instruction >> 52) == 0x50b) {
+ if ((instruction & mask) == self_jumping_branch) {
// End on Maxwell's "nop" instruction
break;
}
+ if (instruction == 0) {
+ break;
+ }
}
size += sizeof(u64);
offset++;
@@ -121,11 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
}
/// Hashes one (or two) program streams
-u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
- const ProgramCode& code_b) {
- u64 unique_identifier =
- Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code));
- if (program_type != Maxwell::ShaderProgram::VertexA) {
+u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
+ const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
+ if (size_a == 0) {
+ size_a = CalculateProgramSize(code);
+ }
+ u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
+ if (program_type != ProgramType::VertexA) {
return unique_identifier;
}
// VertexA programs include two programs
@@ -133,46 +163,70 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
std::size_t seed = 0;
boost::hash_combine(seed, unique_identifier);
- const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()),
- CalculateProgramSize(code_b));
+ if (size_b == 0) {
+ size_b = CalculateProgramSize(code_b);
+ }
+ const u64 identifier_b =
+ Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
boost::hash_combine(seed, identifier_b);
return static_cast<u64>(seed);
}
/// Creates an unspecialized program from code streams
-GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type,
+GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
ProgramCode program_code, ProgramCode program_code_b) {
GLShader::ShaderSetup setup(program_code);
- if (program_type == Maxwell::ShaderProgram::VertexA) {
+ setup.program.size_a = CalculateProgramSize(program_code);
+ setup.program.size_b = 0;
+ if (program_type == ProgramType::VertexA) {
// VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
// Conventional HW does not support this, so we combine VertexA and VertexB into one
// stage here.
setup.SetProgramB(program_code_b);
+ setup.program.size_b = CalculateProgramSize(program_code_b);
}
- setup.program.unique_identifier =
- GetUniqueIdentifier(program_type, program_code, program_code_b);
+ setup.program.unique_identifier = GetUniqueIdentifier(
+ program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
switch (program_type) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
+ case ProgramType::VertexA:
+ case ProgramType::VertexB:
return GLShader::GenerateVertexShader(device, setup);
- case Maxwell::ShaderProgram::Geometry:
+ case ProgramType::Geometry:
return GLShader::GenerateGeometryShader(device, setup);
- case Maxwell::ShaderProgram::Fragment:
+ case ProgramType::Fragment:
return GLShader::GenerateFragmentShader(device, setup);
+ case ProgramType::Compute:
+ return GLShader::GenerateComputeShader(device, setup);
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
- UNREACHABLE();
+ UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
return {};
}
}
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
- Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
- GLenum primitive_mode, bool hint_retrievable = false) {
- std::string source = "#version 430 core\n"
- "#extension GL_ARB_separate_shader_objects : enable\n\n";
- source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
+ ProgramType program_type, const ProgramVariant& variant,
+ bool hint_retrievable = false) {
+ auto base_bindings{variant.base_bindings};
+ const auto primitive_mode{variant.primitive_mode};
+ const auto texture_buffer_usage{variant.texture_buffer_usage};
+
+ std::string source = R"(#version 430 core
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shader_viewport_layer_array : enable
+#extension GL_EXT_shader_image_load_formatted : enable
+#extension GL_NV_gpu_shader5 : enable
+#extension GL_NV_shader_thread_group : enable
+#extension GL_NV_shader_thread_shuffle : enable
+)";
+ if (program_type == ProgramType::Compute) {
+ source += "#extension GL_ARB_compute_variable_group_size : require\n";
+ }
+ source += '\n';
+
+ if (program_type != ProgramType::Compute) {
+ source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
+ }
for (const auto& cbuf : entries.const_buffers) {
source +=
@@ -186,15 +240,34 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
base_bindings.sampler++);
}
+ for (const auto& image : entries.images) {
+ source +=
+ fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++);
+ }
+
+ // Transform 1D textures to texture samplers by declaring its preprocessor macros.
+ for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) {
+ if (!texture_buffer_usage.test(i)) {
+ continue;
+ }
+ source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i);
+ }
+ if (texture_buffer_usage.any()) {
+ source += '\n';
+ }
- if (program_type == Maxwell::ShaderProgram::Geometry) {
+ if (program_type == ProgramType::Geometry) {
const auto [glsl_topology, debug_name, max_vertices] =
GetPrimitiveDescription(primitive_mode);
- source += "layout (" + std::string(glsl_topology) + ") in;\n";
+ source += "layout (" + std::string(glsl_topology) + ") in;\n\n";
source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
}
+ if (program_type == ProgramType::Compute) {
+ source += "layout (local_size_variable) in;\n";
+ }
+ source += '\n';
source += code;
OGLShader shader;
@@ -221,131 +294,97 @@ std::set<GLenum> GetSupportedFormats() {
} // Anonymous namespace
-CachedShader::CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier,
- Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
- const PrecompiledPrograms& precompiled_programs,
- ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
- : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr},
- unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache},
- precompiled_programs{precompiled_programs} {
- const std::size_t code_size{CalculateProgramSize(program_code)};
- const std::size_t code_size_b{program_code_b.empty() ? 0
- : CalculateProgramSize(program_code_b)};
- GLShader::ProgramResult program_result{
- CreateProgram(device, program_type, program_code, program_code_b)};
- if (program_result.first.empty()) {
+CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
+ GLShader::ProgramResult result)
+ : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr},
+ unique_identifier{params.unique_identifier}, program_type{program_type},
+ disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs},
+ entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {}
+
+Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
+ Maxwell::ShaderProgram program_type,
+ ProgramCode&& program_code,
+ ProgramCode&& program_code_b) {
+ const auto code_size{CalculateProgramSize(program_code)};
+ const auto code_size_b{CalculateProgramSize(program_code_b)};
+ auto result{
+ CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
+ if (result.first.empty()) {
// TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
- return;
+ return {};
}
- code = program_result.first;
- entries = program_result.second;
- shader_length = entries.shader_length;
+ params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
+ params.unique_identifier, GetProgramType(program_type),
+ static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)),
+ std::move(program_code), std::move(program_code_b)));
- const ShaderDiskCacheRaw raw(unique_identifier, program_type,
- static_cast<u32>(code_size / sizeof(u64)),
- static_cast<u32>(code_size_b / sizeof(u64)),
- std::move(program_code), std::move(program_code_b));
- disk_cache.SaveRaw(raw);
+ return std::shared_ptr<CachedShader>(
+ new CachedShader(params, GetProgramType(program_type), std::move(result)));
}
-CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
- Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
- const PrecompiledPrograms& precompiled_programs,
- GLShader::ProgramResult result, u8* host_ptr)
- : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
- program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{
- precompiled_programs} {
- code = std::move(result.first);
- entries = result.second;
- shader_length = entries.shader_length;
+Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
+ Maxwell::ShaderProgram program_type,
+ GLShader::ProgramResult result) {
+ return std::shared_ptr<CachedShader>(
+ new CachedShader(params, GetProgramType(program_type), std::move(result)));
}
-std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
- BaseBindings base_bindings) {
- GLuint handle{};
- if (program_type == Maxwell::ShaderProgram::Geometry) {
- handle = GetGeometryShader(primitive_mode, base_bindings);
- } else {
- const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
- auto& program = entry->second;
- if (is_cache_miss) {
- program = TryLoadProgram(primitive_mode, base_bindings);
- if (!program) {
- program =
- SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
- disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
- }
+Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
+ auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
- LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
- }
+ const auto code_size{CalculateProgramSize(code)};
+ params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
+ static_cast<u32>(code_size / sizeof(u64)), 0,
+ std::move(code), {}));
- handle = program->handle;
- }
-
- base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
- base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
- base_bindings.sampler += static_cast<u32>(entries.samplers.size());
+ return std::shared_ptr<CachedShader>(
+ new CachedShader(params, ProgramType::Compute, std::move(result)));
+}
- return {handle, base_bindings};
+Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params,
+ GLShader::ProgramResult result) {
+ return std::shared_ptr<CachedShader>(
+ new CachedShader(params, ProgramType::Compute, std::move(result)));
}
-GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
- const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
- auto& programs = entry->second;
+std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
+ const auto [entry, is_cache_miss] = programs.try_emplace(variant);
+ auto& program = entry->second;
+ if (is_cache_miss) {
+ program = TryLoadProgram(variant);
+ if (!program) {
+ program = SpecializeShader(code, entries, program_type, variant);
+ disk_cache.SaveUsage(GetUsage(variant));
+ }
- switch (primitive_mode) {
- case GL_POINTS:
- return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
- case GL_LINES:
- case GL_LINE_STRIP:
- return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
- case GL_LINES_ADJACENCY:
- case GL_LINE_STRIP_ADJACENCY:
- return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
- case GL_TRIANGLES:
- case GL_TRIANGLE_STRIP:
- case GL_TRIANGLE_FAN:
- return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
- case GL_TRIANGLES_ADJACENCY:
- case GL_TRIANGLE_STRIP_ADJACENCY:
- return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
- default:
- UNREACHABLE_MSG("Unknown primitive mode.");
- return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
+ LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
}
-}
-GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
- GLenum primitive_mode) {
- if (target_program) {
- return target_program->handle;
+ auto base_bindings = variant.base_bindings;
+ base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size());
+ if (program_type != ProgramType::Compute) {
+ base_bindings.cbuf += STAGE_RESERVED_UBOS;
}
- const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
- target_program = TryLoadProgram(primitive_mode, base_bindings);
- if (!target_program) {
- target_program =
- SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
- disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
- }
-
- LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
+ base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
+ base_bindings.sampler += static_cast<u32>(entries.samplers.size());
- return target_program->handle;
-};
+ return {program->handle, base_bindings};
+}
-CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
- BaseBindings base_bindings) const {
- const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
+CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
+ const auto found = precompiled_programs.find(GetUsage(variant));
if (found == precompiled_programs.end()) {
return {};
}
return found->second;
}
-ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
- BaseBindings base_bindings) const {
- return {unique_identifier, base_bindings, primitive_mode};
+ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
+ ShaderDiskCacheUsage usage;
+ usage.unique_identifier = unique_identifier;
+ usage.variant = variant;
+ return usage;
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -411,8 +450,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
if (!shader) {
shader = SpecializeShader(unspecialized.code, unspecialized.entries,
- unspecialized.program_type, usage.bindings,
- usage.primitive, true);
+ unspecialized.program_type, usage.variant, true);
}
std::scoped_lock lock(mutex);
@@ -547,7 +585,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
- if (!system.GPU().Maxwell3D().dirty_flags.shaders) {
+ if (!system.GPU().Maxwell3D().dirty.shaders) {
return last_shaders[static_cast<std::size_t>(program)];
}
@@ -564,28 +602,55 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
// No shader found - create a new one
ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
ProgramCode program_code_b;
- if (program == Maxwell::ShaderProgram::VertexA) {
+ const bool is_program_a{program == Maxwell::ShaderProgram::VertexA};
+ if (is_program_a) {
const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
program_code_b = GetShaderCode(memory_manager, program_addr_b,
memory_manager.GetPointer(program_addr_b));
}
- const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
- const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
+ const auto unique_identifier =
+ GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
+ const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
+ const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
+ host_ptr, unique_identifier};
+
const auto found = precompiled_shaders.find(unique_identifier);
- if (found != precompiled_shaders.end()) {
- // Create a shader from the cache
- shader = std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
- precompiled_programs, found->second, host_ptr);
+ if (found == precompiled_shaders.end()) {
+ shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code),
+ std::move(program_code_b));
} else {
- // Create a shader from guest memory
- shader = std::make_shared<CachedShader>(
- device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
- std::move(program_code), std::move(program_code_b), host_ptr);
+ shader = CachedShader::CreateStageFromCache(params, program, found->second);
}
Register(shader);
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
+Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
+ auto& memory_manager{system.GPU().MemoryManager()};
+ const auto host_ptr{memory_manager.GetPointer(code_addr)};
+ auto kernel = TryGet(host_ptr);
+ if (kernel) {
+ return kernel;
+ }
+
+ // No kernel found - create a new one
+ auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
+ const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
+ const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
+ const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
+ host_ptr, unique_identifier};
+
+ const auto found = precompiled_shaders.find(unique_identifier);
+ if (found == precompiled_shaders.end()) {
+ kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
+ } else {
+ kernel = CachedShader::CreateKernelFromCache(params, found->second);
+ }
+
+ Register(kernel);
+ return kernel;
+}
+
} // namespace OpenGL