summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl/gl_shader_decompiler.cpp')
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp394
1 files changed, 299 insertions, 95 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 3a41ed30c..3adf7f0cb 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -23,8 +23,9 @@
#include "video_core/shader/ast.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader/transform_feedback.h"
-namespace OpenGL::GLShader {
+namespace OpenGL {
namespace {
@@ -36,6 +37,8 @@ using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::Register;
+using VideoCommon::Shader::BuildTransformFeedback;
+using VideoCommon::Shader::Registry;
using namespace std::string_literals;
using namespace VideoCommon::Shader;
@@ -48,6 +51,11 @@ class ExprDecompiler;
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
+constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
+
+constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
+constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
+
struct TextureOffset {};
struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
@@ -56,6 +64,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
+constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
+#define ftou floatBitsToUint
+#define itof intBitsToFloat
+#define utof uintBitsToFloat
+
+bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
+ bvec2 is_nan1 = isnan(pair1);
+ bvec2 is_nan2 = isnan(pair2);
+ return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
+}}
+
+const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
+const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
+
+layout (std140, binding = {}) uniform vs_config {{
+ float y_direction;
+}};
+)";
+
class ShaderWriter final {
public:
void AddExpression(std::string_view text) {
@@ -269,12 +296,41 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
}
}
+/// Describes primitive behavior on geometry shaders
+std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
+ switch (topology) {
+ case Maxwell::PrimitiveTopology::Points:
+ return {"points", 1};
+ case Maxwell::PrimitiveTopology::Lines:
+ case Maxwell::PrimitiveTopology::LineStrip:
+ return {"lines", 2};
+ case Maxwell::PrimitiveTopology::LinesAdjacency:
+ case Maxwell::PrimitiveTopology::LineStripAdjacency:
+ return {"lines_adjacency", 4};
+ case Maxwell::PrimitiveTopology::Triangles:
+ case Maxwell::PrimitiveTopology::TriangleStrip:
+ case Maxwell::PrimitiveTopology::TriangleFan:
+ return {"triangles", 3};
+ case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+ case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+ return {"triangles_adjacency", 6};
+ default:
+ UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
+ return {"points", 1};
+ }
+}
+
/// Generates code to use for a swizzle operation.
-constexpr const char* GetSwizzle(u32 element) {
+constexpr const char* GetSwizzle(std::size_t element) {
constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
return swizzle.at(element);
}
+constexpr const char* GetColorSwizzle(std::size_t element) {
+ constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
+ return swizzle.at(element);
+}
+
/// Translate topology
std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
switch (topology) {
@@ -341,11 +397,66 @@ std::string FlowStackTopName(MetaStackClass stack) {
return stage == ShaderType::Vertex;
}
+struct GenericVaryingDescription {
+ std::string name;
+ u8 first_element = 0;
+ bool is_scalar = false;
+};
+
class GLSLDecompiler final {
public:
- explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage,
- std::string suffix)
- : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
+ explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
+ ShaderType stage, std::string_view identifier, std::string_view suffix)
+ : device{device}, ir{ir}, registry{registry}, stage{stage},
+ identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
+ if (stage != ShaderType::Compute) {
+ transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
+ }
+ }
+
+ void Decompile() {
+ DeclareHeader();
+ DeclareVertex();
+ DeclareGeometry();
+ DeclareFragment();
+ DeclareCompute();
+ DeclareInputAttributes();
+ DeclareOutputAttributes();
+ DeclareImages();
+ DeclareSamplers();
+ DeclareGlobalMemory();
+ DeclareConstantBuffers();
+ DeclareLocalMemory();
+ DeclareRegisters();
+ DeclarePredicates();
+ DeclareInternalFlags();
+ DeclareCustomVariables();
+ DeclarePhysicalAttributeReader();
+
+ code.AddLine("void main() {{");
+ ++code.scope;
+
+ if (stage == ShaderType::Vertex) {
+ code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
+ }
+
+ if (ir.IsDecompiled()) {
+ DecompileAST();
+ } else {
+ DecompileBranchMode();
+ }
+
+ --code.scope;
+ code.AddLine("}}");
+ }
+
+ std::string GetResult() {
+ return code.GetResult();
+ }
+
+private:
+ friend class ASTDecompiler;
+ friend class ExprDecompiler;
void DecompileBranchMode() {
// VM's program counter
@@ -387,43 +498,36 @@ public:
void DecompileAST();
- void Decompile() {
- DeclareVertex();
- DeclareGeometry();
- DeclareRegisters();
- DeclareCustomVariables();
- DeclarePredicates();
- DeclareLocalMemory();
- DeclareInternalFlags();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareConstantBuffers();
- DeclareGlobalMemory();
- DeclareSamplers();
- DeclareImages();
- DeclarePhysicalAttributeReader();
-
- code.AddLine("void execute_{}() {{", suffix);
- ++code.scope;
-
- if (ir.IsDecompiled()) {
- DecompileAST();
- } else {
- DecompileBranchMode();
+ void DeclareHeader() {
+ if (!identifier.empty()) {
+ code.AddLine("// {}", identifier);
+ }
+ code.AddLine("#version 440 core");
+ code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
+ if (device.HasShaderBallot()) {
+ code.AddLine("#extension GL_ARB_shader_ballot : require");
}
+ if (device.HasVertexViewportLayer()) {
+ code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
+ }
+ if (device.HasImageLoadFormatted()) {
+ code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
+ }
+ if (device.HasWarpIntrinsics()) {
+ code.AddLine("#extension GL_NV_gpu_shader5 : require");
+ code.AddLine("#extension GL_NV_shader_thread_group : require");
+ code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
+ }
+ // This pragma stops Nvidia's driver from over optimizing math (probably using fp16
+ // operations) on places where we don't want to.
+ // Thanks to Ryujinx for finding this workaround.
+ code.AddLine("#pragma optionNV(fastmath off)");
- --code.scope;
- code.AddLine("}}");
- }
+ code.AddNewLine();
- std::string GetResult() {
- return code.GetResult();
+ code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
}
-private:
- friend class ASTDecompiler;
- friend class ExprDecompiler;
-
void DeclareVertex() {
if (!IsVertexShader(stage))
return;
@@ -436,9 +540,15 @@ private:
return;
}
+ const auto& info = registry.GetGraphicsInfo();
+ const auto input_topology = info.primitive_topology;
+ const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
+ max_input_vertices = max_vertices;
+ code.AddLine("layout ({}) in;", glsl_topology);
+
const auto topology = GetTopologyName(header.common3.output_topology);
- const auto max_vertices = header.common4.max_output_vertices.Value();
- code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
+ const auto max_output_vertices = header.common4.max_output_vertices.Value();
+ code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
code.AddNewLine();
code.AddLine("in gl_PerVertex {{");
@@ -450,11 +560,40 @@ private:
DeclareVertexRedeclarations();
}
+ void DeclareFragment() {
+ if (stage != ShaderType::Fragment) {
+ return;
+ }
+ for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
+ code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
+ }
+ }
+
+ void DeclareCompute() {
+ if (stage != ShaderType::Compute) {
+ return;
+ }
+ const auto& info = registry.GetComputeInfo();
+ if (const u32 size = info.shared_memory_size_in_words; size > 0) {
+ code.AddLine("shared uint smem[{}];", size);
+ code.AddNewLine();
+ }
+ code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
+ info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
+ code.AddNewLine();
+ }
+
void DeclareVertexRedeclarations() {
code.AddLine("out gl_PerVertex {{");
++code.scope;
- code.AddLine("vec4 gl_Position;");
+ auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
+ if (!pos_xfb.empty()) {
+ pos_xfb = fmt::format("layout ({}) ", pos_xfb);
+ }
+ const char* pos_type =
+ FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
+ code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
for (const auto attribute : ir.GetOutputAttributes()) {
if (attribute == Attribute::Index::ClipDistances0123 ||
@@ -525,18 +664,16 @@ private:
}
void DeclareLocalMemory() {
+ u64 local_memory_size = 0;
if (stage == ShaderType::Compute) {
- code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
- code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
- code.AddLine("#endif");
- return;
+ local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
+ } else {
+ local_memory_size = header.GetLocalMemorySize();
}
-
- const u64 local_memory_size = header.GetLocalMemorySize();
if (local_memory_size == 0) {
return;
}
- const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
+ const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
}
@@ -589,7 +726,7 @@ private:
void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
const u32 location{GetGenericAttributeIndex(index)};
- std::string name{GetInputAttribute(index)};
+ std::string name{GetGenericInputAttribute(index)};
if (stage == ShaderType::Geometry) {
name = "gs_" + name + "[]";
}
@@ -626,9 +763,59 @@ private:
}
}
+ std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
+ const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+ const auto it = transform_feedback.find(location);
+ if (it == transform_feedback.end()) {
+ return {};
+ }
+ return it->second.components;
+ }
+
+ std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
+ const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+ const auto it = transform_feedback.find(location);
+ if (it == transform_feedback.end()) {
+ return {};
+ }
+
+ const VaryingTFB& tfb = it->second;
+ return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
+ tfb.offset, tfb.stride);
+ }
+
void DeclareOutputAttribute(Attribute::Index index) {
- const u32 location{GetGenericAttributeIndex(index)};
- code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
+ static constexpr std::string_view swizzle = "xyzw";
+ u8 element = 0;
+ while (element < 4) {
+ auto xfb = GetTransformFeedbackDecoration(index, element);
+ if (!xfb.empty()) {
+ xfb = fmt::format(", {}", xfb);
+ }
+ const std::size_t remainder = 4 - element;
+ const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
+ const char* const type = FLOAT_TYPES.at(num_components - 1);
+
+ const u32 location = GetGenericAttributeIndex(index);
+
+ GenericVaryingDescription description;
+ description.first_element = static_cast<u8>(element);
+ description.is_scalar = num_components == 1;
+ description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
+ if (element != 0 || num_components != 4) {
+ const std::string_view name_swizzle = swizzle.substr(element, num_components);
+ description.name = fmt::format("{}_{}", description.name, name_swizzle);
+ }
+ for (std::size_t i = 0; i < num_components; ++i) {
+ const u8 offset = static_cast<u8>(location * 4 + element + i);
+ varying_description.insert({offset, description});
+ }
+
+ code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
+ xfb, type, description.name);
+
+ element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
+ }
}
void DeclareConstantBuffers() {
@@ -925,7 +1112,8 @@ private:
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
- return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint());
+ return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
+ max_input_vertices.value());
}
return std::string(name);
};
@@ -980,7 +1168,7 @@ private:
return {"0", Type::Int};
default:
if (IsGenericAttribute(attribute)) {
- return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element),
+ return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
Type::Float};
}
break;
@@ -1049,8 +1237,7 @@ private:
return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
default:
if (IsGenericAttribute(attribute)) {
- return {
- {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}};
+ return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
return {};
@@ -1945,7 +2132,7 @@ private:
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
- code.AddLine("FragColor{}[{}] = {};", render_target, component,
+ code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
SafeGetRegister(current_reg).AsFloat());
++current_reg;
}
@@ -2261,27 +2448,34 @@ private:
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
std::string GetRegister(u32 index) const {
- return GetDeclarationWithSuffix(index, "gpr");
+ return AppendSuffix(index, "gpr");
}
std::string GetCustomVariable(u32 index) const {
- return GetDeclarationWithSuffix(index, "custom_var");
+ return AppendSuffix(index, "custom_var");
}
std::string GetPredicate(Tegra::Shader::Pred pred) const {
- return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
+ return AppendSuffix(static_cast<u32>(pred), "pred");
}
- std::string GetInputAttribute(Attribute::Index attribute) const {
- return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr");
+ std::string GetGenericInputAttribute(Attribute::Index attribute) const {
+ return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
}
- std::string GetOutputAttribute(Attribute::Index attribute) const {
- return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr");
+ std::unordered_map<u8, GenericVaryingDescription> varying_description;
+
+ std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
+ const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
+ const auto& description = varying_description.at(offset);
+ if (description.is_scalar) {
+ return description.name;
+ }
+ return fmt::format("{}[{}]", description.name, element - description.first_element);
}
std::string GetConstBuffer(u32 index) const {
- return GetDeclarationWithSuffix(index, "cbuf");
+ return AppendSuffix(index, "cbuf");
}
std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
@@ -2294,11 +2488,15 @@ private:
}
std::string GetConstBufferBlock(u32 index) const {
- return GetDeclarationWithSuffix(index, "cbuf_block");
+ return AppendSuffix(index, "cbuf_block");
}
std::string GetLocalMemory() const {
- return "lmem_" + suffix;
+ if (suffix.empty()) {
+ return "lmem";
+ } else {
+ return "lmem_" + std::string{suffix};
+ }
}
std::string GetInternalFlag(InternalFlag flag) const {
@@ -2307,19 +2505,27 @@ private:
const auto index = static_cast<u32>(flag);
ASSERT(index < static_cast<u32>(InternalFlag::Amount));
- return fmt::format("{}_{}", InternalFlagNames[index], suffix);
+ if (suffix.empty()) {
+ return InternalFlagNames[index];
+ } else {
+ return fmt::format("{}_{}", InternalFlagNames[index], suffix);
+ }
}
std::string GetSampler(const Sampler& sampler) const {
- return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
+ return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
}
std::string GetImage(const Image& image) const {
- return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
+ return AppendSuffix(static_cast<u32>(image.GetIndex()), "image");
}
- std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
- return fmt::format("{}_{}_{}", name, index, suffix);
+ std::string AppendSuffix(u32 index, std::string_view name) const {
+ if (suffix.empty()) {
+ return fmt::format("{}{}", name, index);
+ } else {
+ return fmt::format("{}{}_{}", name, index, suffix);
+ }
}
u32 GetNumPhysicalInputAttributes() const {
@@ -2334,17 +2540,31 @@ private:
return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
}
+ bool IsRenderTargetEnabled(u32 render_target) const {
+ for (u32 component = 0; component < 4; ++component) {
+ if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
const Device& device;
const ShaderIR& ir;
+ const Registry& registry;
const ShaderType stage;
- const std::string suffix;
+ const std::string_view identifier;
+ const std::string_view suffix;
const Header header;
+ std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
+
+ std::optional<u32> max_input_vertices;
};
-std::string GetFlowVariable(u32 i) {
- return fmt::format("flow_var_{}", i);
+std::string GetFlowVariable(u32 index) {
+ return fmt::format("flow_var{}", index);
}
class ExprDecompiler {
@@ -2531,7 +2751,7 @@ void GLSLDecompiler::DecompileAST() {
} // Anonymous namespace
-ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
+ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
ShaderEntries entries;
for (const auto& cbuf : ir.GetConstantBuffers()) {
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2555,28 +2775,12 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
return entries;
}
-std::string GetCommonDeclarations() {
- return R"(#define ftoi floatBitsToInt
-#define ftou floatBitsToUint
-#define itof intBitsToFloat
-#define utof uintBitsToFloat
-
-bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
- bvec2 is_nan1 = isnan(pair1);
- bvec2 is_nan2 = isnan(pair2);
- return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
-}
-
-const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
-const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
-)";
-}
-
-std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage,
- const std::string& suffix) {
- GLSLDecompiler decompiler(device, ir, stage, suffix);
+std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
+ ShaderType stage, std::string_view identifier,
+ std::string_view suffix) {
+ GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
decompiler.Decompile();
return decompiler.GetResult();
}
-} // namespace OpenGL::GLShader
+} // namespace OpenGL