summaryrefslogtreecommitdiffstats
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/ast.cpp4
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp110
-rw-r--r--src/video_core/shader/const_buffer_locker.h80
-rw-r--r--src/video_core/shader/control_flow.cpp383
-rw-r--r--src/video_core/shader/control_flow.h69
-rw-r--r--src/video_core/shader/decode.cpp41
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp6
-rw-r--r--src/video_core/shader/decode/image.cpp4
-rw-r--r--src/video_core/shader/decode/other.cpp2
-rw-r--r--src/video_core/shader/decode/shift.cpp2
-rw-r--r--src/video_core/shader/decode/texture.cpp78
-rw-r--r--src/video_core/shader/decode/video.cpp2
-rw-r--r--src/video_core/shader/decode/warp.cpp7
-rw-r--r--src/video_core/shader/expr.h21
-rw-r--r--src/video_core/shader/shader_ir.cpp127
-rw-r--r--src/video_core/shader/shader_ir.h26
16 files changed, 722 insertions, 240 deletions
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
index e43aecc18..3f96d9076 100644
--- a/src/video_core/shader/ast.cpp
+++ b/src/video_core/shader/ast.cpp
@@ -228,6 +228,10 @@ public:
inner += expr.value ? "true" : "false";
}
+    /// Appends the textual form of a register/immediate equality test to the output.
+    void operator()(const ExprGprEqual& expr) {
+        const std::string gpr_index = std::to_string(expr.gpr);
+        const std::string expected = std::to_string(expr.value);
+        inner += "( gpr_" + gpr_index + " == " + expected + ')';
+    }
+
const std::string& GetResult() const {
return inner;
}
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
new file mode 100644
index 000000000..fe467608e
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -0,0 +1,110 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/const_buffer_locker.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Engines::SamplerDescriptor;
+
+/// Creates a locker for the given shader stage without attaching an engine. The `engine` member
+/// keeps its default value, so only entries registered through the Insert* functions can be
+/// obtained from this locker.
+ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
+ : stage{shader_stage} {}
+
+/// Creates a locker for the given shader stage that reads missing entries from `engine`.
+/// Only the address of `engine` is stored; the locker does not take ownership of it.
+ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
+ Tegra::Engines::ConstBufferEngineInterface& engine)
+ : stage{shader_stage}, engine{&engine} {}
+
+/// Defaulted destructor, defined out of line. The engine pointer is not released here.
+ConstBufferLocker::~ConstBufferLocker() = default;
+
+/// Returns the 32-bit value registered for (buffer, offset). When nothing is registered yet the
+/// value is read from the engine and registered; without an engine std::nullopt is returned.
+std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
+    const std::pair<u32, u32> key{buffer, offset};
+    if (const auto cached = keys.find(key); cached != keys.end()) {
+        return cached->second;
+    }
+    if (engine == nullptr) {
+        return std::nullopt;
+    }
+    const u32 fetched = engine->AccessConstBuffer32(stage, buffer, offset);
+    keys.emplace(key, fetched);
+    return fetched;
+}
+
+/// Returns the bound sampler registered for `offset`. When nothing is registered yet the sampler
+/// is read from the engine and registered; without an engine std::nullopt is returned.
+std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
+    const u32 key = offset;
+    if (const auto cached = bound_samplers.find(key); cached != bound_samplers.end()) {
+        return cached->second;
+    }
+    if (engine == nullptr) {
+        return std::nullopt;
+    }
+    const SamplerDescriptor fetched = engine->AccessBoundSampler(stage, offset);
+    bound_samplers.emplace(key, fetched);
+    return fetched;
+}
+
+/// Returns the bindless sampler registered for (buffer, offset). When nothing is registered yet
+/// the sampler is read from the engine and registered; without an engine std::nullopt is
+/// returned.
+std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
+    u32 buffer, u32 offset) {
+    const std::pair<u32, u32> key{buffer, offset};
+    if (const auto cached = bindless_samplers.find(key); cached != bindless_samplers.end()) {
+        return cached->second;
+    }
+    if (engine == nullptr) {
+        return std::nullopt;
+    }
+    const SamplerDescriptor fetched = engine->AccessBindlessSampler(stage, buffer, offset);
+    bindless_samplers.emplace(key, fetched);
+    return fetched;
+}
+
+/// Registers `value` for (buffer, offset), replacing any value registered before.
+void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
+    const std::pair<u32, u32> key{buffer, offset};
+    keys.insert_or_assign(key, value);
+}
+
+/// Registers `sampler` as the bound sampler at `offset`, replacing any sampler registered before.
+void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
+    const u32 key = offset;
+    bound_samplers.insert_or_assign(key, sampler);
+}
+
+/// Registers `sampler` as the bindless sampler at (buffer, offset), replacing any sampler
+/// registered before.
+void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
+    const std::pair<u32, u32> key{buffer, offset};
+    bindless_samplers.insert_or_assign(key, sampler);
+}
+
+/// Checks every registered key, bound sampler and bindless sampler against what the engine
+/// currently reports. Returns false on the first mismatch, and also when no engine is attached.
+bool ConstBufferLocker::IsConsistent() const {
+    if (engine == nullptr) {
+        return false;
+    }
+    // The three tables are checked in the same order as they are declared.
+    for (const auto& entry : keys) {
+        const auto [cbuf, offset] = entry.first;
+        const auto value = entry.second;
+        if (!(value == engine->AccessConstBuffer32(stage, cbuf, offset))) {
+            return false;
+        }
+    }
+    for (const auto& entry : bound_samplers) {
+        const auto [key, value] = entry;
+        if (!(value == engine->AccessBoundSampler(stage, key))) {
+            return false;
+        }
+    }
+    for (const auto& entry : bindless_samplers) {
+        const auto [cbuf, offset] = entry.first;
+        const auto value = entry.second;
+        if (!(value == engine->AccessBindlessSampler(stage, cbuf, offset))) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/// Returns true when this locker and `rhs` hold the same keys, bound samplers and bindless
+/// samplers.
+bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
+    if (!(keys == rhs.keys)) {
+        return false;
+    }
+    if (!(bound_samplers == rhs.bound_samplers)) {
+        return false;
+    }
+    return bindless_samplers == rhs.bindless_samplers;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
new file mode 100644
index 000000000..600e2f3c3
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -0,0 +1,80 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_map>
+#include "common/common_types.h"
+#include "common/hash.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
+
+namespace VideoCommon::Shader {
+
+using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
+using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+using BindlessSamplerMap =
+ std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
+
+/**
+ * The ConstBufferLocker is a class used to interface the 3D and compute engines with the shader
+ * compiler. With it, the shader can obtain required data from GPU state and store it for disk
+ * shader compilation.
+ **/
+class ConstBufferLocker {
+public:
+ /// Creates a locker for `shader_stage` with no engine attached. Entries that were not
+ /// registered through the Insert* functions cannot be obtained from such a locker.
+ explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
+
+ /// Creates a locker for `shader_stage` that reads entries it does not hold yet from `engine`.
+ /// Only a pointer to `engine` is kept.
+ explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
+ Tegra::Engines::ConstBufferEngineInterface& engine);
+
+ ~ConstBufferLocker();
+
+ /// Retrieves a key from the locker. If it is registered, the registered value is returned;
+ /// otherwise it is read from the engine and registered. Returns std::nullopt when the key is
+ /// not registered and no engine is attached.
+ std::optional<u32> ObtainKey(u32 buffer, u32 offset);
+
+ /// Retrieves the bound sampler at `offset`, following the same rules as ObtainKey.
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
+
+ /// Retrieves the bindless sampler at (buffer, offset), following the same rules as ObtainKey.
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
+
+ /// Inserts a key, replacing the value if the key is already registered.
+ void InsertKey(u32 buffer, u32 offset, u32 value);
+
+ /// Inserts a bound sampler key, replacing the sampler if the key is already registered.
+ void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+ /// Inserts a bindless sampler key, replacing the sampler if the key is already registered.
+ void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+ /// Checks keys and samplers against the engine's current const buffers. Returns true if they
+ /// all hold the same values, false otherwise. Always returns false when no engine is attached.
+ bool IsConsistent() const;
+
+ /// Returns true if the keys and samplers are equal to the ones in the other locker.
+ bool HasEqualKeys(const ConstBufferLocker& rhs) const;
+
+ /// Gives a getter to the const buffer keys in the database.
+ const KeyMap& GetKeys() const {
+ return keys;
+ }
+
+ /// Gets samplers database.
+ const BoundSamplerMap& GetBoundSamplers() const {
+ return bound_samplers;
+ }
+
+ /// Gets bindless samplers database.
+ const BindlessSamplerMap& GetBindlessSamplers() const {
+ return bindless_samplers;
+ }
+
+private:
+ /// Shader stage passed to every engine access.
+ const Tegra::Engines::ShaderType stage;
+ /// Engine used to read entries that are not registered yet; null when none is attached.
+ Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
+ /// Const buffer values, indexed by (buffer, offset).
+ KeyMap keys;
+ /// Bound samplers, indexed by offset.
+ BoundSamplerMap bound_samplers;
+ /// Bindless samplers, indexed by (buffer, offset).
+ BindlessSamplerMap bindless_samplers;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 9d21f45de..d47c63d9f 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -35,14 +35,20 @@ struct BlockStack {
std::stack<u32> pbk_stack{};
};
-struct BlockBranchInfo {
- Condition condition{};
- s32 address{exit_branch};
- bool kill{};
- bool is_sync{};
- bool is_brk{};
- bool ignore{};
-};
+/// Builds a branch of type T from `args` and wraps it in a shared BranchData variant.
+template <typename T, typename... Args>
+BlockBranchInfo MakeBranchInfo(Args&&... args) {
+    static_assert(std::is_convertible_v<T, BranchData>);
+    // Construct the concrete branch first, then move it into the shared variant.
+    auto branch = T(std::forward<Args>(args)...);
+    return std::make_shared<BranchData>(std::move(branch));
+}
+
+/// Returns true when `first` holds a SingleBranch whose `ignore` flag is set. A MultiBranch or
+/// an empty pointer is never ignored.
+///
+/// The pointer is taken by const reference so the call does not copy the shared_ptr.
+bool BlockBranchIsIgnored(const BlockBranchInfo& first) {
+    // std::get_if yields nullptr both for a null variant pointer and for a variant holding a
+    // different alternative, so a single lookup covers every case.
+    const auto* branch = std::get_if<SingleBranch>(first.get());
+    return branch != nullptr && branch->ignore;
+}
struct BlockInfo {
u32 start{};
@@ -56,10 +62,11 @@ struct BlockInfo {
};
struct CFGRebuildState {
- explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
- const u32 start)
- : start{start}, program_code{program_code}, program_size{program_size} {}
+ explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
+ : program_code{program_code}, start{start}, locker{locker} {}
+ const ProgramCode& program_code;
+ ConstBufferLocker& locker;
u32 start{};
std::vector<BlockInfo> block_info{};
std::list<u32> inspect_queries{};
@@ -69,8 +76,6 @@ struct CFGRebuildState {
std::map<u32, u32> ssy_labels{};
std::map<u32, u32> pbk_labels{};
std::unordered_map<u32, BlockStack> stacks{};
- const ProgramCode& program_code;
- const std::size_t program_size;
ASTManager* manager;
};
@@ -124,10 +129,116 @@ enum class ParseResult : u32 {
AbnormalFlow,
};
+/// Result of TrackBranchIndirectInfo: where the jump table used by a BRX instruction lives.
+struct BranchIndirectInfo {
+ /// Const buffer index taken from the tracked LD_C instruction.
+ u32 buffer{};
+ /// Offset inside that const buffer, taken from the tracked LD_C instruction.
+ u32 offset{};
+ /// Number of table entries: the immediate of the tracked IMNMX instruction plus one.
+ u32 entries{};
+ /// Value of GetBranchExtend() on the BRX instruction.
+ s32 relative_position{};
+};
+
+/// Scans the program backwards for the closest instruction accepted by `matches`, starting at
+/// the position right below `cursor` and stopping at state.start. Positions reported by
+/// IsSchedInstruction and words that OpCode::Decode cannot decode are skipped.
+///
+/// On success `cursor` is left on the matching instruction, so a following scan resumes right
+/// below it. On failure `cursor` ends at state.start and std::nullopt is returned.
+template <typename Predicate>
+std::optional<Instruction> FindPreviousInstruction(const CFGRebuildState& state, u32& cursor,
+                                                   Predicate&& matches) {
+    // Testing before decrementing keeps the unsigned cursor from wrapping around when
+    // state.start is 0, which would otherwise index far outside of the program.
+    while (cursor > state.start) {
+        --cursor;
+        if (IsSchedInstruction(cursor, state.start)) {
+            continue;
+        }
+        const Instruction instr = {state.program_code[cursor]};
+        const auto opcode = OpCode::Decode(instr);
+        if (!opcode) {
+            // Not a known instruction, so it cannot be part of the tracked sequence.
+            continue;
+        }
+        if (matches(instr, opcode->get().GetId())) {
+            return instr;
+        }
+    }
+    return std::nullopt;
+}
+
+/// Tries to recover the jump table behind the BRX instruction at `current_position`.
+///
+/// Walking backwards from the BRX, the register it reads (gpr8) is followed through three
+/// instructions, each of which must write (gpr0) the register tracked so far:
+///   1. LD_C with UniformType::Single, which provides the const buffer index and offset;
+///   2. SHL_IMM;
+///   3. IMNMX_IMM, whose immediate plus one is the number of entries.
+///
+/// @param state            Rebuild state providing the program code and the shader start.
+/// @param start_address    Unused; kept so existing callers stay valid.
+/// @param current_position Position of the BRX instruction inside the program code.
+/// @return The table description, or std::nullopt when the instruction is not a BRX on constant
+///         buffer 0 or when any of the three tracked instructions cannot be found.
+std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state,
+                                                          [[maybe_unused]] u32 start_address,
+                                                          u32 current_position) {
+    // Step 0 Get BRX Info
+    const Instruction brx = {state.program_code[current_position]};
+    const auto brx_opcode = OpCode::Decode(brx);
+    if (!brx_opcode || brx_opcode->get().GetId() != OpCode::Id::BRX) {
+        return std::nullopt;
+    }
+    if (brx.brx.constant_buffer != 0) {
+        return std::nullopt;
+    }
+
+    BranchIndirectInfo result{};
+    result.relative_position = brx.brx.GetBranchExtend();
+    u64 track_register = brx.gpr8.Value();
+    u32 cursor = current_position;
+
+    // Step 1 Track LDC
+    const auto ldc =
+        FindPreviousInstruction(state, cursor, [&](const Instruction& instr, OpCode::Id id) {
+            return id == OpCode::Id::LD_C && instr.gpr0.Value() == track_register &&
+                   instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
+        });
+    if (!ldc) {
+        return std::nullopt;
+    }
+    result.buffer = ldc->cbuf36.index.Value();
+    result.offset = static_cast<u32>(ldc->cbuf36.GetOffset());
+    track_register = ldc->gpr8.Value();
+
+    // Step 2 Track SHL
+    const auto shl =
+        FindPreviousInstruction(state, cursor, [&](const Instruction& instr, OpCode::Id id) {
+            return id == OpCode::Id::SHL_IMM && instr.gpr0.Value() == track_register;
+        });
+    if (!shl) {
+        return std::nullopt;
+    }
+    track_register = shl->gpr8.Value();
+
+    // Step 3 Track IMNMX
+    const auto imnmx =
+        FindPreviousInstruction(state, cursor, [&](const Instruction& instr, OpCode::Id id) {
+            return id == OpCode::Id::IMNMX_IMM && instr.gpr0.Value() == track_register;
+        });
+    if (!imnmx) {
+        return std::nullopt;
+    }
+    result.entries = imnmx->alu.GetSignedImm20_20() + 1;
+    return result;
+}
+
std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
u32 offset = static_cast<u32>(address);
- const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction));
+ const u32 end_address = static_cast<u32>(state.program_code.size());
ParseInfo parse_info{};
+ SingleBranch single_branch{};
const auto insert_label = [](CFGRebuildState& state, u32 address) {
const auto pair = state.labels.emplace(address);
@@ -140,13 +251,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
if (offset >= end_address) {
// ASSERT_OR_EXECUTE can't be used, as it ignores the break
ASSERT_MSG(false, "Shader passed the current limit!");
- parse_info.branch_info.address = exit_branch;
- parse_info.branch_info.ignore = false;
+
+ single_branch.address = exit_branch;
+ single_branch.ignore = false;
break;
}
if (state.registered.count(offset) != 0) {
- parse_info.branch_info.address = offset;
- parse_info.branch_info.ignore = true;
+ single_branch.address = offset;
+ single_branch.ignore = true;
break;
}
if (IsSchedInstruction(offset, state.start)) {
@@ -163,24 +275,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
switch (opcode->get().GetId()) {
case OpCode::Id::EXIT: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
- parse_info.branch_info.address = exit_branch;
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = false;
- parse_info.branch_info.ignore = false;
+ single_branch.address = exit_branch;
+ single_branch.kill = false;
+ single_branch.is_sync = false;
+ single_branch.is_brk = false;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
@@ -189,99 +303,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
return {ParseResult::AbnormalFlow, parse_info};
}
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
const u32 branch_offset = offset + instr.bra.GetBranchTarget();
if (branch_offset == 0) {
- parse_info.branch_info.address = exit_branch;
+ single_branch.address = exit_branch;
} else {
- parse_info.branch_info.address = branch_offset;
+ single_branch.address = branch_offset;
}
insert_label(state, branch_offset);
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = false;
- parse_info.branch_info.ignore = false;
+ single_branch.kill = false;
+ single_branch.is_sync = false;
+ single_branch.is_brk = false;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
case OpCode::Id::SYNC: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
- parse_info.branch_info.address = unassigned_branch;
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = true;
- parse_info.branch_info.is_brk = false;
- parse_info.branch_info.ignore = false;
+ single_branch.address = unassigned_branch;
+ single_branch.kill = false;
+ single_branch.is_sync = true;
+ single_branch.is_brk = false;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
case OpCode::Id::BRK: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
- parse_info.branch_info.address = unassigned_branch;
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = true;
- parse_info.branch_info.ignore = false;
+ single_branch.address = unassigned_branch;
+ single_branch.kill = false;
+ single_branch.is_sync = false;
+ single_branch.is_brk = true;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
case OpCode::Id::KIL: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
- parse_info.branch_info.address = exit_branch;
- parse_info.branch_info.kill = true;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = false;
- parse_info.branch_info.ignore = false;
+ single_branch.address = exit_branch;
+ single_branch.kill = true;
+ single_branch.is_sync = false;
+ single_branch.is_brk = false;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
@@ -298,6 +420,29 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
break;
}
case OpCode::Id::BRX: {
+ auto tmp = TrackBranchIndirectInfo(state, address, offset);
+ if (tmp) {
+ auto result = *tmp;
+ std::vector<CaseBranch> branches{};
+ s32 pc_target = offset + result.relative_position;
+ for (u32 i = 0; i < result.entries; i++) {
+ auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
+ if (!k) {
+ return {ParseResult::AbnormalFlow, parse_info};
+ }
+ u32 value = *k;
+ u32 target = static_cast<u32>((value >> 3) + pc_target);
+ insert_label(state, target);
+ branches.emplace_back(value, target);
+ }
+ parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<MultiBranch>(
+ static_cast<u32>(instr.gpr8.Value()), std::move(branches));
+
+ return {ParseResult::ControlCaught, parse_info};
+ } else {
+ LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
+ }
return {ParseResult::AbnormalFlow, parse_info};
}
default:
@@ -306,10 +451,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
offset++;
}
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = false;
+ single_branch.kill = false;
+ single_branch.is_sync = false;
+ single_branch.is_brk = false;
parse_info.end_address = offset - 1;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
+ single_branch.is_brk, single_branch.ignore);
return {ParseResult::BlockEnd, parse_info};
}
@@ -333,9 +481,10 @@ bool TryInspectAddress(CFGRebuildState& state) {
BlockInfo& current_block = state.block_info[block_index];
current_block.end = address - 1;
new_block.branch = current_block.branch;
- BlockBranchInfo forward_branch{};
- forward_branch.address = address;
- forward_branch.ignore = true;
+ BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
+ const auto branch = std::get_if<SingleBranch>(forward_branch.get());
+ branch->address = address;
+ branch->ignore = true;
current_block.branch = forward_branch;
return true;
}
@@ -350,12 +499,15 @@ bool TryInspectAddress(CFGRebuildState& state) {
BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
block_info.branch = parse_info.branch_info;
- if (parse_info.branch_info.condition.IsUnconditional()) {
+ if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
+ const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
+ if (branch->condition.IsUnconditional()) {
+ return true;
+ }
+ const u32 fallthrough_address = parse_info.end_address + 1;
+ state.inspect_queries.push_front(fallthrough_address);
return true;
}
-
- const u32 fallthrough_address = parse_info.end_address + 1;
- state.inspect_queries.push_front(fallthrough_address);
return true;
}
@@ -393,31 +545,42 @@ bool TryQuery(CFGRebuildState& state) {
state.queries.pop_front();
gather_labels(q2.ssy_stack, state.ssy_labels, block);
gather_labels(q2.pbk_stack, state.pbk_labels, block);
- if (!block.branch.condition.IsUnconditional()) {
- q2.address = block.end + 1;
- state.queries.push_back(q2);
- }
+ if (std::holds_alternative<SingleBranch>(*block.branch)) {
+ const auto branch = std::get_if<SingleBranch>(block.branch.get());
+ if (!branch->condition.IsUnconditional()) {
+ q2.address = block.end + 1;
+ state.queries.push_back(q2);
+ }
- Query conditional_query{q2};
- if (block.branch.is_sync) {
- if (block.branch.address == unassigned_branch) {
- block.branch.address = conditional_query.ssy_stack.top();
+ Query conditional_query{q2};
+ if (branch->is_sync) {
+ if (branch->address == unassigned_branch) {
+ branch->address = conditional_query.ssy_stack.top();
+ }
+ conditional_query.ssy_stack.pop();
}
- conditional_query.ssy_stack.pop();
- }
- if (block.branch.is_brk) {
- if (block.branch.address == unassigned_branch) {
- block.branch.address = conditional_query.pbk_stack.top();
+ if (branch->is_brk) {
+ if (branch->address == unassigned_branch) {
+ branch->address = conditional_query.pbk_stack.top();
+ }
+ conditional_query.pbk_stack.pop();
}
- conditional_query.pbk_stack.pop();
+ conditional_query.address = branch->address;
+ state.queries.push_back(std::move(conditional_query));
+ return true;
+ }
+ const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+ for (const auto& branch_case : multi_branch->branches) {
+ Query conditional_query{q2};
+ conditional_query.address = branch_case.address;
+ state.queries.push_back(std::move(conditional_query));
}
- conditional_query.address = block.branch.address;
- state.queries.push_back(std::move(conditional_query));
return true;
}
+
} // Anonymous namespace
-void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) {
+void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
const auto get_expr = ([&](const Condition& cond) -> Expr {
Expr result{};
if (cond.cc != ConditionCode::T) {
@@ -444,15 +607,24 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) {
}
return MakeExpr<ExprBoolean>(true);
});
- if (branch.address < 0) {
- if (branch.kill) {
- mm.InsertReturn(get_expr(branch.condition), true);
+ if (std::holds_alternative<SingleBranch>(*branch_info)) {
+ const auto branch = std::get_if<SingleBranch>(branch_info.get());
+ if (branch->address < 0) {
+ if (branch->kill) {
+ mm.InsertReturn(get_expr(branch->condition), true);
+ return;
+ }
+ mm.InsertReturn(get_expr(branch->condition), false);
return;
}
- mm.InsertReturn(get_expr(branch.condition), false);
+ mm.InsertGoto(get_expr(branch->condition), branch->address);
return;
}
- mm.InsertGoto(get_expr(branch.condition), branch.address);
+ const auto multi_branch = std::get_if<MultiBranch>(branch_info.get());
+ for (const auto& branch_case : multi_branch->branches) {
+ mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
+ branch_case.address);
+ }
}
void DecompileShader(CFGRebuildState& state) {
@@ -464,25 +636,26 @@ void DecompileShader(CFGRebuildState& state) {
if (state.labels.count(block.start) != 0) {
state.manager->InsertLabel(block.start);
}
- u32 end = block.branch.ignore ? block.end + 1 : block.end;
+ const bool ignore = BlockBranchIsIgnored(block.branch);
+ u32 end = ignore ? block.end + 1 : block.end;
state.manager->InsertBlock(block.start, end);
- if (!block.branch.ignore) {
+ if (!ignore) {
InsertBranch(*state.manager, block.branch);
}
}
state.manager->Decompile();
}
-std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
- std::size_t program_size, u32 start_address,
- const CompilerSettings& settings) {
+std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
+ const CompilerSettings& settings,
+ ConstBufferLocker& locker) {
auto result_out = std::make_unique<ShaderCharacteristics>();
if (settings.depth == CompileDepth::BruteForce) {
result_out->settings.depth = CompileDepth::BruteForce;
return result_out;
}
- CFGRebuildState state{program_code, program_size, start_address};
+ CFGRebuildState state{program_code, start_address, locker};
// Inspect Code and generate blocks
state.labels.clear();
state.labels.emplace(start_address);
@@ -547,11 +720,9 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
ShaderBlock new_block{};
new_block.start = block.start;
new_block.end = block.end;
- new_block.ignore_branch = block.branch.ignore;
+ new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
if (!new_block.ignore_branch) {
- new_block.branch.cond = block.branch.condition;
- new_block.branch.kills = block.branch.kill;
- new_block.branch.address = block.branch.address;
+ new_block.branch = block.branch;
}
result_out->end = std::max(result_out->end, block.end);
result_out->blocks.push_back(new_block);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 37e987d62..5304998b9 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -7,6 +7,7 @@
#include <list>
#include <optional>
#include <set>
+#include <variant>
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/ast.h"
@@ -37,29 +38,61 @@ struct Condition {
}
};
-struct ShaderBlock {
- struct Branch {
- Condition cond{};
- bool kills{};
- s32 address{};
+/// Branch with a single target, taken when `condition` holds.
+class SingleBranch {
+public:
+    SingleBranch() = default;
+    SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk,
+                 bool ignore)
+        : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk},
+          ignore{ignore} {}
+
+    /// Two branches are equal when every field matches.
+    bool operator==(const SingleBranch& b) const {
+        return condition == b.condition && address == b.address && kill == b.kill &&
+               is_sync == b.is_sync && is_brk == b.is_brk && ignore == b.ignore;
+    }
+
+    bool operator!=(const SingleBranch& b) const {
+        return !(*this == b);
+    }
+
+    Condition condition{};
+    s32 address{exit_branch};
+    bool kill{};
+    bool is_sync{};
+    bool is_brk{};
+    bool ignore{};
+};
- bool operator==(const Branch& b) const {
- return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address);
- }
+/// One entry of a MultiBranch: the branch to `address` is taken when the register of the
+/// MultiBranch equals `cmp_value`.
+struct CaseBranch {
+ CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {}
+ /// Value the register is compared against.
+ u32 cmp_value;
+ /// Target address of this case.
+ u32 address;
+};
+
+/// Branch with several possible targets, selected by the value held in a register.
+class MultiBranch {
+public:
+ /// Takes ownership of `branches`.
+ MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches)
+ : gpr{gpr}, branches{std::move(branches)} {}
+
+ /// Index of the register whose value selects the case.
+ u32 gpr{};
+ /// Cases of the branch, in the order they were given to the constructor.
+ std::vector<CaseBranch> branches{};
+};
+
+using BranchData = std::variant<SingleBranch, MultiBranch>;
+using BlockBranchInfo = std::shared_ptr<BranchData>;
- bool operator!=(const Branch& b) const {
- return !operator==(b);
- }
- };
+bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
+struct ShaderBlock {
u32 start{};
u32 end{};
bool ignore_branch{};
- Branch branch{};
+ BlockBranchInfo branch{};
bool operator==(const ShaderBlock& sb) const {
- return std::tie(start, end, ignore_branch, branch) ==
- std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch);
+ return std::tie(start, end, ignore_branch) ==
+ std::tie(sb.start, sb.end, sb.ignore_branch) &&
+ BlockBranchInfoAreEqual(branch, sb.branch);
}
bool operator!=(const ShaderBlock& sb) const {
@@ -76,8 +109,8 @@ struct ShaderCharacteristics {
CompilerSettings settings{};
};
-std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
- std::size_t program_size, u32 start_address,
- const CompilerSettings& settings);
+std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
+ const CompilerSettings& settings,
+ ConstBufferLocker& locker);
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2626b1616..21fb9cb83 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
return (absolute_offset % SchedPeriod) == 0;
}
-} // namespace
+} // Anonymous namespace
class ASTDecoder {
public:
@@ -102,7 +102,7 @@ void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
decompiled = false;
- auto info = ScanFlow(program_code, program_size, main_offset, settings);
+ auto info = ScanFlow(program_code, main_offset, settings, locker);
auto& shader_info = *info;
coverage_begin = shader_info.start;
coverage_end = shader_info.end;
@@ -155,7 +155,7 @@ void ShaderIR::Decode() {
[[fallthrough]];
case CompileDepth::BruteForce: {
coverage_begin = main_offset;
- const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
+ const std::size_t shader_end = program_code.size();
coverage_end = shader_end;
for (u32 label = main_offset; label < shader_end; label++) {
basic_blocks.insert({label, DecodeRange(label, label + 1)});
@@ -198,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
}
return result;
};
- if (block.branch.address < 0) {
- if (block.branch.kills) {
- Node n = Operation(OperationCode::Discard);
- n = apply_conditions(block.branch.cond, n);
+ if (std::holds_alternative<SingleBranch>(*block.branch)) {
+ auto branch = std::get_if<SingleBranch>(block.branch.get());
+ if (branch->address < 0) {
+ if (branch->kill) {
+ Node n = Operation(OperationCode::Discard);
+ n = apply_conditions(branch->condition, n);
+ bb.push_back(n);
+ global_code.push_back(n);
+ return;
+ }
+ Node n = Operation(OperationCode::Exit);
+ n = apply_conditions(branch->condition, n);
bb.push_back(n);
global_code.push_back(n);
return;
}
- Node n = Operation(OperationCode::Exit);
- n = apply_conditions(block.branch.cond, n);
+ Node n = Operation(OperationCode::Branch, Immediate(branch->address));
+ n = apply_conditions(branch->condition, n);
bb.push_back(n);
global_code.push_back(n);
return;
}
- Node n = Operation(OperationCode::Branch, Immediate(block.branch.address));
- n = apply_conditions(block.branch.cond, n);
- bb.push_back(n);
- global_code.push_back(n);
+ auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+ Node op_a = GetRegister(multi_branch->gpr);
+ for (auto& branch_case : multi_branch->branches) {
+ Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
+ Node op_b = Immediate(branch_case.cmp_value);
+ Node condition =
+ GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
+ auto result = Conditional(condition, {n});
+ bb.push_back(result);
+ global_code.push_back(result);
+ }
}
u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index b73f6536e..a33d242e9 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -144,7 +144,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
case OpCode::Id::ICMP_IMM: {
const Node zero = Immediate(0);
- const auto [op_b, test] = [&]() -> std::pair<Node, Node> {
+ const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::ICMP_CR:
return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
@@ -161,10 +161,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
return {zero, zero};
}
}();
- const Node op_a = GetRegister(instr.gpr8);
+ const Node op_lhs = GetRegister(instr.gpr8);
const Node comparison =
GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
- SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_a, op_b));
+ SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
break;
}
case OpCode::Id::LOP_C:
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 95ec1cdd9..b02d2cb95 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -144,8 +144,8 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
const auto offset{static_cast<std::size_t>(image.index.Value())};
- if (const auto image = TryUseExistingImage(offset, type)) {
- return *image;
+ if (const auto existing_image = TryUseExistingImage(offset, type)) {
+ return *existing_image;
}
const std::size_t next_index{used_images.size()};
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d46e0f823..116b95f76 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -67,7 +67,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::MOV_SYS: {
- const Node value = [&]() {
+ const Node value = [this, instr] {
switch (instr.sys20) {
case SystemVariable::Ydirection:
return Operation(OperationCode::YNegate);
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index f6ee68a54..d419e9c45 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -18,7 +18,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
const auto opcode = OpCode::Decode(instr);
Node op_a = GetRegister(instr.gpr8);
- Node op_b = [&]() {
+ Node op_b = [this, instr] {
if (instr.is_b_imm) {
return Immediate(instr.alu.GetSignedImm20_20());
} else if (instr.is_b_gpr) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0b934a069..d61e656b7 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
const auto& sampler =
- GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
+ GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@@ -150,7 +150,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
- WriteTexsInstructionFloat(bb, instr, values);
+ WriteTexsInstructionFloat(bb, instr, values, true);
break;
}
case OpCode::Id::TXQ_B:
@@ -165,10 +165,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const auto& sampler =
- is_bindless
- ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
- false)
- : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
+ is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {});
u32 indexer = 0;
switch (instr.txq.query_type) {
@@ -207,9 +204,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
- const auto& sampler = is_bindless
- ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false)
- : GetSampler(instr.sampler, texture_type, is_array, false);
+ const auto& sampler =
+ is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}})
+ : GetSampler(instr.sampler, {{texture_type, is_array, false}});
std::vector<Node> coords;
@@ -285,9 +282,26 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
return pc;
}
-const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
- bool is_array, bool is_shadow) {
- const auto offset = static_cast<std::size_t>(sampler.index.Value());
+const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
+ std::optional<SamplerInfo> sampler_info) {
+ const auto offset = static_cast<u32>(sampler.index.Value());
+
+ Tegra::Shader::TextureType type;
+ bool is_array;
+ bool is_shadow;
+ if (sampler_info) {
+ type = sampler_info->type;
+ is_array = sampler_info->is_array;
+ is_shadow = sampler_info->is_shadow;
+ } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) {
+ type = sampler->texture_type.Value();
+ is_array = sampler->is_array.Value() != 0;
+ is_shadow = sampler->is_shadow.Value() != 0;
+ } else {
+ type = Tegra::Shader::TextureType::Texture2D;
+ is_array = false;
+ is_shadow = false;
+ }
// If this sampler has already been used, return the existing mapping.
const auto itr =
@@ -303,15 +317,31 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
const std::size_t next_index = used_samplers.size();
const Sampler entry{offset, next_index, type, is_array, is_shadow};
return *used_samplers.emplace(entry).first;
-}
+} // namespace VideoCommon::Shader
-const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
- bool is_array, bool is_shadow) {
+const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
+ std::optional<SamplerInfo> sampler_info) {
const Node sampler_register = GetRegister(reg);
const auto [base_sampler, cbuf_index, cbuf_offset] =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
ASSERT(base_sampler != nullptr);
const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
+ Tegra::Shader::TextureType type;
+ bool is_array;
+ bool is_shadow;
+ if (sampler_info) {
+ type = sampler_info->type;
+ is_array = sampler_info->is_array;
+ is_shadow = sampler_info->is_shadow;
+ } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) {
+ type = sampler->texture_type.Value();
+ is_array = sampler->is_array.Value() != 0;
+ is_shadow = sampler->is_shadow.Value() != 0;
+ } else {
+ type = Tegra::Shader::TextureType::Texture2D;
+ is_array = false;
+ is_shadow = false;
+ }
// If this sampler has already been used, return the existing mapping.
const auto itr =
@@ -344,14 +374,14 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
}
}
-void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
- const Node4& components) {
+void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
+ bool ignore_mask) {
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
- if (!instr.texs.IsComponentEnabled(component))
+ if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
continue;
SetTemporary(bb, dest_elem++, components[component]);
}
@@ -411,9 +441,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
(texture_type == TextureType::TextureCube && is_array && is_shadow),
"This method is not supported.");
- const auto& sampler = is_bindless
- ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow)
- : GetSampler(instr.sampler, texture_type, is_array, is_shadow);
+ const auto& sampler =
+ is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}})
+ : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}});
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
@@ -577,7 +607,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
dc = GetRegister(parameter_register++);
}
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+ const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@@ -610,7 +640,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
// const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
// const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+ const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@@ -646,7 +676,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
// When lod is used always is in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+ const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 97fc6f9b1..b047cf870 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
const Node op_a =
GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
instr.video.type_a, instr.video.byte_height_a);
- const Node op_b = [&]() {
+ const Node op_b = [this, instr] {
if (instr.video.use_register_b) {
return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
instr.video.signed_b, instr.video.type_b,
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index a8e481b3c..fa8a250cc 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -46,9 +46,10 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::SHFL: {
- Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
- : GetRegister(instr.gpr39);
- Node width = [&] {
+ Node width = [this, instr] {
+ Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
+ : GetRegister(instr.gpr39);
+
// Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
// been done reversing Nvidia's math. It won't work on all cases due to SHFL having
// different parameters that don't properly map to GLSL's interface, but it should work
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
index d3dcd00ec..4e8264367 100644
--- a/src/video_core/shader/expr.h
+++ b/src/video_core/shader/expr.h
@@ -17,13 +17,14 @@ using Tegra::Shader::Pred;
class ExprAnd;
class ExprBoolean;
class ExprCondCode;
+class ExprGprEqual;
class ExprNot;
class ExprOr;
class ExprPredicate;
class ExprVar;
-using ExprData =
- std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, ExprBoolean>;
+using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
+ ExprBoolean, ExprGprEqual>;
using Expr = std::shared_ptr<ExprData>;
class ExprAnd final {
@@ -118,6 +119,22 @@ public:
bool value;
};
+class ExprGprEqual final {
+public:
+ ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {}
+
+ bool operator==(const ExprGprEqual& b) const {
+ return gpr == b.gpr && value == b.value;
+ }
+
+ bool operator!=(const ExprGprEqual& b) const {
+ return !operator==(b);
+ }
+
+ u32 gpr;
+ u32 value;
+};
+
template <typename T, typename... Args>
Expr MakeExpr(Args&&... args) {
static_assert(std::is_convertible_v<T, ExprData>);
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index c1f2b88c8..1d9825c76 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -2,8 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
+#include <array>
#include <cmath>
-#include <unordered_map>
#include "common/assert.h"
#include "common/common_types.h"
@@ -22,10 +23,9 @@ using Tegra::Shader::PredCondition;
using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
-ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size,
- CompilerSettings settings)
- : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{},
- program_manager{true, true}, settings{settings} {
+ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
+ ConstBufferLocker& locker)
+ : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
Decode();
}
@@ -271,21 +271,24 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
}
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
- const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
- {PredCondition::LessThan, OperationCode::LogicalFLessThan},
- {PredCondition::Equal, OperationCode::LogicalFEqual},
- {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
- {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
- {PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
- {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
- {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
- {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
- {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
- {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
- {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}};
-
- const auto comparison{PredicateComparisonTable.find(condition)};
- UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+ static constexpr std::array comparison_table{
+ std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan},
+ std::pair{PredCondition::Equal, OperationCode::LogicalFEqual},
+ std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
+ std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
+ std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
+ std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
+ std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
+ std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
+ std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
+ std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
+ std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual},
+ };
+
+ const auto comparison =
+ std::find_if(comparison_table.cbegin(), comparison_table.cend(),
+ [condition](const auto entry) { return condition == entry.first; });
+ UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
"Unknown predicate comparison operation");
Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
@@ -306,21 +309,24 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
Node op_b) {
- const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
- {PredCondition::LessThan, OperationCode::LogicalILessThan},
- {PredCondition::Equal, OperationCode::LogicalIEqual},
- {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
- {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
- {PredCondition::NotEqual, OperationCode::LogicalINotEqual},
- {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
- {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
- {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
- {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
- {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
- {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}};
-
- const auto comparison{PredicateComparisonTable.find(condition)};
- UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+ static constexpr std::array comparison_table{
+ std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan},
+ std::pair{PredCondition::Equal, OperationCode::LogicalIEqual},
+ std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual},
+ std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
+ std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual},
+ std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
+ std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
+ std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
+ std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
+ std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
+ std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual},
+ };
+
+ const auto comparison =
+ std::find_if(comparison_table.cbegin(), comparison_table.cend(),
+ [condition](const auto entry) { return condition == entry.first; });
+ UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
"Unknown predicate comparison operation");
Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
@@ -337,36 +343,43 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
Node op_b) {
- const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
- {PredCondition::LessThan, OperationCode::Logical2HLessThan},
- {PredCondition::Equal, OperationCode::Logical2HEqual},
- {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
- {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
- {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
- {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
- {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
- {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
- {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
- {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
- {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}};
-
- const auto comparison{PredicateComparisonTable.find(condition)};
- UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+ static constexpr std::array comparison_table{
+ std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan},
+ std::pair{PredCondition::Equal, OperationCode::Logical2HEqual},
+ std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
+ std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
+ std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
+ std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
+ std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
+ std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
+ std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
+ std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
+ std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan},
+ };
+
+ const auto comparison =
+ std::find_if(comparison_table.cbegin(), comparison_table.cend(),
+ [condition](const auto entry) { return condition == entry.first; });
+ UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
"Unknown predicate comparison operation");
return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
}
OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
- const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = {
- {PredOperation::And, OperationCode::LogicalAnd},
- {PredOperation::Or, OperationCode::LogicalOr},
- {PredOperation::Xor, OperationCode::LogicalXor},
+ static constexpr std::array operation_table{
+ OperationCode::LogicalAnd,
+ OperationCode::LogicalOr,
+ OperationCode::LogicalXor,
};
- const auto op = PredicateOperationTable.find(operation);
- UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation");
- return op->second;
+ const auto index = static_cast<std::size_t>(operation);
+ if (index >= operation_table.size()) {
+ UNIMPLEMENTED_MSG("Unknown predicate operation.");
+ return {};
+ }
+
+ return operation_table[index];
}
Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 91cd0a534..1fd44bde1 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -17,6 +17,7 @@
#include "video_core/engines/shader_header.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/compiler_settings.h"
+#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/node.h"
namespace VideoCommon::Shader {
@@ -66,8 +67,8 @@ struct GlobalMemoryUsage {
class ShaderIR final {
public:
- explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size,
- CompilerSettings settings);
+ explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
+ ConstBufferLocker& locker);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -172,6 +173,13 @@ public:
private:
friend class ASTDecoder;
+
+ struct SamplerInfo {
+ Tegra::Shader::TextureType type;
+ bool is_array;
+ bool is_shadow;
+ };
+
void Decode();
NodeBlock DecodeRange(u32 begin, u32 end);
@@ -296,12 +304,11 @@ private:
/// Accesses a texture sampler
const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
- Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
+ std::optional<SamplerInfo> sampler_info);
// Accesses a texture sampler for a bindless texture.
const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
- Tegra::Shader::TextureType type, bool is_array,
- bool is_shadow);
+ std::optional<SamplerInfo> sampler_info);
/// Accesses an image.
Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
@@ -322,7 +329,7 @@ private:
const Node4& components);
void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components);
+ const Node4& components, bool ignore_mask = false);
void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components);
@@ -377,7 +384,9 @@ private:
const ProgramCode& program_code;
const u32 main_offset;
- const std::size_t program_size;
+ const CompilerSettings settings;
+ ConstBufferLocker& locker;
+
bool decompiled{};
bool disable_flow_stack{};
@@ -386,8 +395,7 @@ private:
std::map<u32, NodeBlock> basic_blocks;
NodeBlock global_code;
- ASTManager program_manager;
- CompilerSettings settings{};
+ ASTManager program_manager{true, true};
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;