summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 48
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 43
-rw-r--r-- src/video_core/shader/decode/warp.cpp | 68
-rw-r--r-- src/video_core/shader/node.h | 11
5 files changed, 49 insertions, 122 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f1b89165d..a287b5ee1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -275,6 +275,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
std::string source = fmt::format(R"(// {}
#version 430 core
#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shader_ballot : enable
#extension GL_ARB_shader_viewport_layer_array : enable
#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_NV_gpu_shader5 : enable
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 92ee8459e..ca4e6e468 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1942,34 +1942,14 @@ private:
return Vote(operation, "allThreadsEqualNV");
}
- template <const std::string_view& func>
- Expression Shuffle(Operation operation) {
- const std::string value = VisitOperand(operation, 0).AsFloat();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
- // On a "single-thread" device we are either on the same thread or out of bounds. Both
- // cases return the passed value.
- return {value, Type::Float};
- }
-
- const std::string index = VisitOperand(operation, 1).AsUint();
- const std::string width = VisitOperand(operation, 2).AsUint();
- return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
+ Expression ThreadId(Operation operation) {
+ return {"gl_SubGroupInvocationARB", Type::Uint};
}
- template <const std::string_view& func>
- Expression InRangeShuffle(Operation operation) {
- const std::string index = VisitOperand(operation, 0).AsUint();
- const std::string width = VisitOperand(operation, 1).AsUint();
- if (!device.HasWarpIntrinsics()) {
- // On a "single-thread" device we are only in bounds when the requested index is 0.
- return {fmt::format("({} == 0U)", index), Type::Bool};
- }
-
- const std::string in_range = code.GenerateTemporary();
- code.AddLine("bool {};", in_range);
- code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
- return {in_range, Type::Bool};
+ Expression ShuffleIndexed(Operation operation) {
+ const std::string value = VisitOperand(operation, 0).AsFloat();
+ const std::string index = VisitOperand(operation, 1).AsUint();
+ return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
}
struct Func final {
@@ -1981,11 +1961,6 @@ private:
static constexpr std::string_view Or = "Or";
static constexpr std::string_view Xor = "Xor";
static constexpr std::string_view Exchange = "Exchange";
-
- static constexpr std::string_view ShuffleIndexed = "shuffleNV";
- static constexpr std::string_view ShuffleUp = "shuffleUpNV";
- static constexpr std::string_view ShuffleDown = "shuffleDownNV";
- static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
};
static constexpr std::array operation_decompilers = {
@@ -2151,15 +2126,8 @@ private:
&GLSLDecompiler::VoteAny,
&GLSLDecompiler::VoteEqual,
- &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>,
- &GLSLDecompiler::Shuffle<Func::ShuffleUp>,
- &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
- &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
-
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
- &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
+ &GLSLDecompiler::ThreadId,
+ &GLSLDecompiler::ShuffleIndexed,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 42cf068b6..383720ea1 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1195,42 +1195,12 @@ private:
return {};
}
- Id ShuffleIndexed(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id ShuffleUp(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id ShuffleDown(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id ShuffleButterfly(Operation) {
+ Id ThreadId(Operation) {
UNIMPLEMENTED();
return {};
}
- Id InRangeShuffleIndexed(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id InRangeShuffleUp(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id InRangeShuffleDown(Operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id InRangeShuffleButterfly(Operation) {
+ Id ShuffleIndexed(Operation) {
UNIMPLEMENTED();
return {};
}
@@ -1528,15 +1498,8 @@ private:
&SPIRVDecompiler::VoteAny,
&SPIRVDecompiler::VoteEqual,
+ &SPIRVDecompiler::ThreadId,
&SPIRVDecompiler::ShuffleIndexed,
- &SPIRVDecompiler::ShuffleUp,
- &SPIRVDecompiler::ShuffleDown,
- &SPIRVDecompiler::ShuffleButterfly,
-
- &SPIRVDecompiler::InRangeShuffleIndexed,
- &SPIRVDecompiler::InRangeShuffleUp,
- &SPIRVDecompiler::InRangeShuffleDown,
- &SPIRVDecompiler::InRangeShuffleButterfly,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index fa8a250cc..c2875eb2b 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation;
using Tegra::Shader::VoteOperation;
namespace {
+
OperationCode GetOperationCode(VoteOperation vote_op) {
switch (vote_op) {
case VoteOperation::All:
@@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) {
return OperationCode::VoteAll;
}
}
+
} // Anonymous namespace
u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
@@ -46,50 +48,50 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::SHFL: {
- Node width = [this, instr] {
- Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
- : GetRegister(instr.gpr39);
+ Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
+ : GetRegister(instr.gpr39);
+ Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
+ : GetRegister(instr.gpr20);
- // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
- // been done reversing Nvidia's math. It won't work on all cases due to SHFL having
- // different parameters that don't properly map to GLSL's interface, but it should work
- // for cases emitted by Nvidia's compiler.
- if (instr.shfl.operation == ShuffleOperation::Up) {
- return Operation(
- OperationCode::ILogicalShiftRight,
- Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
- Immediate(8));
- } else {
- return Operation(OperationCode::ILogicalShiftRight,
- Operation(OperationCode::IAdd, Immediate(0x201F),
- Operation(OperationCode::INegate, std::move(mask))),
- Immediate(8));
- }
- }();
+ Node thread_id = Operation(OperationCode::ThreadId);
+ Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
+ Node seg_mask = BitfieldExtract(mask, 8, 16);
+
+ Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
+ Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
+ Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
+ Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
- const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
+ Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
switch (instr.shfl.operation) {
case ShuffleOperation::Idx:
- return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
- case ShuffleOperation::Up:
- return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
+ return Operation(OperationCode::IBitwiseOr,
+ Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
+ min_thread_id);
case ShuffleOperation::Down:
- return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
+ return Operation(OperationCode::IAdd, thread_id, index);
+ case ShuffleOperation::Up:
+ return Operation(OperationCode::IAdd, thread_id,
+ Operation(OperationCode::INegate, index));
case ShuffleOperation::Bfly:
- return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
+ return Operation(OperationCode::IBitwiseXor, thread_id, index);
}
- UNREACHABLE_MSG("Invalid SHFL operation: {}",
- static_cast<u64>(instr.shfl.operation.Value()));
- return {};
+ UNREACHABLE();
+ return Immediate(0U);
}();
- // Setting the predicate before the register is intentional to avoid overwriting.
- Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
- : GetRegister(instr.gpr20);
- SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
+ Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
+ if (instr.shfl.operation == ShuffleOperation::Up) {
+ return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
+ } else {
+ return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
+ }
+ }();
+
+ SetPredicate(bb, instr.shfl.pred48, in_bounds);
SetRegister(
bb, instr.gpr0,
- Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
+ Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
break;
}
default:
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 4300d9ff4..bd3547e0d 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -181,15 +181,8 @@ enum class OperationCode {
VoteAny, /// (bool) -> bool
VoteEqual, /// (bool) -> bool
- ShuffleIndexed, /// (uint value, uint index, uint width) -> uint
- ShuffleUp, /// (uint value, uint index, uint width) -> uint
- ShuffleDown, /// (uint value, uint index, uint width) -> uint
- ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
-
- InRangeShuffleIndexed, /// (uint index, uint width) -> bool
- InRangeShuffleUp, /// (uint index, uint width) -> bool
- InRangeShuffleDown, /// (uint index, uint width) -> bool
- InRangeShuffleButterfly, /// (uint index, uint width) -> bool
+ ThreadId, /// () -> uint
+ ShuffleIndexed, /// (uint value, uint index) -> uint
Amount,
};