summaryrefslogtreecommitdiffstats
path: root/src/shader_recompiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler')
-rw-r--r--src/shader_recompiler/CMakeLists.txt2
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.cpp115
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.h21
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.h14
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp10
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp175
-rw-r--r--src/shader_recompiler/environment.h4
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.cpp46
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.h6
-rw-r--r--src/shader_recompiler/frontend/ir/microinstruction.cpp6
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.inc18
-rw-r--r--src/shader_recompiler/frontend/ir/program.h2
-rw-r--r--src/shader_recompiler/frontend/maxwell/program.cpp2
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp197
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp16
-rw-r--r--src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp6
-rw-r--r--src/shader_recompiler/profile.h3
17 files changed, 626 insertions, 17 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 55b846c84..003cbefb1 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(shader_recompiler STATIC
backend/spirv/emit_spirv_logical.cpp
backend/spirv/emit_spirv_memory.cpp
backend/spirv/emit_spirv_select.cpp
+ backend/spirv/emit_spirv_shared_memory.cpp
backend/spirv/emit_spirv_special.cpp
backend/spirv/emit_spirv_undefined.cpp
backend/spirv/emit_spirv_warp.cpp
@@ -111,6 +112,7 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate/impl/load_constant.cpp
frontend/maxwell/translate/impl/load_effective_address.cpp
frontend/maxwell/translate/impl/load_store_attribute.cpp
+ frontend/maxwell/translate/impl/load_store_local_shared.cpp
frontend/maxwell/translate/impl/load_store_memory.cpp
frontend/maxwell/translate/impl/logic_operation.cpp
frontend/maxwell/translate/impl/logic_operation_three_input.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index a8ca33c1d..96d0e9b4d 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -9,6 +9,7 @@
#include <fmt/format.h>
#include "common/common_types.h"
+#include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/emit_context.h"
namespace Shader::Backend::SPIRV {
@@ -96,11 +97,13 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
}
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding)
- : Sirit::Module(0x00010000), profile{profile_}, stage{program.stage} {
+ : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} {
AddCapability(spv::Capability::Shader);
DefineCommonTypes(program.info);
DefineCommonConstants();
DefineInterfaces(program.info);
+ DefineLocalMemory(program);
+ DefineSharedMemory(program);
DefineConstantBuffers(program.info, binding);
DefineStorageBuffers(program.info, binding);
DefineTextures(program.info, binding);
@@ -143,6 +146,8 @@ void EmitContext::DefineCommonTypes(const Info& info) {
F32.Define(*this, TypeFloat(32), "f32");
U32.Define(*this, TypeInt(32, false), "u32");
+ private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32");
+
input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32");
@@ -184,6 +189,105 @@ void EmitContext::DefineInterfaces(const Info& info) {
DefineOutputs(info);
}
+void EmitContext::DefineLocalMemory(const IR::Program& program) {
+ if (program.local_memory_size == 0) {
+ return;
+ }
+ const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)};
+ const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))};
+ const Id pointer{TypePointer(spv::StorageClass::Private, type)};
+ local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private);
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(local_memory);
+ }
+}
+
+void EmitContext::DefineSharedMemory(const IR::Program& program) {
+ if (program.shared_memory_size == 0) {
+ return;
+ }
+ const auto make{[&](Id element_type, u32 element_size) {
+ const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)};
+ const Id array_type{TypeArray(element_type, Constant(U32[1], num_elements))};
+ Decorate(array_type, spv::Decoration::ArrayStride, element_size);
+
+ const Id struct_type{TypeStruct(array_type)};
+ MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
+ Decorate(struct_type, spv::Decoration::Block);
+
+ const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
+ const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
+ const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
+ Decorate(variable, spv::Decoration::Aliased);
+ interfaces.push_back(variable);
+
+ return std::make_pair(variable, element_pointer);
+ }};
+ if (profile.support_explicit_workgroup_layout) {
+ AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
+ AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
+ if (program.info.uses_int8) {
+ AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
+ std::tie(shared_memory_u8, shared_u8) = make(U8, 1);
+ }
+ if (program.info.uses_int16) {
+ AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
+ std::tie(shared_memory_u16, shared_u16) = make(U16, 2);
+ }
+ std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4);
+ std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8);
+ std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16);
+ }
+ const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
+ const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))};
+ const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)};
+ shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
+ shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup);
+ interfaces.push_back(shared_memory_u32);
+
+ const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
+ const auto make_function{[&](u32 mask, u32 size) {
+ const Id loop_header{OpLabel()};
+ const Id continue_block{OpLabel()};
+ const Id merge_block{OpLabel()};
+
+ const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
+ const Id offset{OpFunctionParameter(U32[1])};
+ const Id insert_value{OpFunctionParameter(U32[1])};
+ AddLabel();
+ OpBranch(loop_header);
+
+ AddLabel(loop_header);
+ const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))};
+ const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Constant(U32[1], 3U))};
+ const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Constant(U32[1], mask))};
+ const Id count{Constant(U32[1], size)};
+ OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
+ OpBranch(continue_block);
+
+ AddLabel(continue_block);
+ const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)};
+ const Id old_value{OpLoad(U32[1], word_pointer)};
+ const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)};
+ const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Constant(U32[1], 1U),
+ u32_zero_value, u32_zero_value, new_value,
+ old_value)};
+ const Id success{OpIEqual(U1, atomic_res, old_value)};
+ OpBranchConditional(success, merge_block, loop_header);
+
+ AddLabel(merge_block);
+ OpReturn();
+ OpFunctionEnd();
+ return func;
+ }};
+ if (program.info.uses_int8) {
+ shared_store_u8_func = make_function(24, 8);
+ }
+ if (program.info.uses_int16) {
+ shared_store_u16_func = make_function(16, 16);
+ }
+}
+
void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
if (info.constant_buffer_descriptors.empty()) {
return;
@@ -234,6 +338,9 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
Decorate(id, spv::Decoration::Binding, binding);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, fmt::format("ssbo{}", index));
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(id);
+ }
std::fill_n(ssbos.data() + index, desc.count, id);
index += desc.count;
binding += desc.count;
@@ -261,6 +368,9 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) {
.image_type{image_type},
});
}
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(id);
+ }
binding += desc.count;
}
}
@@ -363,6 +473,9 @@ void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions:
for (size_t i = 0; i < desc.count; ++i) {
cbufs[desc.index + i].*member_type = id;
}
+ if (profile.supported_spirv >= 0x00010400) {
+ interfaces.push_back(id);
+ }
binding += desc.count;
}
}
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 01b7b665d..1a4e8221a 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -73,6 +73,14 @@ public:
UniformDefinitions uniform_types;
+ Id private_u32{};
+
+ Id shared_u8{};
+ Id shared_u16{};
+ Id shared_u32{};
+ Id shared_u32x2{};
+ Id shared_u32x4{};
+
Id input_f32{};
Id input_u32{};
Id input_s32{};
@@ -96,6 +104,17 @@ public:
Id base_vertex{};
Id front_face{};
+ Id local_memory{};
+
+ Id shared_memory_u8{};
+ Id shared_memory_u16{};
+ Id shared_memory_u32{};
+ Id shared_memory_u32x2{};
+ Id shared_memory_u32x4{};
+
+ Id shared_store_u8_func{};
+ Id shared_store_u16_func{};
+
Id input_position{};
std::array<Id, 32> input_generics{};
@@ -111,6 +130,8 @@ private:
void DefineCommonTypes(const Info& info);
void DefineCommonConstants();
void DefineInterfaces(const Info& info);
+ void DefineLocalMemory(const IR::Program& program);
+ void DefineSharedMemory(const IR::Program& program);
void DefineConstantBuffers(const Info& info, u32& binding);
void DefineStorageBuffers(const Info& info, u32& binding);
void DefineTextures(const Info& info, u32& binding);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 837f0e858..4f62af959 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -58,6 +58,8 @@ void EmitSetCFlag(EmitContext& ctx);
void EmitSetOFlag(EmitContext& ctx);
Id EmitWorkgroupId(EmitContext& ctx);
Id EmitLocalInvocationId(EmitContext& ctx);
+Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
+void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
Id EmitUndefU1(EmitContext& ctx);
Id EmitUndefU8(EmitContext& ctx);
Id EmitUndefU16(EmitContext& ctx);
@@ -94,6 +96,18 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va
Id value);
void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
+Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
+Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
+Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
+void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 4cbc2aec1..52dcef8a4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -238,4 +238,14 @@ Id EmitLocalInvocationId(EmitContext& ctx) {
return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id);
}
+Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
+ const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
+ return ctx.OpLoad(ctx.U32[1], pointer);
+}
+
+void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) {
+ const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
+ ctx.OpStore(pointer, value);
+}
+
} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
new file mode 100644
index 000000000..fa2fc9ab4
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -0,0 +1,175 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
+ const Id shift_id{ctx.Constant(ctx.U32[1], shift)};
+ const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
+}
+
+Id Word(EmitContext& ctx, Id offset) {
+ const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
+ const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+ return ctx.OpLoad(ctx.U32[1], pointer);
+}
+
+std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
+ const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))};
+ const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))};
+ const Id count_id{ctx.Constant(ctx.U32[1], count)};
+ return {bit, count_id};
+}
+} // Anonymous namespace
+
+Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+ return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
+ } else {
+ const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
+ return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+ }
+}
+
+Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+ return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
+ } else {
+ const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
+ return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+ }
+}
+
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+ return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
+ } else {
+ const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
+ return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+ }
+}
+
+Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+ return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
+ } else {
+ const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
+ return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+ }
+}
+
+Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
+ return ctx.OpLoad(ctx.U32[1], pointer);
+ } else {
+ return Word(ctx, offset);
+ }
+}
+
+Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
+ return ctx.OpLoad(ctx.U32[2], pointer);
+ } else {
+ const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
+ const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))};
+ const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
+ const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
+ return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
+ ctx.OpLoad(ctx.U32[1], rhs_pointer));
+ }
+}
+
+Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
+ return ctx.OpLoad(ctx.U32[4], pointer);
+ }
+ const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
+ const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+ std::array<Id, 4> values{};
+ for (u32 i = 0; i < 4; ++i) {
+ const Id index{i == 0 ? base_index
+ : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
+ const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+ values[i] = ctx.OpLoad(ctx.U32[1], pointer);
+ }
+ return ctx.OpCompositeConstruct(ctx.U32[4], values);
+}
+
+void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+ ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
+ } else {
+ ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value);
+ }
+}
+
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+ ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
+ } else {
+ ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value);
+ }
+}
+
+void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
+ Id pointer{};
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
+ } else {
+ const Id shift{ctx.Constant(ctx.U32[1], 2U)};
+ const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+ pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
+ }
+ ctx.OpStore(pointer, value);
+}
+
+void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
+ ctx.OpStore(pointer, value);
+ return;
+ }
+ const Id shift{ctx.Constant(ctx.U32[1], 2U)};
+ const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+ const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))};
+ const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
+ const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
+ ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
+ ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
+}
+
+void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
+ if (ctx.profile.support_explicit_workgroup_layout) {
+ const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
+ ctx.OpStore(pointer, value);
+ return;
+ }
+ const Id shift{ctx.Constant(ctx.U32[1], 2U)};
+ const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
+ for (u32 i = 0; i < 4; ++i) {
+ const Id index{i == 0 ? base_index
+ : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
+ const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+ ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
+ }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 0c62c1c54..9415d02f6 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -19,6 +19,10 @@ public:
[[nodiscard]] virtual u32 TextureBoundBuffer() const = 0;
+ [[nodiscard]] virtual u32 LocalMemorySize() const = 0;
+
+ [[nodiscard]] virtual u32 SharedMemorySize() const = 0;
+
[[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
[[nodiscard]] const ProgramHeader& SPH() const noexcept {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 6d41442ee..d6a1d8ec2 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -355,6 +355,52 @@ void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) {
Inst(Opcode::WriteGlobal128, address, vector);
}
+U32 IREmitter::LoadLocal(const IR::U32& word_offset) {
+ return Inst<U32>(Opcode::LoadLocal, word_offset);
+}
+
+void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) {
+ Inst(Opcode::WriteLocal, word_offset, value);
+}
+
+Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) {
+ switch (bit_size) {
+ case 8:
+ return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
+ case 16:
+ return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
+ case 32:
+ return Inst(Opcode::LoadSharedU32, offset);
+ case 64:
+ return Inst(Opcode::LoadSharedU64, offset);
+ case 128:
+ return Inst(Opcode::LoadSharedU128, offset);
+ }
+ throw InvalidArgument("Invalid bit size {}", bit_size);
+}
+
+void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) {
+ switch (bit_size) {
+ case 8:
+ Inst(Opcode::WriteSharedU8, offset, value);
+ break;
+ case 16:
+ Inst(Opcode::WriteSharedU16, offset, value);
+ break;
+ case 32:
+ Inst(Opcode::WriteSharedU32, offset, value);
+ break;
+ case 64:
+ Inst(Opcode::WriteSharedU64, offset, value);
+ break;
+ case 128:
+ Inst(Opcode::WriteSharedU128, offset, value);
+ break;
+ default:
+ throw InvalidArgument("Invalid bit size {}", bit_size);
+ }
+}
+
U1 IREmitter::GetZeroFromOp(const Value& op) {
return Inst<U1>(Opcode::GetZeroFromOp, op);
}
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 8d50aa607..842c2bdaf 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -99,6 +99,12 @@ public:
void WriteGlobal64(const U64& address, const IR::Value& vector);
void WriteGlobal128(const U64& address, const IR::Value& vector);
+ [[nodiscard]] U32 LoadLocal(const U32& word_offset);
+ void WriteLocal(const U32& word_offset, const U32& value);
+
+ [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
+ void WriteShared(int bit_size, const U32& offset, const Value& value);
+
[[nodiscard]] U1 GetZeroFromOp(const Value& op);
[[nodiscard]] U1 GetSignFromOp(const Value& op);
[[nodiscard]] U1 GetCarryFromOp(const Value& op);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index be8eb4d4c..52a5e5034 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -76,6 +76,12 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::WriteStorage32:
case Opcode::WriteStorage64:
case Opcode::WriteStorage128:
+ case Opcode::WriteLocal:
+ case Opcode::WriteSharedU8:
+ case Opcode::WriteSharedU16:
+ case Opcode::WriteSharedU32:
+ case Opcode::WriteSharedU64:
+ case Opcode::WriteSharedU128:
return true;
default:
return false;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 5d7462d76..c75658328 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -89,6 +89,24 @@ OPCODE(WriteStorage32, Void, U32,
OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
+// Local memory operations
+OPCODE(LoadLocal, U32, U32, )
+OPCODE(WriteLocal, Void, U32, U32, )
+
+// Shared memory operations
+OPCODE(LoadSharedU8, U32, U32, )
+OPCODE(LoadSharedS8, U32, U32, )
+OPCODE(LoadSharedU16, U32, U32, )
+OPCODE(LoadSharedS16, U32, U32, )
+OPCODE(LoadSharedU32, U32, U32, )
+OPCODE(LoadSharedU64, U32x2, U32, )
+OPCODE(LoadSharedU128, U32x4, U32, )
+OPCODE(WriteSharedU8, Void, U32, U32, )
+OPCODE(WriteSharedU16, Void, U32, U32, )
+OPCODE(WriteSharedU32, Void, U32, U32, )
+OPCODE(WriteSharedU64, Void, U32, U32x2, )
+OPCODE(WriteSharedU128, Void, U32, U32x4, )
+
// Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
index 0162e919c..3a37b3ab9 100644
--- a/src/shader_recompiler/frontend/ir/program.h
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -21,6 +21,8 @@ struct Program {
Info info;
Stage stage{};
std::array<u32, 3> workgroup_size{};
+ u32 local_memory_size{};
+ u32 shared_memory_size{};
};
[[nodiscard]] std::string DumpProgram(const Program& program);
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index a914a91f4..7b08f11b0 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -67,8 +67,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
program.blocks = VisitAST(inst_pool, block_pool, env, cfg);
program.post_order_blocks = PostOrder(program.blocks);
program.stage = env.ShaderStage();
+ program.local_memory_size = env.LocalMemorySize();
if (program.stage == Stage::Compute) {
program.workgroup_size = env.WorkgroupSize();
+ program.shared_memory_size = env.SharedMemorySize();
}
RemoveUnreachableBlocks(program);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
index 000000000..68963c8ea
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,197 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Size : u64 {
+ U8,
+ S8,
+ U16,
+ S16,
+ B32,
+ B64,
+ B128,
+};
+
+IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
+ union {
+ u64 raw;
+ BitField<8, 8, IR::Reg> offset_reg;
+ BitField<20, 24, u64> absolute_offset;
+ BitField<20, 24, s64> relative_offset;
+ } const encoding{insn};
+
+ if (encoding.offset_reg == IR::Reg::RZ) {
+ return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
+ } else {
+ const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
+ return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+ }
+}
+
+std::pair<int, bool> GetSize(u64 insn) {
+ union {
+ u64 raw;
+ BitField<48, 3, Size> size;
+ } const encoding{insn};
+
+ const Size nnn = encoding.size;
+ switch (encoding.size) {
+ case Size::U8:
+ return {8, false};
+ case Size::S8:
+ return {8, true};
+ case Size::U16:
+ return {16, false};
+ case Size::S16:
+ return {16, true};
+ case Size::B32:
+ return {32, false};
+ case Size::B64:
+ return {64, false};
+ case Size::B128:
+ return {128, false};
+ default:
+ throw NotImplementedException("Invalid size {}", encoding.size.Value());
+ }
+}
+
+IR::Reg Reg(u64 insn) {
+ union {
+ u64 raw;
+ BitField<0, 8, IR::Reg> reg;
+ } const encoding{insn};
+
+ return encoding.reg;
+}
+
+IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
+ return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
+}
+
+IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
+ return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::LDL(u64 insn) {
+ const IR::U32 offset{Offset(*this, insn)};
+ const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};
+
+ const IR::Reg dest{Reg(insn)};
+ const auto [bit_size, is_signed]{GetSize(insn)};
+ switch (bit_size) {
+ case 8: {
+ const IR::U32 bit{ByteOffset(ir, offset)};
+ X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed));
+ break;
+ }
+ case 16: {
+ const IR::U32 bit{ShortOffset(ir, offset)};
+ X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed));
+ break;
+ }
+ case 32:
+ case 64:
+ case 128:
+ if (!IR::IsAligned(dest, bit_size / 32)) {
+ throw NotImplementedException("Unaligned destination register {}", dest);
+ }
+ X(dest, ir.LoadLocal(word_offset));
+ for (int i = 1; i < bit_size / 32; ++i) {
+ X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i))));
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::LDS(u64 insn) {
+ const IR::U32 offset{Offset(*this, insn)};
+ const IR::Reg dest{Reg(insn)};
+ const auto [bit_size, is_signed]{GetSize(insn)};
+ const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
+ switch (bit_size) {
+ case 8:
+ case 16:
+ case 32:
+ X(dest, IR::U32{value});
+ break;
+ case 64:
+ case 128:
+ if (!IR::IsAligned(dest, bit_size / 32)) {
+ throw NotImplementedException("Unaligned destination register {}", dest);
+ }
+ for (int element = 0; element < bit_size / 32; ++element) {
+ X(dest + element, IR::U32{ir.CompositeExtract(value, element)});
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::STL(u64 insn) {
+ const IR::U32 offset{Offset(*this, insn)};
+ const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};
+
+ const IR::Reg reg{Reg(insn)};
+ const IR::U32 src{X(reg)};
+ const int bit_size{GetSize(insn).first};
+ switch (bit_size) {
+ case 8: {
+ const IR::U32 bit{ByteOffset(ir, offset)};
+ const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
+ ir.WriteLocal(word_offset, value);
+ break;
+ }
+ case 16: {
+ const IR::U32 bit{ShortOffset(ir, offset)};
+ const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
+ ir.WriteLocal(word_offset, value);
+ break;
+ }
+ case 32:
+ case 64:
+ case 128:
+ if (!IR::IsAligned(reg, bit_size / 32)) {
+ throw NotImplementedException("Unaligned source register");
+ }
+ ir.WriteLocal(word_offset, src);
+ for (int i = 1; i < bit_size / 32; ++i) {
+ ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
+ }
+ break;
+ }
+}
+
+void TranslatorVisitor::STS(u64 insn) {
+ const IR::U32 offset{Offset(*this, insn)};
+ const IR::Reg reg{Reg(insn)};
+ const int bit_size{GetSize(insn).first};
+ switch (bit_size) {
+ case 8:
+ case 16:
+ case 32:
+ ir.WriteShared(bit_size, offset, X(reg));
+ break;
+ case 64:
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
+ break;
+ case 128: {
+ if (!IR::IsAligned(reg, 2)) {
+ throw NotImplementedException("Unaligned source register {}", reg);
+ }
+ const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
+ ir.WriteShared(128, offset, vector);
+ break;
+ }
+ }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 409216640..b62d8ee2a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -193,14 +193,6 @@ void TranslatorVisitor::LD(u64) {
ThrowNotImplemented(Opcode::LD);
}
-void TranslatorVisitor::LDL(u64) {
- ThrowNotImplemented(Opcode::LDL);
-}
-
-void TranslatorVisitor::LDS(u64) {
- ThrowNotImplemented(Opcode::LDS);
-}
-
void TranslatorVisitor::LEPC(u64) {
ThrowNotImplemented(Opcode::LEPC);
}
@@ -309,18 +301,10 @@ void TranslatorVisitor::ST(u64) {
ThrowNotImplemented(Opcode::ST);
}
-void TranslatorVisitor::STL(u64) {
- ThrowNotImplemented(Opcode::STL);
-}
-
void TranslatorVisitor::STP(u64) {
ThrowNotImplemented(Opcode::STP);
}
-void TranslatorVisitor::STS(u64) {
- ThrowNotImplemented(Opcode::STS);
-}
-
void TranslatorVisitor::SUATOM_cas(u64) {
ThrowNotImplemented(Opcode::SUATOM_cas);
}
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 60be67228..c932c307b 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -200,6 +200,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::LoadStorageS8:
case IR::Opcode::WriteStorageU8:
case IR::Opcode::WriteStorageS8:
+ case IR::Opcode::LoadSharedU8:
+ case IR::Opcode::LoadSharedS8:
+ case IR::Opcode::WriteSharedU8:
case IR::Opcode::SelectU8:
case IR::Opcode::ConvertF16S8:
case IR::Opcode::ConvertF16U8:
@@ -224,6 +227,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::LoadStorageS16:
case IR::Opcode::WriteStorageU16:
case IR::Opcode::WriteStorageS16:
+ case IR::Opcode::LoadSharedU16:
+ case IR::Opcode::LoadSharedS16:
+ case IR::Opcode::WriteSharedU16:
case IR::Opcode::SelectU16:
case IR::Opcode::BitCastU16F16:
case IR::Opcode::BitCastF16U16:
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index e26047751..0276fc23b 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -18,6 +18,8 @@ enum class AttributeType : u8 {
};
struct Profile {
+ u32 supported_spirv{0x00010000};
+
bool unified_descriptor_binding{};
bool support_vertex_instance_id{};
bool support_float_controls{};
@@ -30,6 +32,7 @@ struct Profile {
bool support_fp16_signed_zero_nan_preserve{};
bool support_fp32_signed_zero_nan_preserve{};
bool support_fp64_signed_zero_nan_preserve{};
+ bool support_explicit_workgroup_layout{};
bool support_vote{};
bool warp_size_potentially_larger_than_guest{};