summary refs log tree commit diff stats
path: root/src/shader_recompiler/ir_opt
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler/ir_opt')
-rw-r--r-- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp 104
-rw-r--r-- src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp 52
-rw-r--r-- src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp 20
-rw-r--r-- src/shader_recompiler/ir_opt/rescaling_pass.cpp 29
4 files changed, 165 insertions, 40 deletions
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index b6a20f904..0b2c60842 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -29,6 +29,46 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
});
}
+void AddRegisterIndexedLdc(Info& info) {
+ info.uses_cbuf_indirect = true;
+
+ // The shader can use any possible constant buffer
+ info.constant_buffer_mask = (1 << Info::MAX_CBUFS) - 1;
+
+ auto& cbufs{info.constant_buffer_descriptors};
+ cbufs.clear();
+ for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
+ cbufs.push_back(ConstantBufferDescriptor{.index = i, .count = 1});
+
+ // The shader can use any possible access size
+ info.constant_buffer_used_sizes[i] = 0x10'000;
+ }
+}
+
+u32 GetElementSize(IR::Type& used_type, Shader::IR::Opcode opcode) {
+ switch (opcode) {
+ case IR::Opcode::GetCbufU8:
+ case IR::Opcode::GetCbufS8:
+ used_type |= IR::Type::U8;
+ return 1;
+ case IR::Opcode::GetCbufU16:
+ case IR::Opcode::GetCbufS16:
+ used_type |= IR::Type::U16;
+ return 2;
+ case IR::Opcode::GetCbufU32:
+ used_type |= IR::Type::U32;
+ return 4;
+ case IR::Opcode::GetCbufF32:
+ used_type |= IR::Type::F32;
+ return 4;
+ case IR::Opcode::GetCbufU32x2:
+ used_type |= IR::Type::U32x2;
+ return 8;
+ default:
+ throw InvalidArgument("Invalid opcode {}", opcode);
+ }
+}
+
void GetPatch(Info& info, IR::Patch patch) {
if (!IR::IsGeneric(patch)) {
throw NotImplementedException("Reading non-generic patch {}", patch);
@@ -360,6 +400,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::GlobalAtomicOr64:
case IR::Opcode::GlobalAtomicXor64:
case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::GlobalAtomicIAdd32x2:
+ case IR::Opcode::GlobalAtomicSMin32x2:
+ case IR::Opcode::GlobalAtomicUMin32x2:
+ case IR::Opcode::GlobalAtomicSMax32x2:
+ case IR::Opcode::GlobalAtomicUMax32x2:
+ case IR::Opcode::GlobalAtomicAnd32x2:
+ case IR::Opcode::GlobalAtomicOr32x2:
+ case IR::Opcode::GlobalAtomicXor32x2:
+ case IR::Opcode::GlobalAtomicExchange32x2:
case IR::Opcode::GlobalAtomicAddF32:
case IR::Opcode::GlobalAtomicAddF16x2:
case IR::Opcode::GlobalAtomicAddF32x2:
@@ -454,42 +503,18 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::GetCbufU32x2: {
const IR::Value index{inst.Arg(0)};
const IR::Value offset{inst.Arg(1)};
- if (!index.IsImmediate()) {
- throw NotImplementedException("Constant buffer with non-immediate index");
- }
- AddConstantBufferDescriptor(info, index.U32(), 1);
- u32 element_size{};
- switch (inst.GetOpcode()) {
- case IR::Opcode::GetCbufU8:
- case IR::Opcode::GetCbufS8:
- info.used_constant_buffer_types |= IR::Type::U8;
- element_size = 1;
- break;
- case IR::Opcode::GetCbufU16:
- case IR::Opcode::GetCbufS16:
- info.used_constant_buffer_types |= IR::Type::U16;
- element_size = 2;
- break;
- case IR::Opcode::GetCbufU32:
- info.used_constant_buffer_types |= IR::Type::U32;
- element_size = 4;
- break;
- case IR::Opcode::GetCbufF32:
- info.used_constant_buffer_types |= IR::Type::F32;
- element_size = 4;
- break;
- case IR::Opcode::GetCbufU32x2:
- info.used_constant_buffer_types |= IR::Type::U32x2;
- element_size = 8;
- break;
- default:
- break;
- }
- u32& size{info.constant_buffer_used_sizes[index.U32()]};
- if (offset.IsImmediate()) {
- size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u);
+ if (index.IsImmediate()) {
+ AddConstantBufferDescriptor(info, index.U32(), 1);
+ u32 element_size = GetElementSize(info.used_constant_buffer_types, inst.GetOpcode());
+ u32& size{info.constant_buffer_used_sizes[index.U32()]};
+ if (offset.IsImmediate()) {
+ size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u);
+ } else {
+ size = 0x10'000;
+ }
} else {
- size = 0x10'000;
+ AddRegisterIndexedLdc(info);
+ GetElementSize(info.used_indirect_cbuf_types, inst.GetOpcode());
}
break;
}
@@ -597,6 +622,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
break;
case IR::Opcode::LoadStorage64:
case IR::Opcode::WriteStorage64:
+ case IR::Opcode::StorageAtomicIAdd32x2:
+ case IR::Opcode::StorageAtomicSMin32x2:
+ case IR::Opcode::StorageAtomicUMin32x2:
+ case IR::Opcode::StorageAtomicSMax32x2:
+ case IR::Opcode::StorageAtomicUMax32x2:
+ case IR::Opcode::StorageAtomicAnd32x2:
+ case IR::Opcode::StorageAtomicOr32x2:
+ case IR::Opcode::StorageAtomicXor32x2:
+ case IR::Opcode::StorageAtomicExchange32x2:
info.used_storage_buffer_types |= IR::Type::U32x2;
break;
case IR::Opcode::LoadStorage128:
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 4197b0095..ddf497e32 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -92,6 +92,15 @@ bool IsGlobalMemory(const IR::Inst& inst) {
case IR::Opcode::GlobalAtomicOr64:
case IR::Opcode::GlobalAtomicXor64:
case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::GlobalAtomicIAdd32x2:
+ case IR::Opcode::GlobalAtomicSMin32x2:
+ case IR::Opcode::GlobalAtomicUMin32x2:
+ case IR::Opcode::GlobalAtomicSMax32x2:
+ case IR::Opcode::GlobalAtomicUMax32x2:
+ case IR::Opcode::GlobalAtomicAnd32x2:
+ case IR::Opcode::GlobalAtomicOr32x2:
+ case IR::Opcode::GlobalAtomicXor32x2:
+ case IR::Opcode::GlobalAtomicExchange32x2:
case IR::Opcode::GlobalAtomicAddF32:
case IR::Opcode::GlobalAtomicAddF16x2:
case IR::Opcode::GlobalAtomicAddF32x2:
@@ -135,6 +144,15 @@ bool IsGlobalMemoryWrite(const IR::Inst& inst) {
case IR::Opcode::GlobalAtomicOr64:
case IR::Opcode::GlobalAtomicXor64:
case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::GlobalAtomicIAdd32x2:
+ case IR::Opcode::GlobalAtomicSMin32x2:
+ case IR::Opcode::GlobalAtomicUMin32x2:
+ case IR::Opcode::GlobalAtomicSMax32x2:
+ case IR::Opcode::GlobalAtomicUMax32x2:
+ case IR::Opcode::GlobalAtomicAnd32x2:
+ case IR::Opcode::GlobalAtomicOr32x2:
+ case IR::Opcode::GlobalAtomicXor32x2:
+ case IR::Opcode::GlobalAtomicExchange32x2:
case IR::Opcode::GlobalAtomicAddF32:
case IR::Opcode::GlobalAtomicAddF16x2:
case IR::Opcode::GlobalAtomicAddF32x2:
@@ -199,6 +217,8 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
return IR::Opcode::StorageAtomicOr32;
case IR::Opcode::GlobalAtomicXor32:
return IR::Opcode::StorageAtomicXor32;
+ case IR::Opcode::GlobalAtomicExchange32:
+ return IR::Opcode::StorageAtomicExchange32;
case IR::Opcode::GlobalAtomicIAdd64:
return IR::Opcode::StorageAtomicIAdd64;
case IR::Opcode::GlobalAtomicSMin64:
@@ -215,10 +235,26 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
return IR::Opcode::StorageAtomicOr64;
case IR::Opcode::GlobalAtomicXor64:
return IR::Opcode::StorageAtomicXor64;
- case IR::Opcode::GlobalAtomicExchange32:
- return IR::Opcode::StorageAtomicExchange32;
case IR::Opcode::GlobalAtomicExchange64:
return IR::Opcode::StorageAtomicExchange64;
+ case IR::Opcode::GlobalAtomicIAdd32x2:
+ return IR::Opcode::StorageAtomicIAdd32x2;
+ case IR::Opcode::GlobalAtomicSMin32x2:
+ return IR::Opcode::StorageAtomicSMin32x2;
+ case IR::Opcode::GlobalAtomicUMin32x2:
+ return IR::Opcode::StorageAtomicUMin32x2;
+ case IR::Opcode::GlobalAtomicSMax32x2:
+ return IR::Opcode::StorageAtomicSMax32x2;
+ case IR::Opcode::GlobalAtomicUMax32x2:
+ return IR::Opcode::StorageAtomicUMax32x2;
+ case IR::Opcode::GlobalAtomicAnd32x2:
+ return IR::Opcode::StorageAtomicAnd32x2;
+ case IR::Opcode::GlobalAtomicOr32x2:
+ return IR::Opcode::StorageAtomicOr32x2;
+ case IR::Opcode::GlobalAtomicXor32x2:
+ return IR::Opcode::StorageAtomicXor32x2;
+ case IR::Opcode::GlobalAtomicExchange32x2:
+ return IR::Opcode::StorageAtomicExchange32x2;
case IR::Opcode::GlobalAtomicAddF32:
return IR::Opcode::StorageAtomicAddF32;
case IR::Opcode::GlobalAtomicAddF16x2:
@@ -298,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
/// Tries to track the storage buffer address used by a global memory instruction
std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
- if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
+ if (inst->GetOpcode() != IR::Opcode::GetCbufU32 &&
+ inst->GetOpcode() != IR::Opcode::GetCbufU32x2) {
return std::nullopt;
}
const IR::Value index{inst->Arg(0)};
@@ -454,6 +491,15 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
case IR::Opcode::GlobalAtomicOr64:
case IR::Opcode::GlobalAtomicXor64:
case IR::Opcode::GlobalAtomicExchange64:
+ case IR::Opcode::GlobalAtomicIAdd32x2:
+ case IR::Opcode::GlobalAtomicSMin32x2:
+ case IR::Opcode::GlobalAtomicUMin32x2:
+ case IR::Opcode::GlobalAtomicSMax32x2:
+ case IR::Opcode::GlobalAtomicUMax32x2:
+ case IR::Opcode::GlobalAtomicAnd32x2:
+ case IR::Opcode::GlobalAtomicOr32x2:
+ case IR::Opcode::GlobalAtomicXor32x2:
+ case IR::Opcode::GlobalAtomicExchange32x2:
case IR::Opcode::GlobalAtomicAddF32:
case IR::Opcode::GlobalAtomicAddF16x2:
case IR::Opcode::GlobalAtomicAddF32x2:
diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
index e80d3d1d9..c2654cd9b 100644
--- a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
@@ -199,6 +199,26 @@ void Lower(IR::Block& block, IR::Inst& inst) {
return ShiftRightLogical64To32(block, inst);
case IR::Opcode::ShiftRightArithmetic64:
return ShiftRightArithmetic64To32(block, inst);
+ case IR::Opcode::SharedAtomicExchange64:
+ return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2);
+ case IR::Opcode::GlobalAtomicIAdd64:
+ return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicIAdd32x2);
+ case IR::Opcode::GlobalAtomicSMin64:
+ return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMin32x2);
+ case IR::Opcode::GlobalAtomicUMin64:
+ return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMin32x2);
+ case IR::Opcode::GlobalAtomicSMax64:
+ return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMax32x2);
+ case IR::Opcode::GlobalAtomicUMax64:
+ return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMax32x2);
+ case IR::Opcode::GlobalAtomicAnd64:
+ return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicAnd32x2);
+ case IR::Opcode::GlobalAtomicOr64:
+ return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicOr32x2);
+ case IR::Opcode::GlobalAtomicXor64:
+ return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicXor32x2);
+ case IR::Opcode::GlobalAtomicExchange64:
+ return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicExchange32x2);
default:
break;
}
diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
index c28500dd1..496d4667e 100644
--- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp
+++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
@@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s
}
}
+void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
+ size_t index) {
+ const IR::Value composite{inst.Arg(index)};
+ if (composite.IsEmpty()) {
+ return;
+ }
+ const auto info{inst.Flags<IR::TextureInstInfo>()};
+ const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
+ const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
+ switch (info.type) {
+ case TextureType::ColorArray2D:
+ case TextureType::Color2D:
+ inst.SetArg(index, ir.CompositeConstruct(x, y));
+ break;
+ case TextureType::Color1D:
+ case TextureType::ColorArray1D:
+ case TextureType::Color3D:
+ case TextureType::ColorCube:
+ case TextureType::ColorArrayCube:
+ case TextureType::Buffer:
+ // Nothing to patch here
+ break;
+ }
+}
+
void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const IR::Value coord{inst.Arg(1)};
@@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
SubScaleCoord(ir, inst, is_scaled);
// Scale ImageFetch offset
- ScaleIntegerComposite(ir, inst, is_scaled, 2);
+ ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
}
void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
@@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
ScaleIntegerComposite(ir, inst, is_scaled, 1);
// Scale ImageFetch offset
- ScaleIntegerComposite(ir, inst, is_scaled, 2);
+ ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
}
void PatchImageRead(IR::Block& block, IR::Inst& inst) {