diff options
Diffstat (limited to 'src/shader_recompiler/ir_opt')
4 files changed, 165 insertions, 40 deletions
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index b6a20f904..0b2c60842 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -29,6 +29,46 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { }); } +void AddRegisterIndexedLdc(Info& info) { + info.uses_cbuf_indirect = true; + + // The shader can use any possible constant buffer + info.constant_buffer_mask = (1 << Info::MAX_CBUFS) - 1; + + auto& cbufs{info.constant_buffer_descriptors}; + cbufs.clear(); + for (u32 i = 0; i < Info::MAX_CBUFS; i++) { + cbufs.push_back(ConstantBufferDescriptor{.index = i, .count = 1}); + + // The shader can use any possible access size + info.constant_buffer_used_sizes[i] = 0x10'000; + } +} + +u32 GetElementSize(IR::Type& used_type, Shader::IR::Opcode opcode) { + switch (opcode) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + used_type |= IR::Type::U8; + return 1; + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + used_type |= IR::Type::U16; + return 2; + case IR::Opcode::GetCbufU32: + used_type |= IR::Type::U32; + return 4; + case IR::Opcode::GetCbufF32: + used_type |= IR::Type::F32; + return 4; + case IR::Opcode::GetCbufU32x2: + used_type |= IR::Type::U32x2; + return 8; + default: + throw InvalidArgument("Invalid opcode {}", opcode); + } +} + void GetPatch(Info& info, IR::Patch patch) { if (!IR::IsGeneric(patch)) { throw NotImplementedException("Reading non-generic patch {}", patch); @@ -360,6 +400,15 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GlobalAtomicOr64: case IR::Opcode::GlobalAtomicXor64: case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd32x2: + case IR::Opcode::GlobalAtomicSMin32x2: + case IR::Opcode::GlobalAtomicUMin32x2: + case IR::Opcode::GlobalAtomicSMax32x2: + case IR::Opcode::GlobalAtomicUMax32x2: + case IR::Opcode::GlobalAtomicAnd32x2: + case IR::Opcode::GlobalAtomicOr32x2: + case IR::Opcode::GlobalAtomicXor32x2: + case IR::Opcode::GlobalAtomicExchange32x2: case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::GlobalAtomicAddF32x2: @@ -454,42 +503,18 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GetCbufU32x2: { const IR::Value index{inst.Arg(0)}; const IR::Value offset{inst.Arg(1)}; - if (!index.IsImmediate()) { - throw NotImplementedException("Constant buffer with non-immediate index"); - } - AddConstantBufferDescriptor(info, index.U32(), 1); - u32 element_size{}; - switch (inst.GetOpcode()) { - case IR::Opcode::GetCbufU8: - case IR::Opcode::GetCbufS8: - info.used_constant_buffer_types |= IR::Type::U8; - element_size = 1; - break; - case IR::Opcode::GetCbufU16: - case IR::Opcode::GetCbufS16: - info.used_constant_buffer_types |= IR::Type::U16; - element_size = 2; - break; - case IR::Opcode::GetCbufU32: - info.used_constant_buffer_types |= IR::Type::U32; - element_size = 4; - break; - case IR::Opcode::GetCbufF32: - info.used_constant_buffer_types |= IR::Type::F32; - element_size = 4; - break; - case IR::Opcode::GetCbufU32x2: - info.used_constant_buffer_types |= IR::Type::U32x2; - element_size = 8; - break; - default: - break; - } - u32& size{info.constant_buffer_used_sizes[index.U32()]}; - if (offset.IsImmediate()) { - size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u); + if (index.IsImmediate()) { + AddConstantBufferDescriptor(info, index.U32(), 1); + u32 element_size = GetElementSize(info.used_constant_buffer_types, inst.GetOpcode()); + u32& size{info.constant_buffer_used_sizes[index.U32()]}; + if (offset.IsImmediate()) { + size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u); + } else { + size = 0x10'000; + } } else { - size = 0x10'000; + AddRegisterIndexedLdc(info); + GetElementSize(info.used_indirect_cbuf_types, inst.GetOpcode()); } break; } @@ -597,6 +622,15 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; case IR::Opcode::LoadStorage64: case IR::Opcode::WriteStorage64: + case IR::Opcode::StorageAtomicIAdd32x2: + case IR::Opcode::StorageAtomicSMin32x2: + case IR::Opcode::StorageAtomicUMin32x2: + case IR::Opcode::StorageAtomicSMax32x2: + case IR::Opcode::StorageAtomicUMax32x2: + case IR::Opcode::StorageAtomicAnd32x2: + case IR::Opcode::StorageAtomicOr32x2: + case IR::Opcode::StorageAtomicXor32x2: + case IR::Opcode::StorageAtomicExchange32x2: info.used_storage_buffer_types |= IR::Type::U32x2; break; case IR::Opcode::LoadStorage128: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 4197b0095..ddf497e32 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -92,6 +92,15 @@ bool IsGlobalMemory(const IR::Inst& inst) { case IR::Opcode::GlobalAtomicOr64: case IR::Opcode::GlobalAtomicXor64: case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd32x2: + case IR::Opcode::GlobalAtomicSMin32x2: + case IR::Opcode::GlobalAtomicUMin32x2: + case IR::Opcode::GlobalAtomicSMax32x2: + case IR::Opcode::GlobalAtomicUMax32x2: + case IR::Opcode::GlobalAtomicAnd32x2: + case IR::Opcode::GlobalAtomicOr32x2: + case IR::Opcode::GlobalAtomicXor32x2: + case IR::Opcode::GlobalAtomicExchange32x2: case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::GlobalAtomicAddF32x2: @@ -135,6 +144,15 @@ bool IsGlobalMemoryWrite(const IR::Inst& inst) { case IR::Opcode::GlobalAtomicOr64: case IR::Opcode::GlobalAtomicXor64: case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd32x2: + case IR::Opcode::GlobalAtomicSMin32x2: + case IR::Opcode::GlobalAtomicUMin32x2: + case IR::Opcode::GlobalAtomicSMax32x2: + case IR::Opcode::GlobalAtomicUMax32x2: + case IR::Opcode::GlobalAtomicAnd32x2: + case IR::Opcode::GlobalAtomicOr32x2: + case IR::Opcode::GlobalAtomicXor32x2: + case IR::Opcode::GlobalAtomicExchange32x2: case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::GlobalAtomicAddF32x2: @@ -199,6 +217,8 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { return IR::Opcode::StorageAtomicOr32; case IR::Opcode::GlobalAtomicXor32: return IR::Opcode::StorageAtomicXor32; + case IR::Opcode::GlobalAtomicExchange32: + return IR::Opcode::StorageAtomicExchange32; case IR::Opcode::GlobalAtomicIAdd64: return IR::Opcode::StorageAtomicIAdd64; case IR::Opcode::GlobalAtomicSMin64: @@ -215,10 +235,26 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { return IR::Opcode::StorageAtomicOr64; case IR::Opcode::GlobalAtomicXor64: return IR::Opcode::StorageAtomicXor64; - case IR::Opcode::GlobalAtomicExchange32: - return IR::Opcode::StorageAtomicExchange32; case IR::Opcode::GlobalAtomicExchange64: return IR::Opcode::StorageAtomicExchange64; + case IR::Opcode::GlobalAtomicIAdd32x2: + return IR::Opcode::StorageAtomicIAdd32x2; + case IR::Opcode::GlobalAtomicSMin32x2: + return IR::Opcode::StorageAtomicSMin32x2; + case IR::Opcode::GlobalAtomicUMin32x2: + return IR::Opcode::StorageAtomicUMin32x2; + case IR::Opcode::GlobalAtomicSMax32x2: + return IR::Opcode::StorageAtomicSMax32x2; + case IR::Opcode::GlobalAtomicUMax32x2: + return IR::Opcode::StorageAtomicUMax32x2; + case IR::Opcode::GlobalAtomicAnd32x2: + return IR::Opcode::StorageAtomicAnd32x2; + case IR::Opcode::GlobalAtomicOr32x2: + return IR::Opcode::StorageAtomicOr32x2; + case IR::Opcode::GlobalAtomicXor32x2: + return IR::Opcode::StorageAtomicXor32x2; + case IR::Opcode::GlobalAtomicExchange32x2: + return IR::Opcode::StorageAtomicExchange32x2; case IR::Opcode::GlobalAtomicAddF32: return IR::Opcode::StorageAtomicAddF32; case IR::Opcode::GlobalAtomicAddF16x2: @@ -298,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { /// Tries to track the storage buffer address used by a global memory instruction std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { - if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32 && + inst->GetOpcode() != IR::Opcode::GetCbufU32x2) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -454,6 +491,15 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, case IR::Opcode::GlobalAtomicOr64: case IR::Opcode::GlobalAtomicXor64: case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd32x2: + case IR::Opcode::GlobalAtomicSMin32x2: + case IR::Opcode::GlobalAtomicUMin32x2: + case IR::Opcode::GlobalAtomicSMax32x2: + case IR::Opcode::GlobalAtomicUMax32x2: + case IR::Opcode::GlobalAtomicAnd32x2: + case IR::Opcode::GlobalAtomicOr32x2: + case IR::Opcode::GlobalAtomicXor32x2: + case IR::Opcode::GlobalAtomicExchange32x2: case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::GlobalAtomicAddF32x2: diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp index e80d3d1d9..c2654cd9b 100644 --- a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -199,6 +199,26 @@ void Lower(IR::Block& block, IR::Inst& inst) { return ShiftRightLogical64To32(block, inst); case IR::Opcode::ShiftRightArithmetic64: return ShiftRightArithmetic64To32(block, inst); + case IR::Opcode::SharedAtomicExchange64: + return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2); + case IR::Opcode::GlobalAtomicIAdd64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicIAdd32x2); + case IR::Opcode::GlobalAtomicSMin64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMin32x2); + case IR::Opcode::GlobalAtomicUMin64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMin32x2); + case IR::Opcode::GlobalAtomicSMax64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMax32x2); + case IR::Opcode::GlobalAtomicUMax64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMax32x2); + case IR::Opcode::GlobalAtomicAnd64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicAnd32x2); + case IR::Opcode::GlobalAtomicOr64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicOr32x2); + case IR::Opcode::GlobalAtomicXor64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicXor32x2); + case IR::Opcode::GlobalAtomicExchange64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicExchange32x2); default: break; } diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index c28500dd1..496d4667e 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s } } +void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled, + size_t index) { + const IR::Value composite{inst.Arg(index)}; + if (composite.IsEmpty()) { + return; + } + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})}; + const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})}; + switch (info.type) { + case TextureType::ColorArray2D: + case TextureType::Color2D: + inst.SetArg(index, ir.CompositeConstruct(x, y)); + break; + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color3D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + // Nothing to patch here + break; + } +} + void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const IR::Value coord{inst.Arg(1)}; @@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; SubScaleCoord(ir, inst, is_scaled); // Scale ImageFetch offset - ScaleIntegerComposite(ir, inst, is_scaled, 2); + ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2); } void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { @@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) { const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; ScaleIntegerComposite(ir, inst, is_scaled, 1); // Scale ImageFetch offset - ScaleIntegerComposite(ir, inst, is_scaled, 2); + ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2); } void PatchImageRead(IR::Block& block, IR::Inst& inst) { |