diff options
Diffstat (limited to 'src/shader_recompiler')
39 files changed, 491 insertions, 225 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 0cb1e193e..fd4a61a4d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -279,6 +279,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile, header += "OPTION NV_internal;" "OPTION NV_shader_storage_buffer;" "OPTION NV_gpu_program_fp64;"; + // TODO: Enable only when MS is used + header += "OPTION NV_texture_multisample;"; if (info.uses_int64_bit_atomics) { header += "OPTION NV_shader_atomic_int64;"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index 5bfdecc09..2fc2a0ac6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -43,10 +43,6 @@ void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } -void EmitBitCastS32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Alias(inst, value); -} - void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index f0bd84ab2..c7d7d5fef 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -137,6 +137,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal case IR::Attribute::VertexId: ctx.Add("MOV.F {}.x,{}.id;", inst, ctx.attrib_name); break; + case IR::Attribute::BaseInstance: + ctx.Add("MOV.F {}.x,{}.baseInstance;", inst, ctx.attrib_name); + break; + case IR::Attribute::BaseVertex: + ctx.Add("MOV.F {}.x,{}.baseVertex;", inst, ctx.attrib_name); + break; + case IR::Attribute::DrawID: + ctx.Add("MOV.F {}.x,{}.draw.id;", inst, ctx.attrib_name); + break; case IR::Attribute::FrontFace: ctx.Add("CMP.F {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name); break; @@ -156,6 +165,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, S case IR::Attribute::VertexId: ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name); break; + case IR::Attribute::BaseInstance: + ctx.Add("MOV.S {}.x,{}.baseInstance;", inst, ctx.attrib_name); + break; + case IR::Attribute::BaseVertex: + ctx.Add("MOV.S {}.x,{}.baseVertex;", inst, ctx.attrib_name); + break; + case IR::Attribute::DrawID: + ctx.Add("MOV.S {}.x,{}.draw.id;", inst, ctx.attrib_name); + break; default: throw NotImplementedException("Get U32 attribute {}", attr); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index e67e80fac..b7bc11416 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -59,7 +59,7 @@ std::string Image(EmitContext& ctx, IR::TextureInstInfo info, } } -std::string_view TextureType(IR::TextureInstInfo info) { +std::string_view TextureType(IR::TextureInstInfo info, bool is_ms = false) { if (info.is_depth) { switch (info.type) { case TextureType::Color1D: @@ -88,9 +88,9 @@ std::string_view TextureType(IR::TextureInstInfo info) { return "ARRAY1D"; case TextureType::Color2D: case TextureType::Color2DRect: - return "2D"; + return is_ms ? "2DMS" : "2D"; case TextureType::ColorArray2D: - return "ARRAY2D"; + return is_ms ? "ARRAY2DMS" : "ARRAY2D"; case TextureType::Color3D: return "3D"; case TextureType::ColorCube: @@ -510,15 +510,16 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const auto sparse_inst{PrepareSparse(inst)}; + const bool is_multisample{ms.type != Type::Void}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; - const std::string_view type{TextureType(info)}; + const std::string_view type{TextureType(info, is_multisample)}; const std::string texture{Texture(ctx, info, index)}; const std::string offset_vec{Offset(ctx, offset)}; const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; const Register ret{ctx.reg_alloc.Define(inst)}; if (info.type == TextureType::Buffer) { ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec); - } else if (ms.type != Type::Void) { + } else if (is_multisample) { ctx.Add("MOV.S {}.w,{};" "TXFMS.F{} {},{},{},{}{};", coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec); @@ -531,7 +532,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, } void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - ScalarS32 lod) { + ScalarS32 lod, [[maybe_unused]] const IR::Value& skip_mips) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const std::string texture{Texture(ctx, info, index)}; const std::string_view type{TextureType(info)}; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index eaaf9ba39..1a1ea61d5 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -197,7 +197,6 @@ void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Regist void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); @@ -582,7 +581,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - ScalarS32 lod); + ScalarS32 lod, const IR::Value& skip_mips); void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& derivatives, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index e8a4390f6..d91e04446 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -219,7 +219,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR EmitContext ctx{program, bindings, profile, runtime_info}; Precolor(program); EmitCode(ctx, program); - const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; + const std::string version{fmt::format("#version 460{}\n", GlslVersionSpecifier(ctx))}; ctx.header.insert(0, version); if (program.shared_memory_size > 0) { const auto requested_size{program.shared_memory_size}; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 8e5e6cf1f..1be4a0f59 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -48,10 +48,6 @@ void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) ctx.AddU64("{}=doubleBitsToUint64({});", inst, value); } -void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF32("{}=ftoi({});", inst, value); -} - void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 39579cf5d..2e369ed72 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -234,6 +234,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case IR::Attribute::FrontFace: ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst); break; + case IR::Attribute::BaseInstance: + ctx.AddF32("{}=itof(gl_BaseInstance);", inst); + break; + case IR::Attribute::BaseVertex: + ctx.AddF32("{}=itof(gl_BaseVertex);", inst); + break; + case IR::Attribute::DrawID: + ctx.AddF32("{}=itof(gl_DrawID);", inst); + break; default: throw NotImplementedException("Get attribute {}", attr); } @@ -250,6 +259,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, s case IR::Attribute::VertexId: ctx.AddU32("{}=uint(gl_VertexID);", inst); break; + case IR::Attribute::BaseInstance: + ctx.AddU32("{}=uint(gl_BaseInstance);", inst); + break; + case IR::Attribute::BaseVertex: + ctx.AddU32("{}=uint(gl_BaseVertex);", inst); + break; + case IR::Attribute::DrawID: + ctx.AddU32("{}=uint(gl_DrawID);", inst); + break; default: throw NotImplementedException("Get U32 attribute {}", attr); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index cecdbb9d6..4be2c25ec 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -414,7 +414,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view offset, std::string_view lod, - [[maybe_unused]] std::string_view ms) { + std::string_view ms) { const auto info{inst.Flags<IR::TextureInstInfo>()}; if (info.has_bias) { throw NotImplementedException("EmitImageFetch Bias texture samples"); @@ -431,19 +431,24 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, ctx.AddU1("{}=true;", *sparse_inst); } if (!sparse_inst || !supports_sparse) { - if (!offset.empty()) { - ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, - CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); + const auto int_coords{CoordsCastToInt(coords, info)}; + if (!ms.empty()) { + ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, ms); + } else if (!offset.empty()) { + ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, int_coords, lod, + CoordsCastToInt(offset, info)); } else { if (info.type == TextureType::Buffer) { ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords); } else { - ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, - CoordsCastToInt(coords, info), lod); + ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, lod); } } return; } + if (!ms.empty()) { + throw NotImplementedException("EmitImageFetch Sparse MSAA samples"); + } if (!offset.empty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), lod, @@ -455,27 +460,27 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, } void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod) { + std::string_view lod, const IR::Value& skip_mips_val) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const auto texture{Texture(ctx, info, index)}; + const bool skip_mips{skip_mips_val.U1()}; + const auto mips{ + [&] { return skip_mips ? "0u" : fmt::format("uint(textureQueryLevels({}))", texture); }}; switch (info.type) { case TextureType::Color1D: - return ctx.AddU32x4( - "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst, - texture, lod, texture); + return ctx.AddU32x4("{}=uvec4(uint(textureSize({},int({}))),0u,0u,{});", inst, texture, lod, + mips()); case TextureType::ColorArray1D: case TextureType::Color2D: case TextureType::ColorCube: case TextureType::Color2DRect: - return ctx.AddU32x4( - "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst, - texture, lod, texture); + return ctx.AddU32x4("{}=uvec4(uvec2(textureSize({},int({}))),0u,{});", inst, texture, lod, + mips()); case TextureType::ColorArray2D: case TextureType::Color3D: case TextureType::ColorArrayCube: - return ctx.AddU32x4( - "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture, - lod, texture); + return ctx.AddU32x4("{}=uvec4(uvec3(textureSize({},int({}))),{});", inst, texture, lod, + mips()); case TextureType::Buffer: throw NotImplementedException("EmitImageQueryDimensions Texture buffers"); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 4151c89de..8d0a65047 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -231,7 +231,6 @@ void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst); void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst); void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); @@ -655,7 +654,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view offset, std::string_view lod, std::string_view ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod); + std::string_view lod, const IR::Value& skip_mips); void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords); void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index 5d01ec0cd..1b006e811 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp @@ -61,24 +61,28 @@ std::string OutputDecorator(Stage stage, u32 size) { } } -std::string_view SamplerType(TextureType type, bool is_depth) { - if (is_depth) { - switch (type) { - case TextureType::Color1D: - return "sampler1DShadow"; - case TextureType::ColorArray1D: - return "sampler1DArrayShadow"; - case TextureType::Color2D: - return "sampler2DShadow"; - case TextureType::ColorArray2D: - return "sampler2DArrayShadow"; - case TextureType::ColorCube: - return "samplerCubeShadow"; - case TextureType::ColorArrayCube: - return "samplerCubeArrayShadow"; - default: - throw NotImplementedException("Texture type: {}", type); - } +std::string_view DepthSamplerType(TextureType type) { + switch (type) { + case TextureType::Color1D: + return "sampler1DShadow"; + case TextureType::ColorArray1D: + return "sampler1DArrayShadow"; + case TextureType::Color2D: + return "sampler2DShadow"; + case TextureType::ColorArray2D: + return "sampler2DArrayShadow"; + case TextureType::ColorCube: + return "samplerCubeShadow"; + case TextureType::ColorArrayCube: + return "samplerCubeArrayShadow"; + default: + throw NotImplementedException("Texture type: {}", type); + } +} + +std::string_view ColorSamplerType(TextureType type, bool is_multisample = false) { + if (is_multisample) { + ASSERT(type == TextureType::Color2D || type == TextureType::ColorArray2D); } switch (type) { case TextureType::Color1D: @@ -87,9 +91,9 @@ std::string_view SamplerType(TextureType type, bool is_depth) { return "sampler1DArray"; case TextureType::Color2D: case TextureType::Color2DRect: - return "sampler2D"; + return is_multisample ? "sampler2DMS" : "sampler2D"; case TextureType::ColorArray2D: - return "sampler2DArray"; + return is_multisample ? "sampler2DMSArray" : "sampler2DArray"; case TextureType::Color3D: return "sampler3D"; case TextureType::ColorCube: @@ -677,7 +681,7 @@ void EmitContext::SetupTextures(Bindings& bindings) { texture_buffers.reserve(info.texture_buffer_descriptors.size()); for (const auto& desc : info.texture_buffer_descriptors) { texture_buffers.push_back({bindings.texture, desc.count}); - const auto sampler_type{SamplerType(TextureType::Buffer, false)}; + const auto sampler_type{ColorSamplerType(TextureType::Buffer)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, sampler_type, bindings.texture, array_decorator); @@ -686,7 +690,8 @@ void EmitContext::SetupTextures(Bindings& bindings) { textures.reserve(info.texture_descriptors.size()); for (const auto& desc : info.texture_descriptors) { textures.push_back({bindings.texture, desc.count}); - const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; + const auto sampler_type{desc.is_depth ? DepthSamplerType(desc.type) + : ColorSamplerType(desc.type, desc.is_multisample)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, sampler_type, bindings.texture, array_decorator); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 50daacd95..c4ca28d11 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -18,10 +18,6 @@ void EmitBitCastU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitBitCastS32F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - void EmitBitCastF16U16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 73b67f0af..0cd87a48f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::PositionY: case IR::Attribute::PositionZ: case IR::Attribute::PositionW: - return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, - ctx.Const(element))); + return ctx.OpLoad( + ctx.F32[1], + ctx.need_input_position_indirect + ? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value, + ctx.Const(element)) + : AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element))); case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); @@ -339,6 +343,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); } + case IR::Attribute::BaseInstance: + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance)); + case IR::Attribute::BaseVertex: + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex)); + case IR::Attribute::DrawID: + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.draw_index)); case IR::Attribute::FrontFace: return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), @@ -380,6 +390,12 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; return ctx.OpISub(ctx.U32[1], index, base); } + case IR::Attribute::BaseInstance: + return ctx.OpLoad(ctx.U32[1], ctx.base_instance); + case IR::Attribute::BaseVertex: + return ctx.OpLoad(ctx.U32[1], ctx.base_vertex); + case IR::Attribute::DrawID: + return ctx.OpLoad(ctx.U32[1], ctx.draw_index); default: throw NotImplementedException("Read U32 attribute {}", attr); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index fb5799c42..3b969d915 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -436,16 +436,22 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c if (info.type == TextureType::Buffer) { lod = Id{}; } + if (Sirit::ValidId(ms)) { + // This image is multisampled, lod must be implicit + lod = Id{}; + } const ImageOperands operands(offset, lod, ms); return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); } -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, + const IR::Value& skip_mips_val) { const auto info{inst->Flags<IR::TextureInstInfo>()}; const Id image{TextureImage(ctx, info, index)}; const Id zero{ctx.u32_zero_value}; - const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; + const bool skip_mips{skip_mips_val.U1()}; + const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }}; switch (info.type) { case TextureType::Color1D: return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index e31cdc5e8..a440b557d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -179,7 +179,6 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); void EmitBitCastU16F16(EmitContext& ctx); Id EmitBitCastU32F32(EmitContext& ctx, Id value); void EmitBitCastU64F64(EmitContext& ctx); -void EmitBitCastS32F32(EmitContext& ctx); void EmitBitCastF16U16(EmitContext&); Id EmitBitCastF32U32(EmitContext& ctx, Id value); void EmitBitCastF64U64(EmitContext& ctx); @@ -540,7 +539,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, const IR::Value& offset, const IR::Value& offset2, Id dref); Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms); -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, + const IR::Value& skip_mips); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivates, Id offset, Id lod_clamp); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 2c90f2368..c5db19d09 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value); } -Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { - const Id thirty_two{ctx.Const(32u)}; - const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; - const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; - return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); +Id AddPartitionBase(EmitContext& ctx, Id thread_id) { + const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))}; + const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))}; + return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base); } } // Anonymous namespace @@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) { Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; - const Id thread_id{GetThreadId(ctx)}; - if (ctx.profile.warp_size_potentially_larger_than_guest) { - const Id thirty_two{ctx.Const(32u)}; - const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)}; - const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)}; - const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; - index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index); - clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); - } + const Id thread_id{EmitLaneId(ctx)}; const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; - const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; + Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + src_thread_id = AddPartitionBase(ctx, src_thread_id); + } + SetInBoundsFlag(inst, in_range); return SelectValue(ctx, in_range, value, src_thread_id); } Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{GetThreadId(ctx)}; - if (ctx.profile.warp_size_potentially_larger_than_guest) { - clamp = GetUpperClamp(ctx, thread_id, clamp); - } + const Id thread_id{EmitLaneId(ctx)}; const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; - const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; + Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + src_thread_id = AddPartitionBase(ctx, src_thread_id); + } + SetInBoundsFlag(inst, in_range); return SelectValue(ctx, in_range, value, src_thread_id); } Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{GetThreadId(ctx)}; - if (ctx.profile.warp_size_potentially_larger_than_guest) { - clamp = GetUpperClamp(ctx, thread_id, clamp); - } + const Id thread_id{EmitLaneId(ctx)}; const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; - const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; + Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + src_thread_id = AddPartitionBase(ctx, src_thread_id); + } + SetInBoundsFlag(inst, in_range); return SelectValue(ctx, in_range, value, src_thread_id); } Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask) { - const Id thread_id{GetThreadId(ctx)}; - if (ctx.profile.warp_size_potentially_larger_than_guest) { - clamp = GetUpperClamp(ctx, thread_id, clamp); - } + const Id thread_id{EmitLaneId(ctx)}; const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; - const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; + Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + if (ctx.profile.warp_size_potentially_larger_than_guest) { + src_thread_id = AddPartitionBase(ctx, src_thread_id); + } + SetInBoundsFlag(inst, in_range); return SelectValue(ctx, in_range, value, src_thread_id); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 41dc6d031..3b97721e1 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -35,6 +35,7 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; const Id type{ctx.F32[1]}; const bool depth{desc.is_depth}; + const bool ms{desc.is_multisample}; switch (desc.type) { case TextureType::Color1D: return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format); @@ -42,9 +43,9 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format); case TextureType::Color2D: case TextureType::Color2DRect: - return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format); case TextureType::ColorArray2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, ms, 1, format); case TextureType::Color3D: return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format); case TextureType::ColorCube: @@ -544,7 +545,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { U16 = Name(TypeInt(16, false), "u16"); S16 = Name(TypeInt(16, true), "s16"); } - if (info.uses_int64) { + if (info.uses_int64 && profile.support_int64) { AddCapability(spv::Capability::Int64); U64 = Name(TypeInt(64, false), "u64"); } @@ -721,9 +722,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { size_t label_index{0}; if (info.loads.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); - const Id pointer{is_array - ? OpAccessChain(input_f32, input_position, vertex, masked_index) - : OpAccessChain(input_f32, input_position, masked_index)}; + const Id pointer{[&]() { + if (need_input_position_indirect) { + if (is_array) + return OpAccessChain(input_f32, input_position, vertex, u32_zero_value, + masked_index); + else + return OpAccessChain(input_f32, input_position, u32_zero_value, + masked_index); + } else { + if (is_array) + return OpAccessChain(input_f32, input_position, vertex, masked_index); + else + return OpAccessChain(input_f32, input_position, masked_index); + } + }()}; const Id result{OpLoad(F32[1], pointer)}; OpReturnValue(result); ++label_index; @@ -1367,30 +1380,56 @@ void EmitContext::DefineInputs(const IR::Program& program) { Decorate(layer, spv::Decoration::Flat); } if (loads.AnyComponent(IR::Attribute::PositionX)) { - const bool is_fragment{stage != Stage::Fragment}; - const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; - input_position = DefineInput(*this, F32[4], true, built_in); - if (profile.support_geometry_shader_passthrough) { - if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { - Decorate(input_position, spv::Decoration::PassthroughNV); + const bool is_fragment{stage == Stage::Fragment}; + if (!is_fragment && profile.has_broken_spirv_position_input) { + need_input_position_indirect = true; + + const Id input_position_struct = TypeStruct(F32[4]); + input_position = DefineInput(*this, input_position_struct, true); + + MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn, + static_cast<unsigned>(spv::BuiltIn::Position)); + Decorate(input_position_struct, spv::Decoration::Block); + } else { + const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord + : spv::BuiltIn::Position}; + input_position = DefineInput(*this, F32[4], true, built_in); + + if (profile.support_geometry_shader_passthrough) { + if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + Decorate(input_position, spv::Decoration::PassthroughNV); + } } } } if (loads[IR::Attribute::InstanceId]) { if (profile.support_vertex_instance_id) { instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); + if (loads[IR::Attribute::BaseInstance]) { + base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); + } } else { instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } + } else if (loads[IR::Attribute::BaseInstance]) { + base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } if (loads[IR::Attribute::VertexId]) { if (profile.support_vertex_instance_id) { vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); + if (loads[IR::Attribute::BaseVertex]) { + base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); + } } else { vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } + } else if (loads[IR::Attribute::BaseVertex]) { + base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); + } + if (loads[IR::Attribute::DrawID]) { + draw_index = DefineInput(*this, U32[1], true, spv::BuiltIn::DrawIndex); } if (loads[IR::Attribute::FrontFace]) { front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index dde45b4bc..dbc5c55b9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -218,6 +218,7 @@ public: Id base_instance{}; Id vertex_id{}; Id vertex_index{}; + Id draw_index{}; Id base_vertex{}; Id front_face{}; Id point_coord{}; @@ -279,6 +280,7 @@ public: Id write_global_func_u32x2{}; Id write_global_func_u32x4{}; + bool need_input_position_indirect{}; Id input_position{}; std::array<Id, 32> input_generics{}; diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 402f2664f..26e8307c1 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -34,6 +34,11 @@ public: [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; + [[nodiscard]] virtual bool HasHLEMacroState() const = 0; + + [[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer(u32 bank, + u32 offset) = 0; + virtual void Dump(u64 hash) = 0; [[nodiscard]] const ProgramHeader& SPH() const noexcept { @@ -52,11 +57,16 @@ public: return start_address; } + [[nodiscard]] bool IsPropietaryDriver() const noexcept { + return is_propietary_driver; + } + protected: ProgramHeader sph{}; std::array<u32, 8> gp_passthrough_mask{}; Stage stage{}; u32 start_address{}; + bool is_propietary_driver{}; }; } // namespace Shader diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 7d3d882e4..1bf9db935 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -446,6 +446,12 @@ std::string NameOf(Attribute attribute) { return "ViewportMask"; case Attribute::FrontFace: return "FrontFace"; + case Attribute::BaseInstance: + return "BaseInstance"; + case Attribute::BaseVertex: + return "BaseVertex"; + case Attribute::DrawID: + return "DrawID"; } return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); } diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 6ee3947b1..5f039b6f6 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -219,6 +219,11 @@ enum class Attribute : u64 { FixedFncTexture9Q = 231, ViewportMask = 232, FrontFace = 255, + + // Implementation attributes + BaseInstance = 256, + BaseVertex = 257, + DrawID = 258, }; constexpr size_t NUM_GENERICS = 32; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0cdac0eff..b7caa4246 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { return Inst<F32>(Opcode::GetAttribute, attribute, vertex); } +U32 IREmitter::GetAttributeU32(IR::Attribute attribute) { + return GetAttributeU32(attribute, Imm32(0)); +} + +U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) { + return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex); +} + void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { Inst(Opcode::SetAttribute, attribute, value, vertex); } @@ -696,11 +704,6 @@ IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) { } template <> -IR::S32 IREmitter::BitCast<IR::S32, IR::F32>(const IR::F32& value) { - return Inst<IR::S32>(Opcode::BitCastS32F32, value); -} - -template <> IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) { return Inst<IR::F32>(Opcode::BitCastF32U32, value); } @@ -1843,15 +1846,16 @@ Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Valu return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); } -Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { +Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, + const IR::U1& skip_mips) { const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions : Opcode::BindlessImageQueryDimensions}; - return Inst(op, handle, lod); + return Inst(op, handle, lod, skip_mips); } Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, - TextureInstInfo info) { - return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod); + const IR::U1& skip_mips, TextureInstInfo info) { + return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod, skip_mips); } Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 2df992feb..f3c81dbe1 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -74,6 +74,8 @@ public: [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); + [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute); + [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex); void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); @@ -318,9 +320,10 @@ public: [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, const F32& lod, const Value& offset, TextureInstInfo info); - [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, - TextureInstInfo info); + const IR::U1& skip_mips); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, + const IR::U1& skip_mips, TextureInstInfo info); [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info); @@ -406,7 +409,8 @@ private: } template <typename T> - requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags { + requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) + struct Flags { Flags() = default; Flags(T proxy_) : proxy{proxy_} {} diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index d155afd0f..e300714f3 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -38,7 +38,6 @@ constexpr Type U8{Type::U8}; constexpr Type U16{Type::U16}; constexpr Type U32{Type::U32}; constexpr Type U64{Type::U64}; -constexpr Type S32{Type::S32}; constexpr Type F16{Type::F16}; constexpr Type F32{Type::F32}; constexpr Type F64{Type::F64}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 1fe3749cc..4447d67b0 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -175,7 +175,6 @@ OPCODE(SelectF64, F64, U1, OPCODE(BitCastU16F16, U16, F16, ) OPCODE(BitCastU32F32, U32, F32, ) OPCODE(BitCastU64F64, U64, F64, ) -OPCODE(BitCastS32F32, S32, F32, ) OPCODE(BitCastF16U16, F16, U16, ) OPCODE(BitCastF32U32, F32, U32, ) OPCODE(BitCastF64U64, F64, U64, ) @@ -483,7 +482,7 @@ OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) -OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, U1, ) OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageRead, U32x4, U32, Opaque, ) @@ -496,7 +495,7 @@ OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) -OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, U1, ) OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BoundImageRead, U32x4, U32, Opaque, ) @@ -509,7 +508,7 @@ OPCODE(ImageSampleDrefExplicitLod, F32, Opaq OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) -OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, ) +OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageRead, U32x4, Opaque, Opaque, ) diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 5a7c706ad..04c8c4ddb 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -24,22 +24,21 @@ enum class Type { U16 = 1 << 7, U32 = 1 << 8, U64 = 1 << 9, - S32 = 1 << 10, - F16 = 1 << 11, - F32 = 1 << 12, - F64 = 1 << 13, - U32x2 = 1 << 14, - U32x3 = 1 << 15, - U32x4 = 1 << 16, - F16x2 = 1 << 17, - F16x3 = 1 << 18, - F16x4 = 1 << 19, - F32x2 = 1 << 20, - F32x3 = 1 << 21, - F32x4 = 1 << 22, - F64x2 = 1 << 23, - F64x3 = 1 << 24, - F64x4 = 1 << 25, + F16 = 1 << 10, + F32 = 1 << 11, + F64 = 1 << 12, + U32x2 = 1 << 13, + U32x3 = 1 << 14, + U32x4 = 1 << 15, + F16x2 = 1 << 16, + F16x3 = 1 << 17, + F16x4 = 1 << 18, + F32x2 = 1 << 19, + F32x3 = 1 << 20, + F32x4 = 1 << 21, + F64x2 = 1 << 22, + F64x3 = 1 << 23, + F64x4 = 1 << 24, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 30ba12316..346169328 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -23,8 +23,6 @@ Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} -Value::Value(s32 value) noexcept : type{Type::S32}, imm_s32{value} {} - Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} @@ -71,7 +69,6 @@ bool Value::operator==(const Value& other) const { return imm_u16 == other.imm_u16; case Type::U32: case Type::F32: - case Type::S32: return imm_u32 == other.imm_u32; case Type::U64: case Type::F64: diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 8b34356fd..22e89dd1b 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -101,9 +101,8 @@ public: TypedValue() = default; template <IR::Type other_type> - requires((other_type & type_) != IR::Type::Void) explicit(false) - TypedValue(const TypedValue<other_type>& value) - : Value(value) {} + requires((other_type & type_) != IR::Type::Void) + explicit(false) TypedValue(const TypedValue<other_type>& value) : Value(value) {} explicit TypedValue(const Value& value) : Value(value) { if ((value.Type() & type_) == IR::Type::Void) { @@ -194,16 +193,16 @@ public: void ReplaceOpcode(IR::Opcode opcode); template <typename FlagsType> - requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) - [[nodiscard]] FlagsType Flags() const noexcept { + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) + [[nodiscard]] FlagsType Flags() const noexcept { FlagsType ret; std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret)); return ret; } template <typename FlagsType> - requires(sizeof(FlagsType) <= sizeof(u32) && - std::is_trivially_copyable_v<FlagsType>) void SetFlags(FlagsType value) noexcept { + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) + void SetFlags(FlagsType value) noexcept { std::memcpy(&flags, &value, sizeof(value)); } @@ -268,7 +267,6 @@ using U8 = TypedValue<Type::U8>; using U16 = TypedValue<Type::U16>; using U32 = TypedValue<Type::U32>; using U64 = TypedValue<Type::U64>; -using S32 = TypedValue<Type::S32>; using F16 = TypedValue<Type::F16>; using F32 = TypedValue<Type::F32>; using F64 = TypedValue<Type::F64>; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index f8cfd4ab6..39af62559 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -15,11 +15,13 @@ enum class Mode : u64 { SamplePos = 5, }; -IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { +IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg, u64 mask) { switch (mode) { case Mode::Dimension: { + const bool needs_num_mips{((mask >> 3) & 1) != 0}; + const IR::U1 skip_mips{v.ir.Imm1(!needs_num_mips)}; const IR::U32 lod{v.X(src_reg)}; - return v.ir.ImageQueryDimension(handle, lod); + return v.ir.ImageQueryDimension(handle, lod, skip_mips); } case Mode::TextureType: case Mode::SamplePos: @@ -46,7 +48,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) { handle = v.X(src_reg); ++src_reg; } - const IR::Value query{Query(v, handle, txq.mode, src_reg)}; + const IR::Value query{Query(v, handle, txq.mode, src_reg, txq.mask)}; IR::Reg dest_reg{txq.dest_reg}; for (int element = 0; element < 4; ++element) { if (((txq.mask >> element) & 1) == 0) { diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 3adbd2b16..a42453e90 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings( } return mapping; } + +void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program, + const Shader::VaryingState& passthrough_mask, + bool passthrough_position, + std::optional<IR::Attribute> passthrough_layer_attr) { + for (u32 i = 0; i < program.output_vertices; i++) { + // Assign generics from input + for (u32 j = 0; j < 32; j++) { + if (!passthrough_mask.Generic(j)) { + continue; + } + + const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4); + ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); + } + + if (passthrough_position) { + // Assign position from input + const IR::Attribute attr = IR::Attribute::PositionX; + ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); + ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); + } + + if (passthrough_layer_attr) { + // Assign layer + ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr), + ir.Imm32(0)); + } + + // Emit vertex + ir.EmitVertex(ir.Imm32(0)); + } + ir.EndPrimitive(ir.Imm32(0)); +} + +u32 GetOutputTopologyVertices(OutputTopology output_topology) { + switch (output_topology) { + case OutputTopology::PointList: + return 1; + case OutputTopology::LineStrip: + return 2; + default: + return 3; + } +} + +void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + EmitGeometryPassthrough( + ir, program, program.info.passthrough, + program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {}); + } + } + } +} + } // Anonymous namespace IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, @@ -195,9 +259,14 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; if (program.is_geometry_passthrough) { const auto& mask{env.GpPassthroughMask()}; - for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { + for (size_t i = 0; i < mask.size() * 32; ++i) { program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; } + + if (!host_info.support_geometry_shader_passthrough) { + program.output_vertices = GetOutputTopologyVertices(program.output_topology); + LowerGeometryPassthrough(program, host_info); + } } break; } @@ -219,11 +288,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo } Optimization::SsaRewritePass(program); - Optimization::ConstantPropagationPass(program); + Optimization::ConstantPropagationPass(env, program); Optimization::PositionPass(env, program); - Optimization::GlobalMemoryToStorageBufferPass(program); + Optimization::GlobalMemoryToStorageBufferPass(program, host_info); Optimization::TexturePass(env, program, host_info); if (Settings::values.resolution_info.active) { @@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool, IR::Program program; program.stage = Stage::Geometry; program.output_topology = output_topology; - switch (output_topology) { - case OutputTopology::PointList: - program.output_vertices = 1; - break; - case OutputTopology::LineStrip: - program.output_vertices = 2; - break; - default: - program.output_vertices = 3; - break; - } + program.output_vertices = GetOutputTopologyVertices(output_topology); program.is_geometry_passthrough = false; program.info.loads.mask = source_program.info.stores.mask; @@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool, node.data.block = current_block; IR::IREmitter ir{*current_block}; - for (u32 i = 0; i < program.output_vertices; i++) { - // Assign generics from input - for (u32 j = 0; j < 32; j++) { - if (!program.info.stores.Generic(j)) { - continue; - } - - const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4); - ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); - } - - // Assign position from input - const IR::Attribute attr = IR::Attribute::PositionX; - ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); - ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); - - // Assign layer - ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer), - ir.Imm32(0)); - - // Emit vertex - ir.EmitVertex(ir.Imm32(0)); - } - ir.EndPrimitive(ir.Imm32(0)); + EmitGeometryPassthrough(ir, program, program.info.stores, true, + source_program.info.emulated_layer); IR::Block* return_block{block_pool.Create(inst_pool)}; IR::IREmitter{*return_block}.Epilogue(); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index d5d279554..55fc48768 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -15,6 +15,9 @@ struct HostTranslateInfo { bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS + u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs + bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry + ///< passthrough shaders }; } // namespace Shader diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 826f9a54a..4d81e9336 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -7,6 +7,7 @@ #include <type_traits> #include "common/bit_cast.h" +#include "shader_recompiler/environment.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/value.h" @@ -515,6 +516,9 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { case IR::Attribute::PrimitiveId: case IR::Attribute::InstanceId: case IR::Attribute::VertexId: + case IR::Attribute::BaseVertex: + case IR::Attribute::BaseInstance: + case IR::Attribute::DrawID: break; default: return; @@ -644,7 +648,63 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { } } -void ConstantPropagation(IR::Block& block, IR::Inst& inst) { +void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { + const IR::Value bank{inst.Arg(0)}; + const IR::Value offset{inst.Arg(1)}; + if (!bank.IsImmediate() || !offset.IsImmediate()) { + return; + } + const auto bank_value = bank.U32(); + const auto offset_value = offset.U32(); + auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value); + if (!replacement) { + return; + } + const auto new_attribute = [replacement]() { + switch (*replacement) { + case ReplaceConstant::BaseInstance: + return IR::Attribute::BaseInstance; + case ReplaceConstant::BaseVertex: + return IR::Attribute::BaseVertex; + case ReplaceConstant::DrawID: + return IR::Attribute::DrawID; + default: + throw NotImplementedException("Not implemented replacement variable {}", *replacement); + } + }(); + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::GetCbufU32) { + inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute)); + } else { + inst.ReplaceUsesWith(ir.GetAttribute(new_attribute)); + } +} + +void FoldDriverConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst, u32 which_bank, + u32 offset_start = 0, u32 offset_end = std::numeric_limits<u16>::max()) { + const IR::Value bank{inst.Arg(0)}; + const IR::Value offset{inst.Arg(1)}; + if (!bank.IsImmediate() || !offset.IsImmediate()) { + return; + } + const auto bank_value = bank.U32(); + if (bank_value != which_bank) { + return; + } + const auto offset_value = offset.U32(); + if (offset_value < offset_start || offset_value >= offset_end) { + return; + } + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::GetCbufU32) { + inst.ReplaceUsesWith(IR::Value{env.ReadCbufValue(bank_value, offset_value)}); + } else { + inst.ReplaceUsesWith( + IR::Value{Common::BitCast<f32>(env.ReadCbufValue(bank_value, offset_value))}); + } +} + +void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::GetRegister: return FoldGetRegister(inst); @@ -789,18 +849,28 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { IR::Opcode::CompositeInsertF16x4); case IR::Opcode::FSwizzleAdd: return FoldFSwizzleAdd(block, inst); + case IR::Opcode::GetCbufF32: + case IR::Opcode::GetCbufU32: + if (env.HasHLEMacroState()) { + FoldConstBuffer(env, block, inst); + } + if (env.IsPropietaryDriver()) { + FoldDriverConstBuffer(env, block, inst, 1); + } + break; default: break; } } + } // Anonymous namespace -void ConstantPropagationPass(IR::Program& program) { +void ConstantPropagationPass(Environment& env, IR::Program& program) { const auto end{program.post_order_blocks.rend()}; for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { IR::Block* const block{*it}; for (IR::Inst& inst : block->Instructions()) { - ConstantPropagation(*block, inst); + ConstantPropagation(env, *block, inst); } } } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 336338e62..9101722ba 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -11,6 +11,7 @@ #include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { @@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) } /// Returns the offset in indices (not bytes) for an equivalent storage instruction -IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { +IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::U32 offset; if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { @@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer } // Subtract the least significant 32 bits from the guest offset. The result is the storage // buffer offset in bytes. - const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; + IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; + + // Align the offset base to match the host alignment requirements + low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))); return ir.ISub(offset, low_cbuf); } @@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, } } // Anonymous namespace -void GlobalMemoryToStorageBufferPass(IR::Program& program) { +void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) { StorageInfo info; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { @@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; IR::Block* const block{storage_inst.block}; IR::Inst* const inst{storage_inst.inst}; - const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; + const IR::U32 offset{ + StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)}; Replace(*block, *inst, index, offset); } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 11bfe801a..4ffad1172 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -13,9 +13,9 @@ struct HostTranslateInfo; namespace Shader::Optimization { void CollectShaderInfoPass(Environment& env, IR::Program& program); -void ConstantPropagationPass(IR::Program& program); +void ConstantPropagationPass(Environment& env, IR::Program& program); void DeadCodeEliminationPass(IR::Program& program); -void GlobalMemoryToStorageBufferPass(IR::Program& program); +void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info); void IdentityRemovalPass(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); void LowerInt64ToInt32(IR::Program& program); diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index f5c86fcb1..d374c976a 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -355,21 +355,21 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { }; } -TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { +u32 GetTextureHandle(Environment& env, const ConstBufferAddr& cbuf) { const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index}; const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset}; const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset) << cbuf.shift_left}; const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset) << cbuf.secondary_shift_left}; - return env.ReadTextureType(lhs_raw | rhs_raw); + return lhs_raw | rhs_raw; +} + +TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { + return env.ReadTextureType(GetTextureHandle(env, cbuf)); } TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) { - const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index}; - const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset}; - const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; - const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; - return env.ReadTexturePixelFormat(lhs_raw | rhs_raw); + return env.ReadTexturePixelFormat(GetTextureHandle(env, cbuf)); } class Descriptors { @@ -386,8 +386,10 @@ public: return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { return desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && + desc.shift_left == existing.shift_left && desc.secondary_cbuf_index == existing.secondary_cbuf_index && desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.secondary_shift_left == existing.secondary_shift_left && desc.count == existing.count && desc.size_shift == existing.size_shift && desc.has_secondary == existing.has_secondary; }); @@ -405,15 +407,20 @@ public: } u32 Add(const TextureDescriptor& desc) { - return Add(texture_descriptors, desc, [&desc](const auto& existing) { + const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) { return desc.type == existing.type && desc.is_depth == existing.is_depth && desc.has_secondary == existing.has_secondary && desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && + desc.shift_left == existing.shift_left && desc.secondary_cbuf_index == existing.secondary_cbuf_index && desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.secondary_shift_left == existing.secondary_shift_left && desc.count == existing.count && desc.size_shift == existing.size_shift; - }); + })}; + // TODO: Read this from TIC + texture_descriptors[index].is_multisample |= desc.is_multisample; + return index; } u32 Add(const ImageDescriptor& desc) { @@ -452,7 +459,8 @@ void PatchImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { const IR::Value coord(inst.Arg(1)); const IR::Value handle(ir.Imm32(0)); const IR::U32 lod{ir.Imm32(0)}; - const IR::Value texture_size = ir.ImageQueryDimension(handle, lod, info); + const IR::U1 skip_mips{ir.Imm1(true)}; + const IR::Value texture_size = ir.ImageQueryDimension(handle, lod, skip_mips, info); inst.SetArg( 1, ir.CompositeConstruct( ir.FPMul(IR::F32(ir.CompositeExtract(coord, 0)), @@ -486,10 +494,10 @@ void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_ const IR::F32 w(ir.CompositeExtract(new_inst, 3)); const IR::F16F32F64 max_value(ir.Imm32(get_max_value())); const IR::Value converted = - ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(x)), max_value), - ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(y)), max_value), - ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(z)), max_value), - ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(w)), max_value)); + ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(x)), max_value), + ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(y)), max_value), + ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(z)), max_value), + ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(w)), max_value)); inst.ReplaceUsesWith(converted); } } // Anonymous namespace @@ -524,6 +532,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo const auto& cbuf{texture_inst.cbuf}; auto flags{inst->Flags<IR::TextureInstInfo>()}; + bool is_multisample{false}; switch (inst->GetOpcode()) { case IR::Opcode::ImageQueryDimensions: flags.type.Assign(ReadTextureType(env, cbuf)); @@ -538,6 +547,12 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo } break; case IR::Opcode::ImageFetch: + if (flags.type == TextureType::Color2D || flags.type == TextureType::Color2DRect || + flags.type == TextureType::ColorArray2D) { + is_multisample = !inst->Arg(4).IsEmpty(); + } else { + inst->SetArg(4, IR::U32{}); + } if (flags.type != TextureType::Color1D) { break; } @@ -613,6 +628,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo index = descriptors.Add(TextureDescriptor{ .type = flags.type, .is_depth = flags.is_depth != 0, + .is_multisample = is_multisample, .has_secondary = cbuf.has_secondary, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index 2b42c4ba2..5d648b159 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/shader_recompiler/object_pool.h @@ -10,7 +10,7 @@ namespace Shader { template <typename T> -requires std::is_destructible_v<T> + requires std::is_destructible_v<T> class ObjectPool { public: explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { @@ -18,7 +18,7 @@ public: } template <typename... Args> - requires std::is_constructible_v<T, Args...> + requires std::is_constructible_v<T, Args...> [[nodiscard]] T* Create(Args&&... args) { return std::construct_at(Memory(), std::forward<Args>(args)...); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index b8841a536..253e0d0bd 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -55,6 +55,8 @@ struct Profile { /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; + /// The Position builtin needs to be wrapped in a struct when used as an input + bool has_broken_spirv_position_input{}; /// Offset image operands with an unsigned type do not work bool has_broken_unsigned_image_offsets{}; /// Signed instructions with unsigned data types are misinterpreted diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d9c6e92db..d308db942 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -16,6 +16,12 @@ namespace Shader { +enum class ReplaceConstant : u32 { + BaseInstance, + BaseVertex, + DrawID, +}; + enum class TextureType : u32 { Color1D, ColorArray1D, @@ -59,6 +65,8 @@ enum class Interpolation { struct ConstantBufferDescriptor { u32 index; u32 count; + + auto operator<=>(const ConstantBufferDescriptor&) const = default; }; struct StorageBufferDescriptor { @@ -66,6 +74,8 @@ struct StorageBufferDescriptor { u32 cbuf_offset; u32 count; bool is_written; + + auto operator<=>(const StorageBufferDescriptor&) const = default; }; struct TextureBufferDescriptor { @@ -78,6 +88,8 @@ struct TextureBufferDescriptor { u32 secondary_shift_left; u32 count; u32 size_shift; + + auto operator<=>(const TextureBufferDescriptor&) const = default; }; using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>; @@ -89,12 +101,15 @@ struct ImageBufferDescriptor { u32 cbuf_offset; u32 count; u32 size_shift; + + auto operator<=>(const ImageBufferDescriptor&) const = default; }; using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>; struct TextureDescriptor { TextureType type; bool is_depth; + bool is_multisample; bool has_secondary; u32 cbuf_index; u32 cbuf_offset; @@ -104,6 +119,8 @@ struct TextureDescriptor { u32 secondary_shift_left; u32 count; u32 size_shift; + + auto operator<=>(const TextureDescriptor&) const = default; }; using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; @@ -116,6 +133,8 @@ struct ImageDescriptor { u32 cbuf_offset; u32 count; u32 size_shift; + + auto operator<=>(const ImageDescriptor&) const = default; }; using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h index 7b28a285f..18a9aaf50 100644 --- a/src/shader_recompiler/varying_state.h +++ b/src/shader_recompiler/varying_state.h @@ -11,7 +11,7 @@ namespace Shader { struct VaryingState { - std::bitset<256> mask{}; + std::bitset<512> mask{}; void Set(IR::Attribute attribute, bool state = true) { mask[static_cast<size_t>(attribute)] = state; |