summaryrefslogtreecommitdiffstats
path: root/src/shader_recompiler
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm.cpp2
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp4
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp18
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_image.cpp13
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_instructions.h3
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl.cpp2
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp4
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp18
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_image.cpp37
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_instructions.h3
-rw-r--r--src/shader_recompiler/backend/glsl/glsl_emit_context.cpp49
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp4
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp20
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_image.cpp10
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_instructions.h4
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp58
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.cpp63
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.h2
-rw-r--r--src/shader_recompiler/environment.h10
-rw-r--r--src/shader_recompiler/frontend/ir/attribute.cpp6
-rw-r--r--src/shader_recompiler/frontend/ir/attribute.h5
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.cpp22
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.h10
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.h1
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.inc7
-rw-r--r--src/shader_recompiler/frontend/ir/type.h31
-rw-r--r--src/shader_recompiler/frontend/ir/value.cpp3
-rw-r--r--src/shader_recompiler/frontend/ir/value.h14
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp8
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp118
-rw-r--r--src/shader_recompiler/host_translate_info.h3
-rw-r--r--src/shader_recompiler/ir_opt/constant_propagation_pass.cpp76
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp13
-rw-r--r--src/shader_recompiler/ir_opt/passes.h4
-rw-r--r--src/shader_recompiler/ir_opt/texture_pass.cpp44
-rw-r--r--src/shader_recompiler/object_pool.h4
-rw-r--r--src/shader_recompiler/profile.h2
-rw-r--r--src/shader_recompiler/shader_info.h19
-rw-r--r--src/shader_recompiler/varying_state.h2
39 files changed, 491 insertions, 225 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 0cb1e193e..fd4a61a4d 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -279,6 +279,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile,
header += "OPTION NV_internal;"
"OPTION NV_shader_storage_buffer;"
"OPTION NV_gpu_program_fp64;";
+ // TODO: Enable only when MS is used
+ header += "OPTION NV_texture_multisample;";
if (info.uses_int64_bit_atomics) {
header += "OPTION NV_shader_atomic_int64;";
}
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
index 5bfdecc09..2fc2a0ac6 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -43,10 +43,6 @@ void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
Alias(inst, value);
}
-void EmitBitCastS32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
- Alias(inst, value);
-}
-
void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
Alias(inst, value);
}
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
index f0bd84ab2..c7d7d5fef 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -137,6 +137,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
case IR::Attribute::VertexId:
ctx.Add("MOV.F {}.x,{}.id;", inst, ctx.attrib_name);
break;
+ case IR::Attribute::BaseInstance:
+ ctx.Add("MOV.F {}.x,{}.baseInstance;", inst, ctx.attrib_name);
+ break;
+ case IR::Attribute::BaseVertex:
+ ctx.Add("MOV.F {}.x,{}.baseVertex;", inst, ctx.attrib_name);
+ break;
+ case IR::Attribute::DrawID:
+ ctx.Add("MOV.F {}.x,{}.draw.id;", inst, ctx.attrib_name);
+ break;
case IR::Attribute::FrontFace:
ctx.Add("CMP.F {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
break;
@@ -156,6 +165,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, S
case IR::Attribute::VertexId:
ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
break;
+ case IR::Attribute::BaseInstance:
+ ctx.Add("MOV.S {}.x,{}.baseInstance;", inst, ctx.attrib_name);
+ break;
+ case IR::Attribute::BaseVertex:
+ ctx.Add("MOV.S {}.x,{}.baseVertex;", inst, ctx.attrib_name);
+ break;
+ case IR::Attribute::DrawID:
+ ctx.Add("MOV.S {}.x,{}.draw.id;", inst, ctx.attrib_name);
+ break;
default:
throw NotImplementedException("Get U32 attribute {}", attr);
}
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index e67e80fac..b7bc11416 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -59,7 +59,7 @@ std::string Image(EmitContext& ctx, IR::TextureInstInfo info,
}
}
-std::string_view TextureType(IR::TextureInstInfo info) {
+std::string_view TextureType(IR::TextureInstInfo info, bool is_ms = false) {
if (info.is_depth) {
switch (info.type) {
case TextureType::Color1D:
@@ -88,9 +88,9 @@ std::string_view TextureType(IR::TextureInstInfo info) {
return "ARRAY1D";
case TextureType::Color2D:
case TextureType::Color2DRect:
- return "2D";
+ return is_ms ? "2DMS" : "2D";
case TextureType::ColorArray2D:
- return "ARRAY2D";
+ return is_ms ? "ARRAY2DMS" : "ARRAY2D";
case TextureType::Color3D:
return "3D";
case TextureType::ColorCube:
@@ -510,15 +510,16 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const auto sparse_inst{PrepareSparse(inst)};
+ const bool is_multisample{ms.type != Type::Void};
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
- const std::string_view type{TextureType(info)};
+ const std::string_view type{TextureType(info, is_multisample)};
const std::string texture{Texture(ctx, info, index)};
const std::string offset_vec{Offset(ctx, offset)};
const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
const Register ret{ctx.reg_alloc.Define(inst)};
if (info.type == TextureType::Buffer) {
ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec);
- } else if (ms.type != Type::Void) {
+ } else if (is_multisample) {
ctx.Add("MOV.S {}.w,{};"
"TXFMS.F{} {},{},{},{}{};",
coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec);
@@ -531,7 +532,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
}
void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
- ScalarS32 lod) {
+ ScalarS32 lod, [[maybe_unused]] const IR::Value& skip_mips) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const std::string texture{Texture(ctx, info, index)};
const std::string_view type{TextureType(info)};
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index eaaf9ba39..1a1ea61d5 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -197,7 +197,6 @@ void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Regist
void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
-void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
@@ -582,7 +581,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms);
void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
- ScalarS32 lod);
+ ScalarS32 lod, const IR::Value& skip_mips);
void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
const IR::Value& coord, const IR::Value& derivatives,
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
index e8a4390f6..d91e04446 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
@@ -219,7 +219,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR
EmitContext ctx{program, bindings, profile, runtime_info};
Precolor(program);
EmitCode(ctx, program);
- const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))};
+ const std::string version{fmt::format("#version 460{}\n", GlslVersionSpecifier(ctx))};
ctx.header.insert(0, version);
if (program.shared_memory_size > 0) {
const auto requested_size{program.shared_memory_size};
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
index 8e5e6cf1f..1be4a0f59 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
@@ -48,10 +48,6 @@ void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value)
ctx.AddU64("{}=doubleBitsToUint64({});", inst, value);
}
-void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
- ctx.AddF32("{}=ftoi({});", inst, value);
-}
-
void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) {
NotImplemented();
}
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 39579cf5d..2e369ed72 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -234,6 +234,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
case IR::Attribute::FrontFace:
ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst);
break;
+ case IR::Attribute::BaseInstance:
+ ctx.AddF32("{}=itof(gl_BaseInstance);", inst);
+ break;
+ case IR::Attribute::BaseVertex:
+ ctx.AddF32("{}=itof(gl_BaseVertex);", inst);
+ break;
+ case IR::Attribute::DrawID:
+ ctx.AddF32("{}=itof(gl_DrawID);", inst);
+ break;
default:
throw NotImplementedException("Get attribute {}", attr);
}
@@ -250,6 +259,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, s
case IR::Attribute::VertexId:
ctx.AddU32("{}=uint(gl_VertexID);", inst);
break;
+ case IR::Attribute::BaseInstance:
+ ctx.AddU32("{}=uint(gl_BaseInstance);", inst);
+ break;
+ case IR::Attribute::BaseVertex:
+ ctx.AddU32("{}=uint(gl_BaseVertex);", inst);
+ break;
+ case IR::Attribute::DrawID:
+ ctx.AddU32("{}=uint(gl_DrawID);", inst);
+ break;
default:
throw NotImplementedException("Get U32 attribute {}", attr);
}
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index cecdbb9d6..4be2c25ec 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -414,7 +414,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
std::string_view coords, std::string_view offset, std::string_view lod,
- [[maybe_unused]] std::string_view ms) {
+ std::string_view ms) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
if (info.has_bias) {
throw NotImplementedException("EmitImageFetch Bias texture samples");
@@ -431,19 +431,24 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
ctx.AddU1("{}=true;", *sparse_inst);
}
if (!sparse_inst || !supports_sparse) {
- if (!offset.empty()) {
- ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture,
- CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info));
+ const auto int_coords{CoordsCastToInt(coords, info)};
+ if (!ms.empty()) {
+ ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, ms);
+ } else if (!offset.empty()) {
+ ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, int_coords, lod,
+ CoordsCastToInt(offset, info));
} else {
if (info.type == TextureType::Buffer) {
ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords);
} else {
- ctx.Add("{}=texelFetch({},{},int({}));", texel, texture,
- CoordsCastToInt(coords, info), lod);
+ ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, lod);
}
}
return;
}
+ if (!ms.empty()) {
+ throw NotImplementedException("EmitImageFetch Sparse MSAA samples");
+ }
if (!offset.empty()) {
ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
*sparse_inst, texture, CastToIntVec(coords, info), lod,
@@ -455,27 +460,27 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
}
void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
- std::string_view lod) {
+ std::string_view lod, const IR::Value& skip_mips_val) {
const auto info{inst.Flags<IR::TextureInstInfo>()};
const auto texture{Texture(ctx, info, index)};
+ const bool skip_mips{skip_mips_val.U1()};
+ const auto mips{
+ [&] { return skip_mips ? "0u" : fmt::format("uint(textureQueryLevels({}))", texture); }};
switch (info.type) {
case TextureType::Color1D:
- return ctx.AddU32x4(
- "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst,
- texture, lod, texture);
+ return ctx.AddU32x4("{}=uvec4(uint(textureSize({},int({}))),0u,0u,{});", inst, texture, lod,
+ mips());
case TextureType::ColorArray1D:
case TextureType::Color2D:
case TextureType::ColorCube:
case TextureType::Color2DRect:
- return ctx.AddU32x4(
- "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst,
- texture, lod, texture);
+ return ctx.AddU32x4("{}=uvec4(uvec2(textureSize({},int({}))),0u,{});", inst, texture, lod,
+ mips());
case TextureType::ColorArray2D:
case TextureType::Color3D:
case TextureType::ColorArrayCube:
- return ctx.AddU32x4(
- "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture,
- lod, texture);
+ return ctx.AddU32x4("{}=uvec4(uvec3(textureSize({},int({}))),{});", inst, texture, lod,
+ mips());
case TextureType::Buffer:
throw NotImplementedException("EmitImageQueryDimensions Texture buffers");
}
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index 4151c89de..8d0a65047 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -231,7 +231,6 @@ void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst);
void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
-void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst);
void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
@@ -655,7 +654,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
std::string_view coords, std::string_view offset, std::string_view lod,
std::string_view ms);
void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
- std::string_view lod);
+ std::string_view lod, const IR::Value& skip_mips);
void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
std::string_view coords);
void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index 5d01ec0cd..1b006e811 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -61,24 +61,28 @@ std::string OutputDecorator(Stage stage, u32 size) {
}
}
-std::string_view SamplerType(TextureType type, bool is_depth) {
- if (is_depth) {
- switch (type) {
- case TextureType::Color1D:
- return "sampler1DShadow";
- case TextureType::ColorArray1D:
- return "sampler1DArrayShadow";
- case TextureType::Color2D:
- return "sampler2DShadow";
- case TextureType::ColorArray2D:
- return "sampler2DArrayShadow";
- case TextureType::ColorCube:
- return "samplerCubeShadow";
- case TextureType::ColorArrayCube:
- return "samplerCubeArrayShadow";
- default:
- throw NotImplementedException("Texture type: {}", type);
- }
+std::string_view DepthSamplerType(TextureType type) {
+ switch (type) {
+ case TextureType::Color1D:
+ return "sampler1DShadow";
+ case TextureType::ColorArray1D:
+ return "sampler1DArrayShadow";
+ case TextureType::Color2D:
+ return "sampler2DShadow";
+ case TextureType::ColorArray2D:
+ return "sampler2DArrayShadow";
+ case TextureType::ColorCube:
+ return "samplerCubeShadow";
+ case TextureType::ColorArrayCube:
+ return "samplerCubeArrayShadow";
+ default:
+ throw NotImplementedException("Texture type: {}", type);
+ }
+}
+
+std::string_view ColorSamplerType(TextureType type, bool is_multisample = false) {
+ if (is_multisample) {
+ ASSERT(type == TextureType::Color2D || type == TextureType::ColorArray2D);
}
switch (type) {
case TextureType::Color1D:
@@ -87,9 +91,9 @@ std::string_view SamplerType(TextureType type, bool is_depth) {
return "sampler1DArray";
case TextureType::Color2D:
case TextureType::Color2DRect:
- return "sampler2D";
+ return is_multisample ? "sampler2DMS" : "sampler2D";
case TextureType::ColorArray2D:
- return "sampler2DArray";
+ return is_multisample ? "sampler2DMSArray" : "sampler2DArray";
case TextureType::Color3D:
return "sampler3D";
case TextureType::ColorCube:
@@ -677,7 +681,7 @@ void EmitContext::SetupTextures(Bindings& bindings) {
texture_buffers.reserve(info.texture_buffer_descriptors.size());
for (const auto& desc : info.texture_buffer_descriptors) {
texture_buffers.push_back({bindings.texture, desc.count});
- const auto sampler_type{SamplerType(TextureType::Buffer, false)};
+ const auto sampler_type{ColorSamplerType(TextureType::Buffer)};
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
sampler_type, bindings.texture, array_decorator);
@@ -686,7 +690,8 @@ void EmitContext::SetupTextures(Bindings& bindings) {
textures.reserve(info.texture_descriptors.size());
for (const auto& desc : info.texture_descriptors) {
textures.push_back({bindings.texture, desc.count});
- const auto sampler_type{SamplerType(desc.type, desc.is_depth)};
+ const auto sampler_type{desc.is_depth ? DepthSamplerType(desc.type)
+ : ColorSamplerType(desc.type, desc.is_multisample)};
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
sampler_type, bindings.texture, array_decorator);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
index 50daacd95..c4ca28d11 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -18,10 +18,6 @@ void EmitBitCastU64F64(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
-void EmitBitCastS32F32(EmitContext&) {
- throw NotImplementedException("SPIR-V Instruction");
-}
-
void EmitBitCastF16U16(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction");
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 73b67f0af..0cd87a48f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
case IR::Attribute::PositionY:
case IR::Attribute::PositionZ:
case IR::Attribute::PositionW:
- return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
- ctx.Const(element)));
+ return ctx.OpLoad(
+ ctx.F32[1],
+ ctx.need_input_position_indirect
+ ? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value,
+ ctx.Const(element))
+ : AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element)));
case IR::Attribute::InstanceId:
if (ctx.profile.support_vertex_instance_id) {
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
@@ -339,6 +343,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
}
+ case IR::Attribute::BaseInstance:
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance));
+ case IR::Attribute::BaseVertex:
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex));
+ case IR::Attribute::DrawID:
+ return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.draw_index));
case IR::Attribute::FrontFace:
return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
@@ -380,6 +390,12 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
return ctx.OpISub(ctx.U32[1], index, base);
}
+ case IR::Attribute::BaseInstance:
+ return ctx.OpLoad(ctx.U32[1], ctx.base_instance);
+ case IR::Attribute::BaseVertex:
+ return ctx.OpLoad(ctx.U32[1], ctx.base_vertex);
+ case IR::Attribute::DrawID:
+ return ctx.OpLoad(ctx.U32[1], ctx.draw_index);
default:
throw NotImplementedException("Read U32 attribute {}", attr);
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index fb5799c42..3b969d915 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -436,16 +436,22 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
if (info.type == TextureType::Buffer) {
lod = Id{};
}
+ if (Sirit::ValidId(ms)) {
+ // This image is multisampled, lod must be implicit
+ lod = Id{};
+ }
const ImageOperands operands(offset, lod, ms);
return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
}
-Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) {
+Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
+ const IR::Value& skip_mips_val) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const Id image{TextureImage(ctx, info, index)};
const Id zero{ctx.u32_zero_value};
- const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }};
+ const bool skip_mips{skip_mips_val.U1()};
+ const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }};
switch (info.type) {
case TextureType::Color1D:
return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod),
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index e31cdc5e8..a440b557d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -179,7 +179,6 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
void EmitBitCastU16F16(EmitContext& ctx);
Id EmitBitCastU32F32(EmitContext& ctx, Id value);
void EmitBitCastU64F64(EmitContext& ctx);
-void EmitBitCastS32F32(EmitContext& ctx);
void EmitBitCastF16U16(EmitContext&);
Id EmitBitCastF32U32(EmitContext& ctx, Id value);
void EmitBitCastF64U64(EmitContext& ctx);
@@ -540,7 +539,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
const IR::Value& offset, const IR::Value& offset2, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id lod, Id ms);
-Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod);
+Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
+ const IR::Value& skip_mips);
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id derivates, Id offset, Id lod_clamp);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 2c90f2368..c5db19d09 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value);
}
-Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
- const Id thirty_two{ctx.Const(32u)};
- const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
- const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
- return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
+Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
+ const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
+ const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
+ return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
}
} // Anonymous namespace
@@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
- const Id thread_id{GetThreadId(ctx)};
- if (ctx.profile.warp_size_potentially_larger_than_guest) {
- const Id thirty_two{ctx.Const(32u)};
- const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
- const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
- const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
- index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
- clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
- }
+ const Id thread_id{EmitLaneId(ctx)};
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
- const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
+ Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+ if (ctx.profile.warp_size_potentially_larger_than_guest) {
+ src_thread_id = AddPartitionBase(ctx, src_thread_id);
+ }
+
SetInBoundsFlag(inst, in_range);
return SelectValue(ctx, in_range, value, src_thread_id);
}
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
- const Id thread_id{GetThreadId(ctx)};
- if (ctx.profile.warp_size_potentially_larger_than_guest) {
- clamp = GetUpperClamp(ctx, thread_id, clamp);
- }
+ const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
- const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
+ Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+ if (ctx.profile.warp_size_potentially_larger_than_guest) {
+ src_thread_id = AddPartitionBase(ctx, src_thread_id);
+ }
+
SetInBoundsFlag(inst, in_range);
return SelectValue(ctx, in_range, value, src_thread_id);
}
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
- const Id thread_id{GetThreadId(ctx)};
- if (ctx.profile.warp_size_potentially_larger_than_guest) {
- clamp = GetUpperClamp(ctx, thread_id, clamp);
- }
+ const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
- const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
+ Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+ if (ctx.profile.warp_size_potentially_larger_than_guest) {
+ src_thread_id = AddPartitionBase(ctx, src_thread_id);
+ }
+
SetInBoundsFlag(inst, in_range);
return SelectValue(ctx, in_range, value, src_thread_id);
}
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
- const Id thread_id{GetThreadId(ctx)};
- if (ctx.profile.warp_size_potentially_larger_than_guest) {
- clamp = GetUpperClamp(ctx, thread_id, clamp);
- }
+ const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
- const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
+ Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
+ if (ctx.profile.warp_size_potentially_larger_than_guest) {
+ src_thread_id = AddPartitionBase(ctx, src_thread_id);
+ }
+
SetInBoundsFlag(inst, in_range);
return SelectValue(ctx, in_range, value, src_thread_id);
}
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 41dc6d031..3b97721e1 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -35,6 +35,7 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
const spv::ImageFormat format{spv::ImageFormat::Unknown};
const Id type{ctx.F32[1]};
const bool depth{desc.is_depth};
+ const bool ms{desc.is_multisample};
switch (desc.type) {
case TextureType::Color1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
@@ -42,9 +43,9 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
case TextureType::Color2D:
case TextureType::Color2DRect:
- return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format);
+ return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format);
case TextureType::ColorArray2D:
- return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format);
+ return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, ms, 1, format);
case TextureType::Color3D:
return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format);
case TextureType::ColorCube:
@@ -544,7 +545,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
U16 = Name(TypeInt(16, false), "u16");
S16 = Name(TypeInt(16, true), "s16");
}
- if (info.uses_int64) {
+ if (info.uses_int64 && profile.support_int64) {
AddCapability(spv::Capability::Int64);
U64 = Name(TypeInt(64, false), "u64");
}
@@ -721,9 +722,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
size_t label_index{0};
if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
AddLabel(labels[label_index]);
- const Id pointer{is_array
- ? OpAccessChain(input_f32, input_position, vertex, masked_index)
- : OpAccessChain(input_f32, input_position, masked_index)};
+ const Id pointer{[&]() {
+ if (need_input_position_indirect) {
+ if (is_array)
+ return OpAccessChain(input_f32, input_position, vertex, u32_zero_value,
+ masked_index);
+ else
+ return OpAccessChain(input_f32, input_position, u32_zero_value,
+ masked_index);
+ } else {
+ if (is_array)
+ return OpAccessChain(input_f32, input_position, vertex, masked_index);
+ else
+ return OpAccessChain(input_f32, input_position, masked_index);
+ }
+ }()};
const Id result{OpLoad(F32[1], pointer)};
OpReturnValue(result);
++label_index;
@@ -1367,30 +1380,56 @@ void EmitContext::DefineInputs(const IR::Program& program) {
Decorate(layer, spv::Decoration::Flat);
}
if (loads.AnyComponent(IR::Attribute::PositionX)) {
- const bool is_fragment{stage != Stage::Fragment};
- const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};
- input_position = DefineInput(*this, F32[4], true, built_in);
- if (profile.support_geometry_shader_passthrough) {
- if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
- Decorate(input_position, spv::Decoration::PassthroughNV);
+ const bool is_fragment{stage == Stage::Fragment};
+ if (!is_fragment && profile.has_broken_spirv_position_input) {
+ need_input_position_indirect = true;
+
+ const Id input_position_struct = TypeStruct(F32[4]);
+ input_position = DefineInput(*this, input_position_struct, true);
+
+ MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn,
+ static_cast<unsigned>(spv::BuiltIn::Position));
+ Decorate(input_position_struct, spv::Decoration::Block);
+ } else {
+ const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord
+ : spv::BuiltIn::Position};
+ input_position = DefineInput(*this, F32[4], true, built_in);
+
+ if (profile.support_geometry_shader_passthrough) {
+ if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
+ Decorate(input_position, spv::Decoration::PassthroughNV);
+ }
}
}
}
if (loads[IR::Attribute::InstanceId]) {
if (profile.support_vertex_instance_id) {
instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
+ if (loads[IR::Attribute::BaseInstance]) {
+ base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
+ }
} else {
instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
}
+ } else if (loads[IR::Attribute::BaseInstance]) {
+ base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
}
if (loads[IR::Attribute::VertexId]) {
if (profile.support_vertex_instance_id) {
vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
+ if (loads[IR::Attribute::BaseVertex]) {
+ base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
+ }
} else {
vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
}
+ } else if (loads[IR::Attribute::BaseVertex]) {
+ base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
+ }
+ if (loads[IR::Attribute::DrawID]) {
+ draw_index = DefineInput(*this, U32[1], true, spv::BuiltIn::DrawIndex);
}
if (loads[IR::Attribute::FrontFace]) {
front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index dde45b4bc..dbc5c55b9 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -218,6 +218,7 @@ public:
Id base_instance{};
Id vertex_id{};
Id vertex_index{};
+ Id draw_index{};
Id base_vertex{};
Id front_face{};
Id point_coord{};
@@ -279,6 +280,7 @@ public:
Id write_global_func_u32x2{};
Id write_global_func_u32x4{};
+ bool need_input_position_indirect{};
Id input_position{};
std::array<Id, 32> input_generics{};
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 402f2664f..26e8307c1 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -34,6 +34,11 @@ public:
[[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
+ [[nodiscard]] virtual bool HasHLEMacroState() const = 0;
+
+ [[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer(u32 bank,
+ u32 offset) = 0;
+
virtual void Dump(u64 hash) = 0;
[[nodiscard]] const ProgramHeader& SPH() const noexcept {
@@ -52,11 +57,16 @@ public:
return start_address;
}
+ [[nodiscard]] bool IsPropietaryDriver() const noexcept {
+ return is_propietary_driver;
+ }
+
protected:
ProgramHeader sph{};
std::array<u32, 8> gp_passthrough_mask{};
Stage stage{};
u32 start_address{};
+ bool is_propietary_driver{};
};
} // namespace Shader
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
index 7d3d882e4..1bf9db935 100644
--- a/src/shader_recompiler/frontend/ir/attribute.cpp
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -446,6 +446,12 @@ std::string NameOf(Attribute attribute) {
return "ViewportMask";
case Attribute::FrontFace:
return "FrontFace";
+ case Attribute::BaseInstance:
+ return "BaseInstance";
+ case Attribute::BaseVertex:
+ return "BaseVertex";
+ case Attribute::DrawID:
+ return "DrawID";
}
return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
}
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h
index 6ee3947b1..5f039b6f6 100644
--- a/src/shader_recompiler/frontend/ir/attribute.h
+++ b/src/shader_recompiler/frontend/ir/attribute.h
@@ -219,6 +219,11 @@ enum class Attribute : u64 {
FixedFncTexture9Q = 231,
ViewportMask = 232,
FrontFace = 255,
+
+ // Implementation attributes
+ BaseInstance = 256,
+ BaseVertex = 257,
+ DrawID = 258,
};
constexpr size_t NUM_GENERICS = 32;
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 0cdac0eff..b7caa4246 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
}
+U32 IREmitter::GetAttributeU32(IR::Attribute attribute) {
+ return GetAttributeU32(attribute, Imm32(0));
+}
+
+U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) {
+ return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex);
+}
+
void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
Inst(Opcode::SetAttribute, attribute, value, vertex);
}
@@ -696,11 +704,6 @@ IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) {
}
template <>
-IR::S32 IREmitter::BitCast<IR::S32, IR::F32>(const IR::F32& value) {
- return Inst<IR::S32>(Opcode::BitCastS32F32, value);
-}
-
-template <>
IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) {
return Inst<IR::F32>(Opcode::BitCastF32U32, value);
}
@@ -1843,15 +1846,16 @@ Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Valu
return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling);
}
-Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) {
+Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
+ const IR::U1& skip_mips) {
const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions
: Opcode::BindlessImageQueryDimensions};
- return Inst(op, handle, lod);
+ return Inst(op, handle, lod, skip_mips);
}
Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
- TextureInstInfo info) {
- return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod);
+ const IR::U1& skip_mips, TextureInstInfo info) {
+ return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod, skip_mips);
}
Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 2df992feb..f3c81dbe1 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -74,6 +74,8 @@ public:
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
+ [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute);
+ [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex);
void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
[[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
@@ -318,9 +320,10 @@ public:
[[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
const F32& dref, const F32& lod,
const Value& offset, TextureInstInfo info);
- [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
- TextureInstInfo info);
+ const IR::U1& skip_mips);
+ [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
+ const IR::U1& skip_mips, TextureInstInfo info);
[[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
TextureInstInfo info);
@@ -406,7 +409,8 @@ private:
}
template <typename T>
- requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags {
+ requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>)
+ struct Flags {
Flags() = default;
Flags(T proxy_) : proxy{proxy_} {}
diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h
index d155afd0f..e300714f3 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.h
+++ b/src/shader_recompiler/frontend/ir/opcodes.h
@@ -38,7 +38,6 @@ constexpr Type U8{Type::U8};
constexpr Type U16{Type::U16};
constexpr Type U32{Type::U32};
constexpr Type U64{Type::U64};
-constexpr Type S32{Type::S32};
constexpr Type F16{Type::F16};
constexpr Type F32{Type::F32};
constexpr Type F64{Type::F64};
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 1fe3749cc..4447d67b0 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -175,7 +175,6 @@ OPCODE(SelectF64, F64, U1,
OPCODE(BitCastU16F16, U16, F16, )
OPCODE(BitCastU32F32, U32, F32, )
OPCODE(BitCastU64F64, U64, F64, )
-OPCODE(BitCastS32F32, S32, F32, )
OPCODE(BitCastF16U16, F16, U16, )
OPCODE(BitCastF32U32, F32, U32, )
OPCODE(BitCastF64U64, F64, U64, )
@@ -483,7 +482,7 @@ OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32,
OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
-OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, )
+OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, U1, )
OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, )
OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
OPCODE(BindlessImageRead, U32x4, U32, Opaque, )
@@ -496,7 +495,7 @@ OPCODE(BoundImageSampleDrefExplicitLod, F32, U32,
OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
-OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, )
+OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, U1, )
OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, )
OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
OPCODE(BoundImageRead, U32x4, U32, Opaque, )
@@ -509,7 +508,7 @@ OPCODE(ImageSampleDrefExplicitLod, F32, Opaq
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
-OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, )
+OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
index 5a7c706ad..04c8c4ddb 100644
--- a/src/shader_recompiler/frontend/ir/type.h
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -24,22 +24,21 @@ enum class Type {
U16 = 1 << 7,
U32 = 1 << 8,
U64 = 1 << 9,
- S32 = 1 << 10,
- F16 = 1 << 11,
- F32 = 1 << 12,
- F64 = 1 << 13,
- U32x2 = 1 << 14,
- U32x3 = 1 << 15,
- U32x4 = 1 << 16,
- F16x2 = 1 << 17,
- F16x3 = 1 << 18,
- F16x4 = 1 << 19,
- F32x2 = 1 << 20,
- F32x3 = 1 << 21,
- F32x4 = 1 << 22,
- F64x2 = 1 << 23,
- F64x3 = 1 << 24,
- F64x4 = 1 << 25,
+ F16 = 1 << 10,
+ F32 = 1 << 11,
+ F64 = 1 << 12,
+ U32x2 = 1 << 13,
+ U32x3 = 1 << 14,
+ U32x4 = 1 << 15,
+ F16x2 = 1 << 16,
+ F16x3 = 1 << 17,
+ F16x4 = 1 << 18,
+ F32x2 = 1 << 19,
+ F32x3 = 1 << 20,
+ F32x4 = 1 << 21,
+ F64x2 = 1 << 22,
+ F64x3 = 1 << 23,
+ F64x4 = 1 << 24,
};
DECLARE_ENUM_FLAG_OPERATORS(Type)
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 30ba12316..346169328 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -23,8 +23,6 @@ Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {}
Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {}
-Value::Value(s32 value) noexcept : type{Type::S32}, imm_s32{value} {}
-
Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {}
Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
@@ -71,7 +69,6 @@ bool Value::operator==(const Value& other) const {
return imm_u16 == other.imm_u16;
case Type::U32:
case Type::F32:
- case Type::S32:
return imm_u32 == other.imm_u32;
case Type::U64:
case Type::F64:
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 8b34356fd..22e89dd1b 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -101,9 +101,8 @@ public:
TypedValue() = default;
template <IR::Type other_type>
- requires((other_type & type_) != IR::Type::Void) explicit(false)
- TypedValue(const TypedValue<other_type>& value)
- : Value(value) {}
+ requires((other_type & type_) != IR::Type::Void)
+ explicit(false) TypedValue(const TypedValue<other_type>& value) : Value(value) {}
explicit TypedValue(const Value& value) : Value(value) {
if ((value.Type() & type_) == IR::Type::Void) {
@@ -194,16 +193,16 @@ public:
void ReplaceOpcode(IR::Opcode opcode);
template <typename FlagsType>
- requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
- [[nodiscard]] FlagsType Flags() const noexcept {
+ requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
+ [[nodiscard]] FlagsType Flags() const noexcept {
FlagsType ret;
std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
return ret;
}
template <typename FlagsType>
- requires(sizeof(FlagsType) <= sizeof(u32) &&
- std::is_trivially_copyable_v<FlagsType>) void SetFlags(FlagsType value) noexcept {
+ requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
+ void SetFlags(FlagsType value) noexcept {
std::memcpy(&flags, &value, sizeof(value));
}
@@ -268,7 +267,6 @@ using U8 = TypedValue<Type::U8>;
using U16 = TypedValue<Type::U16>;
using U32 = TypedValue<Type::U32>;
using U64 = TypedValue<Type::U64>;
-using S32 = TypedValue<Type::S32>;
using F16 = TypedValue<Type::F16>;
using F32 = TypedValue<Type::F32>;
using F64 = TypedValue<Type::F64>;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
index f8cfd4ab6..39af62559 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -15,11 +15,13 @@ enum class Mode : u64 {
SamplePos = 5,
};
-IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
+IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg, u64 mask) {
switch (mode) {
case Mode::Dimension: {
+ const bool needs_num_mips{((mask >> 3) & 1) != 0};
+ const IR::U1 skip_mips{v.ir.Imm1(!needs_num_mips)};
const IR::U32 lod{v.X(src_reg)};
- return v.ir.ImageQueryDimension(handle, lod);
+ return v.ir.ImageQueryDimension(handle, lod, skip_mips);
}
case Mode::TextureType:
case Mode::SamplePos:
@@ -46,7 +48,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
handle = v.X(src_reg);
++src_reg;
}
- const IR::Value query{Query(v, handle, txq.mode, src_reg)};
+ const IR::Value query{Query(v, handle, txq.mode, src_reg, txq.mask)};
IR::Reg dest_reg{txq.dest_reg};
for (int element = 0; element < 4; ++element) {
if (((txq.mask >> element) & 1) == 0) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 3adbd2b16..a42453e90 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings(
}
return mapping;
}
+
+void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program,
+ const Shader::VaryingState& passthrough_mask,
+ bool passthrough_position,
+ std::optional<IR::Attribute> passthrough_layer_attr) {
+ for (u32 i = 0; i < program.output_vertices; i++) {
+ // Assign generics from input
+ for (u32 j = 0; j < 32; j++) {
+ if (!passthrough_mask.Generic(j)) {
+ continue;
+ }
+
+ const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
+ ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
+ ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
+ ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
+ ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
+ }
+
+ if (passthrough_position) {
+ // Assign position from input
+ const IR::Attribute attr = IR::Attribute::PositionX;
+ ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
+ ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
+ ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
+ ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
+ }
+
+ if (passthrough_layer_attr) {
+ // Assign layer
+ ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr),
+ ir.Imm32(0));
+ }
+
+ // Emit vertex
+ ir.EmitVertex(ir.Imm32(0));
+ }
+ ir.EndPrimitive(ir.Imm32(0));
+}
+
+u32 GetOutputTopologyVertices(OutputTopology output_topology) {
+ switch (output_topology) {
+ case OutputTopology::PointList:
+ return 1;
+ case OutputTopology::LineStrip:
+ return 2;
+ default:
+ return 3;
+ }
+}
+
+void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) {
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ if (inst.GetOpcode() == IR::Opcode::Epilogue) {
+ IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
+ EmitGeometryPassthrough(
+ ir, program, program.info.passthrough,
+ program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {});
+ }
+ }
+ }
+}
+
} // Anonymous namespace
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
@@ -195,9 +259,14 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
if (program.is_geometry_passthrough) {
const auto& mask{env.GpPassthroughMask()};
- for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
+ for (size_t i = 0; i < mask.size() * 32; ++i) {
program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
}
+
+ if (!host_info.support_geometry_shader_passthrough) {
+ program.output_vertices = GetOutputTopologyVertices(program.output_topology);
+ LowerGeometryPassthrough(program, host_info);
+ }
}
break;
}
@@ -219,11 +288,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
}
Optimization::SsaRewritePass(program);
- Optimization::ConstantPropagationPass(program);
+ Optimization::ConstantPropagationPass(env, program);
Optimization::PositionPass(env, program);
- Optimization::GlobalMemoryToStorageBufferPass(program);
+ Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
Optimization::TexturePass(env, program, host_info);
if (Settings::values.resolution_info.active) {
@@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
IR::Program program;
program.stage = Stage::Geometry;
program.output_topology = output_topology;
- switch (output_topology) {
- case OutputTopology::PointList:
- program.output_vertices = 1;
- break;
- case OutputTopology::LineStrip:
- program.output_vertices = 2;
- break;
- default:
- program.output_vertices = 3;
- break;
- }
+ program.output_vertices = GetOutputTopologyVertices(output_topology);
program.is_geometry_passthrough = false;
program.info.loads.mask = source_program.info.stores.mask;
@@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
node.data.block = current_block;
IR::IREmitter ir{*current_block};
- for (u32 i = 0; i < program.output_vertices; i++) {
- // Assign generics from input
- for (u32 j = 0; j < 32; j++) {
- if (!program.info.stores.Generic(j)) {
- continue;
- }
-
- const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
- ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
- ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
- ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
- ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
- }
-
- // Assign position from input
- const IR::Attribute attr = IR::Attribute::PositionX;
- ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
- ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
- ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
- ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
-
- // Assign layer
- ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer),
- ir.Imm32(0));
-
- // Emit vertex
- ir.EmitVertex(ir.Imm32(0));
- }
- ir.EndPrimitive(ir.Imm32(0));
+ EmitGeometryPassthrough(ir, program, program.info.stores, true,
+ source_program.info.emulated_layer);
IR::Block* return_block{block_pool.Create(inst_pool)};
IR::IREmitter{*return_block}.Epilogue();
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index d5d279554..55fc48768 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -15,6 +15,9 @@ struct HostTranslateInfo {
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
+ u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
+ bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
+ ///< passthrough shaders
};
} // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 826f9a54a..4d81e9336 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -7,6 +7,7 @@
#include <type_traits>
#include "common/bit_cast.h"
+#include "shader_recompiler/environment.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/value.h"
@@ -515,6 +516,9 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
case IR::Attribute::PrimitiveId:
case IR::Attribute::InstanceId:
case IR::Attribute::VertexId:
+ case IR::Attribute::BaseVertex:
+ case IR::Attribute::BaseInstance:
+ case IR::Attribute::DrawID:
break;
default:
return;
@@ -644,7 +648,63 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
}
}
-void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
+void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
+ const IR::Value bank{inst.Arg(0)};
+ const IR::Value offset{inst.Arg(1)};
+ if (!bank.IsImmediate() || !offset.IsImmediate()) {
+ return;
+ }
+ const auto bank_value = bank.U32();
+ const auto offset_value = offset.U32();
+ auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value);
+ if (!replacement) {
+ return;
+ }
+ const auto new_attribute = [replacement]() {
+ switch (*replacement) {
+ case ReplaceConstant::BaseInstance:
+ return IR::Attribute::BaseInstance;
+ case ReplaceConstant::BaseVertex:
+ return IR::Attribute::BaseVertex;
+ case ReplaceConstant::DrawID:
+ return IR::Attribute::DrawID;
+ default:
+ throw NotImplementedException("Not implemented replacement variable {}", *replacement);
+ }
+ }();
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
+ inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute));
+ } else {
+ inst.ReplaceUsesWith(ir.GetAttribute(new_attribute));
+ }
+}
+
+void FoldDriverConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst, u32 which_bank,
+ u32 offset_start = 0, u32 offset_end = std::numeric_limits<u16>::max()) {
+ const IR::Value bank{inst.Arg(0)};
+ const IR::Value offset{inst.Arg(1)};
+ if (!bank.IsImmediate() || !offset.IsImmediate()) {
+ return;
+ }
+ const auto bank_value = bank.U32();
+ if (bank_value != which_bank) {
+ return;
+ }
+ const auto offset_value = offset.U32();
+ if (offset_value < offset_start || offset_value >= offset_end) {
+ return;
+ }
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
+ inst.ReplaceUsesWith(IR::Value{env.ReadCbufValue(bank_value, offset_value)});
+ } else {
+ inst.ReplaceUsesWith(
+ IR::Value{Common::BitCast<f32>(env.ReadCbufValue(bank_value, offset_value))});
+ }
+}
+
+void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::GetRegister:
return FoldGetRegister(inst);
@@ -789,18 +849,28 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
IR::Opcode::CompositeInsertF16x4);
case IR::Opcode::FSwizzleAdd:
return FoldFSwizzleAdd(block, inst);
+ case IR::Opcode::GetCbufF32:
+ case IR::Opcode::GetCbufU32:
+ if (env.HasHLEMacroState()) {
+ FoldConstBuffer(env, block, inst);
+ }
+ if (env.IsPropietaryDriver()) {
+ FoldDriverConstBuffer(env, block, inst, 1);
+ }
+ break;
default:
break;
}
}
+
} // Anonymous namespace
-void ConstantPropagationPass(IR::Program& program) {
+void ConstantPropagationPass(Environment& env, IR::Program& program) {
const auto end{program.post_order_blocks.rend()};
for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
IR::Block* const block{*it};
for (IR::Inst& inst : block->Instructions()) {
- ConstantPropagation(*block, inst);
+ ConstantPropagation(env, *block, inst);
}
}
}
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 336338e62..9101722ba 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
@@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
}
/// Returns the offset in indices (not bytes) for an equivalent storage instruction
-IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
+IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
IR::U32 offset;
if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
}
// Subtract the least significant 32 bits from the guest offset. The result is the storage
// buffer offset in bytes.
- const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+ IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+
+ // Align the offset base to match the host alignment requirements
+ low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
return ir.ISub(offset, low_cbuf);
}
@@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
}
} // Anonymous namespace
-void GlobalMemoryToStorageBufferPass(IR::Program& program) {
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
StorageInfo info;
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
@@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
IR::Block* const block{storage_inst.block};
IR::Inst* const inst{storage_inst.inst};
- const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
+ const IR::U32 offset{
+ StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
Replace(*block, *inst, index, offset);
}
}
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 11bfe801a..4ffad1172 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -13,9 +13,9 @@ struct HostTranslateInfo;
namespace Shader::Optimization {
void CollectShaderInfoPass(Environment& env, IR::Program& program);
-void ConstantPropagationPass(IR::Program& program);
+void ConstantPropagationPass(Environment& env, IR::Program& program);
void DeadCodeEliminationPass(IR::Program& program);
-void GlobalMemoryToStorageBufferPass(IR::Program& program);
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
void IdentityRemovalPass(IR::Program& program);
void LowerFp16ToFp32(IR::Program& program);
void LowerInt64ToInt32(IR::Program& program);
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index f5c86fcb1..d374c976a 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -355,21 +355,21 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
};
}
-TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
+u32 GetTextureHandle(Environment& env, const ConstBufferAddr& cbuf) {
const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset) << cbuf.shift_left};
const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)
<< cbuf.secondary_shift_left};
- return env.ReadTextureType(lhs_raw | rhs_raw);
+ return lhs_raw | rhs_raw;
+}
+
+TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
+ return env.ReadTextureType(GetTextureHandle(env, cbuf));
}
TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) {
- const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
- const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
- const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)};
- const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)};
- return env.ReadTexturePixelFormat(lhs_raw | rhs_raw);
+ return env.ReadTexturePixelFormat(GetTextureHandle(env, cbuf));
}
class Descriptors {
@@ -386,8 +386,10 @@ public:
return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) {
return desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset &&
+ desc.shift_left == existing.shift_left &&
desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
+ desc.secondary_shift_left == existing.secondary_shift_left &&
desc.count == existing.count && desc.size_shift == existing.size_shift &&
desc.has_secondary == existing.has_secondary;
});
@@ -405,15 +407,20 @@ public:
}
u32 Add(const TextureDescriptor& desc) {
- return Add(texture_descriptors, desc, [&desc](const auto& existing) {
+ const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) {
return desc.type == existing.type && desc.is_depth == existing.is_depth &&
desc.has_secondary == existing.has_secondary &&
desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset &&
+ desc.shift_left == existing.shift_left &&
desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
+ desc.secondary_shift_left == existing.secondary_shift_left &&
desc.count == existing.count && desc.size_shift == existing.size_shift;
- });
+ })};
+ // TODO: Read this from TIC
+ texture_descriptors[index].is_multisample |= desc.is_multisample;
+ return index;
}
u32 Add(const ImageDescriptor& desc) {
@@ -452,7 +459,8 @@ void PatchImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
const IR::Value coord(inst.Arg(1));
const IR::Value handle(ir.Imm32(0));
const IR::U32 lod{ir.Imm32(0)};
- const IR::Value texture_size = ir.ImageQueryDimension(handle, lod, info);
+ const IR::U1 skip_mips{ir.Imm1(true)};
+ const IR::Value texture_size = ir.ImageQueryDimension(handle, lod, skip_mips, info);
inst.SetArg(
1, ir.CompositeConstruct(
ir.FPMul(IR::F32(ir.CompositeExtract(coord, 0)),
@@ -486,10 +494,10 @@ void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_
const IR::F32 w(ir.CompositeExtract(new_inst, 3));
const IR::F16F32F64 max_value(ir.Imm32(get_max_value()));
const IR::Value converted =
- ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(x)), max_value),
- ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(y)), max_value),
- ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(z)), max_value),
- ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(w)), max_value));
+ ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(x)), max_value),
+ ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(y)), max_value),
+ ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(z)), max_value),
+ ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(w)), max_value));
inst.ReplaceUsesWith(converted);
}
} // Anonymous namespace
@@ -524,6 +532,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
const auto& cbuf{texture_inst.cbuf};
auto flags{inst->Flags<IR::TextureInstInfo>()};
+ bool is_multisample{false};
switch (inst->GetOpcode()) {
case IR::Opcode::ImageQueryDimensions:
flags.type.Assign(ReadTextureType(env, cbuf));
@@ -538,6 +547,12 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
}
break;
case IR::Opcode::ImageFetch:
+ if (flags.type == TextureType::Color2D || flags.type == TextureType::Color2DRect ||
+ flags.type == TextureType::ColorArray2D) {
+ is_multisample = !inst->Arg(4).IsEmpty();
+ } else {
+ inst->SetArg(4, IR::U32{});
+ }
if (flags.type != TextureType::Color1D) {
break;
}
@@ -613,6 +628,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
index = descriptors.Add(TextureDescriptor{
.type = flags.type,
.is_depth = flags.is_depth != 0,
+ .is_multisample = is_multisample,
.has_secondary = cbuf.has_secondary,
.cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset,
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h
index 2b42c4ba2..5d648b159 100644
--- a/src/shader_recompiler/object_pool.h
+++ b/src/shader_recompiler/object_pool.h
@@ -10,7 +10,7 @@
namespace Shader {
template <typename T>
-requires std::is_destructible_v<T>
+ requires std::is_destructible_v<T>
class ObjectPool {
public:
explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} {
@@ -18,7 +18,7 @@ public:
}
template <typename... Args>
- requires std::is_constructible_v<T, Args...>
+ requires std::is_constructible_v<T, Args...>
[[nodiscard]] T* Create(Args&&... args) {
return std::construct_at(Memory(), std::forward<Args>(args)...);
}
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index b8841a536..253e0d0bd 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -55,6 +55,8 @@ struct Profile {
/// OpFClamp is broken and OpFMax + OpFMin should be used instead
bool has_broken_spirv_clamp{};
+ /// The Position builtin needs to be wrapped in a struct when used as an input
+ bool has_broken_spirv_position_input{};
/// Offset image operands with an unsigned type do not work
bool has_broken_unsigned_image_offsets{};
/// Signed instructions with unsigned data types are misinterpreted
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index d9c6e92db..d308db942 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -16,6 +16,12 @@
namespace Shader {
+enum class ReplaceConstant : u32 {
+ BaseInstance,
+ BaseVertex,
+ DrawID,
+};
+
enum class TextureType : u32 {
Color1D,
ColorArray1D,
@@ -59,6 +65,8 @@ enum class Interpolation {
struct ConstantBufferDescriptor {
u32 index;
u32 count;
+
+ auto operator<=>(const ConstantBufferDescriptor&) const = default;
};
struct StorageBufferDescriptor {
@@ -66,6 +74,8 @@ struct StorageBufferDescriptor {
u32 cbuf_offset;
u32 count;
bool is_written;
+
+ auto operator<=>(const StorageBufferDescriptor&) const = default;
};
struct TextureBufferDescriptor {
@@ -78,6 +88,8 @@ struct TextureBufferDescriptor {
u32 secondary_shift_left;
u32 count;
u32 size_shift;
+
+ auto operator<=>(const TextureBufferDescriptor&) const = default;
};
using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
@@ -89,12 +101,15 @@ struct ImageBufferDescriptor {
u32 cbuf_offset;
u32 count;
u32 size_shift;
+
+ auto operator<=>(const ImageBufferDescriptor&) const = default;
};
using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
struct TextureDescriptor {
TextureType type;
bool is_depth;
+ bool is_multisample;
bool has_secondary;
u32 cbuf_index;
u32 cbuf_offset;
@@ -104,6 +119,8 @@ struct TextureDescriptor {
u32 secondary_shift_left;
u32 count;
u32 size_shift;
+
+ auto operator<=>(const TextureDescriptor&) const = default;
};
using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
@@ -116,6 +133,8 @@ struct ImageDescriptor {
u32 cbuf_offset;
u32 count;
u32 size_shift;
+
+ auto operator<=>(const ImageDescriptor&) const = default;
};
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h
index 7b28a285f..18a9aaf50 100644
--- a/src/shader_recompiler/varying_state.h
+++ b/src/shader_recompiler/varying_state.h
@@ -11,7 +11,7 @@
namespace Shader {
struct VaryingState {
- std::bitset<256> mask{};
+ std::bitset<512> mask{};
void Set(IR::Attribute attribute, bool state = true) {
mask[static_cast<size_t>(attribute)] = state;