summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorameerj <52414509+ameerj@users.noreply.github.com>2021-05-31 06:25:54 +0200
committerameerj <52414509+ameerj@users.noreply.github.com>2021-07-23 03:51:37 +0200
commitb7561226edaefc79eadcfbd3df1b0344b7c4b673 (patch)
tree49515563855b3585e6d23ceec7274a8e69340615
parentglsl: Implement precise fp variable allocation (diff)
downloadyuzu-b7561226edaefc79eadcfbd3df1b0344b7c4b673.tar
yuzu-b7561226edaefc79eadcfbd3df1b0344b7c4b673.tar.gz
yuzu-b7561226edaefc79eadcfbd3df1b0344b7c4b673.tar.bz2
yuzu-b7561226edaefc79eadcfbd3df1b0344b7c4b673.tar.lz
yuzu-b7561226edaefc79eadcfbd3df1b0344b7c4b673.tar.xz
yuzu-b7561226edaefc79eadcfbd3df1b0344b7c4b673.tar.zst
yuzu-b7561226edaefc79eadcfbd3df1b0344b7c4b673.zip
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp22
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp20
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp43
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp34
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp8
5 files changed, 64 insertions, 63 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
index d3301054c..9714ffe33 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
@@ -20,7 +20,7 @@ static constexpr std::string_view cas_loop{R"(for (;;){{
void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
std::string_view value, std::string_view function) {
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
- const std::string smem{fmt::format("smem[{}/4]", offset)};
+ const std::string smem{fmt::format("smem[{}>>2]", offset)};
ctx.Add(cas_loop.data(), smem, ret, smem, function, smem, value, ret);
}
@@ -45,7 +45,7 @@ void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi
void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value) {
- ctx.AddU32("{}=atomicAdd(smem[{}/4],{});", inst, pointer_offset, value);
+ ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value);
}
void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
@@ -56,7 +56,7 @@ void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view p
void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value) {
- ctx.AddU32("{}=atomicMin(smem[{}/4],{});", inst, pointer_offset, value);
+ ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value);
}
void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
@@ -67,7 +67,7 @@ void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view p
void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value) {
- ctx.AddU32("{}=atomicMax(smem[{}/4],{});", inst, pointer_offset, value);
+ ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value);
}
void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
@@ -82,31 +82,31 @@ void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view po
void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value) {
- ctx.AddU32("{}=atomicAnd(smem[{}/4],{});", inst, pointer_offset, value);
+ ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value);
}
void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value) {
- ctx.AddU32("{}=atomicOr(smem[{}/4],{});", inst, pointer_offset, value);
+ ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value);
}
void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value) {
- ctx.AddU32("{}=atomicXor(smem[{}/4],{});", inst, pointer_offset, value);
+ ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value);
}
void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value) {
- ctx.AddU32("{}=atomicExchange(smem[{}/4],{});", inst, pointer_offset, value);
+ ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value);
}
void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
std::string_view value) {
// LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic");
- ctx.AddU64("{}=packUint2x32(uvec2(smem[{}/4],smem[({}+4)/4]));", inst, pointer_offset,
+ ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset,
pointer_offset);
- ctx.Add("smem[{}/4]=unpackUint2x32({}).x;smem[({}+4)/4]=unpackUint2x32({}).y;", pointer_offset,
- value, pointer_offset, value);
+ ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;",
+ pointer_offset, value, pointer_offset, value);
}
void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 8f5f94752..8d2abdd94 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -31,7 +31,7 @@ void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst&
} else {
const auto offset_var{ctx.var_alloc.Consume(offset)};
ctx.AddU32(
- "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);",
+ "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);",
inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
}
}
@@ -46,8 +46,8 @@ void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst&
} else {
const auto offset_var{ctx.var_alloc.Consume(offset)};
ctx.AddU32(
- "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);", inst,
- ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
+ "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);",
+ inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
}
}
@@ -60,7 +60,7 @@ void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst
((offset.U32() / 2) % 2) * 16);
} else {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int((({}/"
+ ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/"
"2)%2)*16),16);",
inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
}
@@ -75,9 +75,9 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst
((offset.U32() / 2) % 2) * 16);
} else {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.AddU32(
- "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int((({}/2)%2)*16),16);",
- inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
+ ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/"
+ "2)%2)*16),16);",
+ inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
}
}
@@ -88,7 +88,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
offset.U32() / 16, OffsetSwizzle(offset.U32()));
} else {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]);", inst, ctx.stage_name,
+ ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]);", inst, ctx.stage_name,
binding.U32(), offset_var, offset_var);
}
}
@@ -100,7 +100,7 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
OffsetSwizzle(offset.U32()));
} else {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.AddF32("{}={}_cbuf{}[{}/16][({}/4)%4];", inst, ctx.stage_name, binding.U32(),
+ ctx.AddF32("{}={}_cbuf{}[{}/16][({}>>2)%4];", inst, ctx.stage_name, binding.U32(),
offset_var, offset_var);
}
}
@@ -116,7 +116,7 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
} else {
const auto offset_var{ctx.var_alloc.Consume(offset)};
ctx.AddU32x2("{}=uvec2(floatBitsToUint({}_cbuf{}[{}/16][({}/"
- "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)/4)%4]));",
+ "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)>>2)%4]));",
inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name,
binding.U32(), offset_var, offset_var);
}
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
index 78fbb9d6e..a4411b68b 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
@@ -13,7 +13,7 @@ void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] const IR::Value& offset) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}/4],int({}%4)*8,8);", inst, ctx.stage_name,
+ ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name,
binding.U32(), offset_var, offset_var);
}
@@ -21,7 +21,7 @@ void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] const IR::Value& offset) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}/4]),int({}%4)*8,8);", inst, ctx.stage_name,
+ ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name,
binding.U32(), offset_var, offset_var);
}
@@ -29,7 +29,7 @@ void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] const IR::Value& offset) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}/4],int(({}/2)%2)*16,16);", inst, ctx.stage_name,
+ ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name,
binding.U32(), offset_var, offset_var);
}
@@ -37,30 +37,31 @@ void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] const IR::Value& offset) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}/4]),int(({}/2)%2)*16,16);", inst,
+ ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst,
ctx.stage_name, binding.U32(), offset_var, offset_var);
}
void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.AddU32("{}={}_ssbo{}[{}/4];", inst, ctx.stage_name, binding.U32(), offset_var);
+ ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var);
}
void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}/4],{}_ssbo{}[({}+4)/4]);", inst, ctx.stage_name,
+ ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name,
binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var);
}
void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.AddU32x4(
- "{}=uvec4({}_ssbo{}[{}/4],{}_ssbo{}[({}+4)/4],{}_ssbo{}[({}+8)/4],{}_ssbo{}[({}+12)/4]);",
- inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var,
- ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var);
+ ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}"
+ "+12)>>2]);",
+ inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(),
+ offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name,
+ binding.U32(), offset_var);
}
void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx,
@@ -68,7 +69,7 @@ void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] const IR::Value& offset,
[[maybe_unused]] std::string_view value) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int({}%4)*8,8);", ctx.stage_name,
+ ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int({}%4)*8,8);", ctx.stage_name,
binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value,
offset_var);
}
@@ -78,7 +79,7 @@ void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] const IR::Value& offset,
[[maybe_unused]] std::string_view value) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int({}%4)*8,8);", ctx.stage_name,
+ ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int({}%4)*8,8);", ctx.stage_name,
binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value,
offset_var);
}
@@ -88,7 +89,7 @@ void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] const IR::Value& offset,
[[maybe_unused]] std::string_view value) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int(({}/2)%2)*16,16);",
+ ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int(({}>>1)%2)*16,16);",
ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var,
value, offset_var);
}
@@ -98,7 +99,7 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] const IR::Value& offset,
[[maybe_unused]] std::string_view value) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int(({}/2)%2)*16,16);",
+ ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int(({}>>1)%2)*16,16);",
ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var,
value, offset_var);
}
@@ -106,14 +107,14 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx,
void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
std::string_view value) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.Add("{}_ssbo{}[{}/4]={};", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value);
}
void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
std::string_view value) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.Add("{}_ssbo{}[{}/4]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
- ctx.Add("{}_ssbo{}[({}+4)/4]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
}
void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx,
@@ -121,9 +122,9 @@ void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] const IR::Value& offset,
[[maybe_unused]] std::string_view value) {
const auto offset_var{ctx.var_alloc.Consume(offset)};
- ctx.Add("{}_ssbo{}[{}/4]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
- ctx.Add("{}_ssbo{}[({}+4)/4]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
- ctx.Add("{}_ssbo{}[({}+8)/4]={}.z;", ctx.stage_name, binding.U32(), offset_var, value);
- ctx.Add("{}_ssbo{}[({}+12)/4]={}.w;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value);
+ ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value);
}
} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
index 8a4c69547..578bc349f 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp
@@ -11,70 +11,70 @@
namespace Shader::Backend::GLSL {
void EmitLoadSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] std::string_view offset) {
- ctx.AddU32("{}=bitfieldExtract(smem[{}/4],int({}%4)*8,8);", inst, offset, offset);
+ ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset);
}
void EmitLoadSharedS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] std::string_view offset) {
- ctx.AddS32("{}=bitfieldExtract(int(smem[{}/4]),int({}%4)*8,8);", inst, offset, offset);
+ ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset);
}
void EmitLoadSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] std::string_view offset) {
- ctx.AddU32("{}=bitfieldExtract(smem[{}/4],int(({}/2)%2)*16,16);", inst, offset, offset);
+ ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset);
}
void EmitLoadSharedS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] std::string_view offset) {
- ctx.AddS32("{}=bitfieldExtract(int(smem[{}/4]),int(({}/2)%2)*16,16);", inst, offset, offset);
+ ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset);
}
void EmitLoadSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] std::string_view offset) {
- ctx.AddU32("{}=smem[{}/4];", inst, offset);
+ ctx.AddU32("{}=smem[{}>>2];", inst, offset);
}
void EmitLoadSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] std::string_view offset) {
- ctx.AddU32x2("{}=uvec2(smem[{}/4],smem[({}+4)/4]);", inst, offset, offset);
+ ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset);
}
void EmitLoadSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
[[maybe_unused]] std::string_view offset) {
- ctx.AddU32x4("{}=uvec4(smem[{}/4],smem[({}+4)/4],smem[({}+8)/4],smem[({}+12)/4]);", inst,
+ ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst,
offset, offset, offset, offset);
}
void EmitWriteSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset,
[[maybe_unused]] std::string_view value) {
- ctx.Add("smem[{}/4]=bitfieldInsert(smem[{}/4],{},int({}%4)*8,8);", offset, offset, value,
+ ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int({}%4)*8,8);", offset, offset, value,
offset);
}
void EmitWriteSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset,
[[maybe_unused]] std::string_view value) {
- ctx.Add("smem[{}/4]=bitfieldInsert(smem[{}/4],{},int(({}/2)%2)*16,16);", offset, offset, value,
- offset);
+ ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int(({}>>1)%2)*16,16);", offset, offset,
+ value, offset);
}
void EmitWriteSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset,
[[maybe_unused]] std::string_view value) {
- ctx.Add("smem[{}/4]={};", offset, value);
+ ctx.Add("smem[{}>>2]={};", offset, value);
}
void EmitWriteSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset,
[[maybe_unused]] std::string_view value) {
- ctx.Add("smem[{}/4]={}.x;", offset, value);
- ctx.Add("smem[({}+4)/4]={}.y;", offset, value);
+ ctx.Add("smem[{}>>2]={}.x;", offset, value);
+ ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
}
void EmitWriteSharedU128([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] std::string_view offset,
[[maybe_unused]] std::string_view value) {
- ctx.Add("smem[{}/4]={}.x;", offset, value);
- ctx.Add("smem[({}+4)/4]={}.y;", offset, value);
- ctx.Add("smem[({}+8)/4]={}.z;", offset, value);
- ctx.Add("smem[({}+12)/4]={}.w;", offset, value);
+ ctx.Add("smem[{}>>2]={}.x;", offset, value);
+ ctx.Add("smem[({}+4)>>2]={}.y;", offset, value);
+ ctx.Add("smem[({}+8)>>2]={}.z;", offset, value);
+ ctx.Add("smem[({}+12)>>2]={}.w;", offset, value);
}
} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index 4286f29c7..0b6c5ad82 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -112,7 +112,7 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
- ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id);
+ ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
}
void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
@@ -122,7 +122,7 @@ void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std
const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
- ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id);
+ ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
}
void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
@@ -133,7 +133,7 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
- ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id);
+ ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
}
void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
@@ -144,7 +144,7 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view val
const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
- ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id);
+ ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
}
void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,