summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
m---------externals/dynarmic0
-rw-r--r--src/common/x64/native_clock.cpp49
-rw-r--r--src/core/arm/arm_interface.h3
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.cpp37
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.h4
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_64.cpp38
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_64.h4
-rw-r--r--src/core/hle/kernel/physical_core.cpp1
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp25
-rw-r--r--src/shader_recompiler/backend/glsl/glsl_emit_context.cpp19
-rw-r--r--src/shader_recompiler/backend/glsl/glsl_emit_context.h1
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.cpp21
-rw-r--r--src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp9
-rw-r--r--src/shader_recompiler/shader_info.h1
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt1
-rw-r--r--src/video_core/host_shaders/opengl_convert_s8d24.comp18
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h3
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp7
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h2
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp24
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp5
24 files changed, 199 insertions, 91 deletions
diff --git a/externals/dynarmic b/externals/dynarmic
-Subproject af2d50288fc537201014c4230bb55ab9018a743
+Subproject 644172477eaf0d822178cb7e96c62b75caa9657
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index 7a3f21dcf..7fd9d22f8 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -10,25 +10,49 @@
#include "common/uint128.h"
#include "common/x64/native_clock.h"
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
namespace Common {
+#ifdef _MSC_VER
+__forceinline static u64 FencedRDTSC() {
+ _mm_lfence();
+ _ReadWriteBarrier();
+ const u64 result = __rdtsc();
+ _mm_lfence();
+ _ReadWriteBarrier();
+ return result;
+}
+#else
+static u64 FencedRDTSC() {
+ u64 result;
+ asm volatile("lfence\n\t"
+ "rdtsc\n\t"
+ "shl $32, %%rdx\n\t"
+ "or %%rdx, %0\n\t"
+ "lfence"
+ : "=a"(result)
+ :
+ : "rdx", "memory", "cc");
+ return result;
+}
+#endif
+
u64 EstimateRDTSCFrequency() {
// Discard the first result measuring the rdtsc.
- _mm_mfence();
- __rdtsc();
+ FencedRDTSC();
std::this_thread::sleep_for(std::chrono::milliseconds{1});
- _mm_mfence();
- __rdtsc();
+ FencedRDTSC();
// Get the current time.
const auto start_time = std::chrono::steady_clock::now();
- _mm_mfence();
- const u64 tsc_start = __rdtsc();
+ const u64 tsc_start = FencedRDTSC();
// Wait for 200 milliseconds.
std::this_thread::sleep_for(std::chrono::milliseconds{200});
const auto end_time = std::chrono::steady_clock::now();
- _mm_mfence();
- const u64 tsc_end = __rdtsc();
+ const u64 tsc_end = FencedRDTSC();
// Calculate differences.
const u64 timer_diff = static_cast<u64>(
std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
@@ -42,8 +66,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
u64 rtsc_frequency_)
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
rtsc_frequency_} {
- _mm_mfence();
- time_point.inner.last_measure = __rdtsc();
+ time_point.inner.last_measure = FencedRDTSC();
time_point.inner.accumulated_ticks = 0U;
ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency);
us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency);
@@ -58,8 +81,7 @@ u64 NativeClock::GetRTSC() {
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
do {
- _mm_mfence();
- const u64 current_measure = __rdtsc();
+ const u64 current_measure = FencedRDTSC();
u64 diff = current_measure - current_time_point.inner.last_measure;
diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
@@ -80,8 +102,7 @@ void NativeClock::Pause(bool is_paused) {
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
do {
new_time_point.pack = current_time_point.pack;
- _mm_mfence();
- new_time_point.inner.last_measure = __rdtsc();
+ new_time_point.inner.last_measure = FencedRDTSC();
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
current_time_point.pack, current_time_point.pack));
}
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index c60322442..dce2f4195 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -171,6 +171,9 @@ public:
/// Prepare core for thread reschedule (if needed to correctly handle state)
virtual void PrepareReschedule() = 0;
+ /// Signal an interrupt and ask the core to halt as soon as possible.
+ virtual void SignalInterrupt() = 0;
+
struct BacktraceEntry {
std::string module;
u64 address;
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 054572445..ab3210d84 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -25,6 +25,9 @@ namespace Core {
using namespace Common::Literals;
+constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
+constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
+
class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
public:
explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent_)
@@ -84,15 +87,13 @@ public:
}
void CallSVC(u32 swi) override {
- parent.svc_called = true;
parent.svc_swi = swi;
- parent.jit->HaltExecution();
+ parent.jit->HaltExecution(svc_call);
}
void AddTicks(u64 ticks) override {
- if (parent.uses_wall_clock) {
- return;
- }
+ ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+
// Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
// rough approximation of the amount of executed ticks in the system, it may be thrown off
// if not all cores are doing a similar amount of work. Instead of doing this, we should
@@ -108,12 +109,8 @@ public:
}
u64 GetTicksRemaining() override {
- if (parent.uses_wall_clock) {
- if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
- return minimum_run_cycles;
- }
- return 0U;
- }
+ ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+
return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
}
@@ -148,6 +145,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
// Timing
config.wall_clock_cntpct = uses_wall_clock;
+ config.enable_cycle_counting = !uses_wall_clock;
// Code cache size
config.code_cache_size = 512_MiB;
@@ -230,13 +228,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
void ARM_Dynarmic_32::Run() {
while (true) {
- jit->Run();
- if (!svc_called) {
- break;
+ const auto hr = jit->Run();
+ if (Has(hr, svc_call)) {
+ Kernel::Svc::Call(system, svc_swi);
}
- svc_called = false;
- Kernel::Svc::Call(system, svc_swi);
- if (shutdown) {
+ if (Has(hr, break_loop)) {
break;
}
}
@@ -322,8 +318,11 @@ void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) {
}
void ARM_Dynarmic_32::PrepareReschedule() {
- jit->HaltExecution();
- shutdown = true;
+ jit->HaltExecution(break_loop);
+}
+
+void ARM_Dynarmic_32::SignalInterrupt() {
+ jit->HaltExecution(break_loop);
}
void ARM_Dynarmic_32::ClearInstructionCache() {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 5d47b600d..3f68a4ff1 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -57,6 +57,7 @@ public:
void LoadContext(const ThreadContext64& ctx) override {}
void PrepareReschedule() override;
+ void SignalInterrupt() override;
void ClearExclusiveState() override;
void ClearInstructionCache() override;
@@ -83,9 +84,6 @@ private:
// SVC callback
u32 svc_swi{};
- bool svc_called{};
-
- bool shutdown{};
};
} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 7ff8f9495..68822a1fc 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -26,6 +26,9 @@ namespace Core {
using Vector = Dynarmic::A64::Vector;
using namespace Common::Literals;
+constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
+constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
+
class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
public:
explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent_)
@@ -106,7 +109,7 @@ public:
break;
}
- parent.jit->HaltExecution();
+ parent.jit->HaltExecution(Dynarmic::HaltReason::CacheInvalidation);
}
void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override {
@@ -126,15 +129,12 @@ public:
}
void CallSVC(u32 swi) override {
- parent.svc_called = true;
parent.svc_swi = swi;
- parent.jit->HaltExecution();
+ parent.jit->HaltExecution(svc_call);
}
void AddTicks(u64 ticks) override {
- if (parent.uses_wall_clock) {
- return;
- }
+ ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
// Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
// rough approximation of the amount of executed ticks in the system, it may be thrown off
@@ -149,12 +149,8 @@ public:
}
u64 GetTicksRemaining() override {
- if (parent.uses_wall_clock) {
- if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
- return minimum_run_cycles;
- }
- return 0U;
- }
+ ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+
return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
}
@@ -210,6 +206,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
// Timing
config.wall_clock_cntpct = uses_wall_clock;
+ config.enable_cycle_counting = !uses_wall_clock;
// Code cache size
config.code_cache_size = 512_MiB;
@@ -292,13 +289,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
void ARM_Dynarmic_64::Run() {
while (true) {
- jit->Run();
- if (!svc_called) {
- break;
+ const auto hr = jit->Run();
+ if (Has(hr, svc_call)) {
+ Kernel::Svc::Call(system, svc_swi);
}
- svc_called = false;
- Kernel::Svc::Call(system, svc_swi);
- if (shutdown) {
+ if (Has(hr, break_loop)) {
break;
}
}
@@ -389,8 +384,11 @@ void ARM_Dynarmic_64::LoadContext(const ThreadContext64& ctx) {
}
void ARM_Dynarmic_64::PrepareReschedule() {
- jit->HaltExecution();
- shutdown = true;
+ jit->HaltExecution(break_loop);
+}
+
+void ARM_Dynarmic_64::SignalInterrupt() {
+ jit->HaltExecution(break_loop);
}
void ARM_Dynarmic_64::ClearInstructionCache() {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 0c4e46c64..58bc7fbec 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -51,6 +51,7 @@ public:
void LoadContext(const ThreadContext64& ctx) override;
void PrepareReschedule() override;
+ void SignalInterrupt() override;
void ClearExclusiveState() override;
void ClearInstructionCache() override;
@@ -77,9 +78,6 @@ private:
// SVC callback
u32 svc_swi{};
- bool svc_called{};
-
- bool shutdown{};
};
} // namespace Core
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index 7477668e4..18a5f40f8 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -58,6 +58,7 @@ bool PhysicalCore::IsInterrupted() const {
void PhysicalCore::Interrupt() {
guard->lock();
interrupts[core_index].SetInterrupt(true);
+ arm_interface->SignalInterrupt();
guard->unlock();
}
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 0c1fbc7b1..282668b36 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -35,6 +35,15 @@ std::string_view OutputVertexIndex(EmitContext& ctx) {
return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
}
+std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_view index) {
+ if (binding.IsImmediate()) {
+ return fmt::format("{}_cbuf{}[{}]", ctx.stage_name, binding.U32(), index);
+ } else {
+ const auto binding_var{ctx.var_alloc.Consume(binding)};
+ return fmt::format("GetCbufIndirect({},{})", binding_var, index);
+ }
+}
+
void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
const IR::Value& offset, u32 num_bits, std::string_view cast = {},
std::string_view bit_offset = {}) {
@@ -55,8 +64,8 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32()))
: fmt::format("[({}>>2)%4]", offset_var)};
- const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
- const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)};
+ const auto cbuf{ChooseCbuf(ctx, binding, index)};
+ const auto cbuf_cast{fmt::format("{}({}{{}})", cast, cbuf)};
const auto extraction{num_bits == 32 ? cbuf_cast
: fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast,
bit_offset, num_bits)};
@@ -140,9 +149,9 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) {
- const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
if (offset.IsImmediate()) {
+ const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
static constexpr u32 cbuf_size{0x10000};
const u32 u32_offset{offset.U32()};
const s32 signed_offset{static_cast<s32>(offset.U32())};
@@ -162,17 +171,17 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
return;
}
const auto offset_var{ctx.var_alloc.Consume(offset)};
+ const auto cbuf{ChooseCbuf(ctx, binding, fmt::format("{}>>4", offset_var))};
if (!ctx.profile.has_gl_component_indexing_bug) {
- ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst,
- cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var);
+ ctx.AddU32x2("{}=uvec2({}({}[({}>>2)%4]),{}({}[(({}+4)>>2)%4]));", inst, cast, cbuf,
+ offset_var, cast, cbuf, offset_var);
return;
}
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
- ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset,
- swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var,
- "xyzw"[(swizzle + 1) % 4]);
+ ctx.Add("if(({}&3)=={}){}=uvec2({}({}.{}),{}({}.{}));", cbuf_offset, swizzle, ret, cast,
+ cbuf, "xyzw"[swizzle], cast, cbuf, "xyzw"[(swizzle + 1) % 4]);
}
}
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index e816a93ec..17266f40d 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -359,6 +359,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
header += "layout(location=0) uniform vec4 scaling;";
}
DefineConstantBuffers(bindings);
+ DefineConstantBufferIndirect();
DefineStorageBuffers(bindings);
SetupImages(bindings);
SetupTextures(bindings);
@@ -436,6 +437,24 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) {
}
}
+void EmitContext::DefineConstantBufferIndirect() {
+ if (!info.uses_cbuf_indirect) {
+ return;
+ }
+
+ header += profile.has_gl_cbuf_ftou_bug ? "uvec4 " : "vec4 ";
+ header += "GetCbufIndirect(uint binding, uint offset){"
+ "switch(binding){"
+ "default:";
+
+ for (const auto& desc : info.constant_buffer_descriptors) {
+ header +=
+ fmt::format("case {}:return {}_cbuf{}[offset];", desc.index, stage_name, desc.index);
+ }
+
+ header += "}}";
+}
+
void EmitContext::DefineStorageBuffers(Bindings& bindings) {
if (info.storage_buffers_descriptors.empty()) {
return;
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.h b/src/shader_recompiler/backend/glsl/glsl_emit_context.h
index d9b639d29..2b13db6e6 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.h
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.h
@@ -162,6 +162,7 @@ public:
private:
void SetupExtensions();
void DefineConstantBuffers(Bindings& bindings);
+ void DefineConstantBufferIndirect();
void DefineStorageBuffers(Bindings& bindings);
void DefineGenericOutput(size_t index, u32 invocations);
void DefineHelperFunctions();
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 28f6a6184..9c83cd2e4 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -1043,15 +1043,15 @@ void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) {
const Id merge_label{OpLabel()};
const Id uniform_type{uniform_types.*member_ptr};
- std::array<Id, Info::MAX_CBUFS> buf_labels;
- std::array<Sirit::Literal, Info::MAX_CBUFS> buf_literals;
- for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
+ std::array<Id, Info::MAX_INDIRECT_CBUFS> buf_labels;
+ std::array<Sirit::Literal, Info::MAX_INDIRECT_CBUFS> buf_literals;
+ for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
buf_labels[i] = OpLabel();
buf_literals[i] = Sirit::Literal{i};
}
OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
OpSwitch(binding, buf_labels[0], buf_literals, buf_labels);
- for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
+ for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
AddLabel(buf_labels[i]);
const Id cbuf{cbufs[i].*member_ptr};
const Id access_chain{OpAccessChain(uniform_type, cbuf, u32_zero_value, offset)};
@@ -1064,22 +1064,23 @@ void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) {
return func;
}};
IR::Type types{info.used_indirect_cbuf_types};
- if (True(types & IR::Type::U8)) {
+ bool supports_aliasing = profile.support_descriptor_aliasing;
+ if (supports_aliasing && True(types & IR::Type::U8)) {
load_const_func_u8 = make_accessor(U8, &UniformDefinitions::U8);
}
- if (True(types & IR::Type::U16)) {
+ if (supports_aliasing && True(types & IR::Type::U16)) {
load_const_func_u16 = make_accessor(U16, &UniformDefinitions::U16);
}
- if (True(types & IR::Type::F32)) {
+ if (supports_aliasing && True(types & IR::Type::F32)) {
load_const_func_f32 = make_accessor(F32[1], &UniformDefinitions::F32);
}
- if (True(types & IR::Type::U32)) {
+ if (supports_aliasing && True(types & IR::Type::U32)) {
load_const_func_u32 = make_accessor(U32[1], &UniformDefinitions::U32);
}
- if (True(types & IR::Type::U32x2)) {
+ if (supports_aliasing && True(types & IR::Type::U32x2)) {
load_const_func_u32x2 = make_accessor(U32[2], &UniformDefinitions::U32x2);
}
- if (True(types & IR::Type::U32x4)) {
+ if (!supports_aliasing || True(types & IR::Type::U32x4)) {
load_const_func_u32x4 = make_accessor(U32[4], &UniformDefinitions::U32x4);
}
}
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 0b2c60842..16278faab 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -32,13 +32,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
void AddRegisterIndexedLdc(Info& info) {
info.uses_cbuf_indirect = true;
- // The shader can use any possible constant buffer
- info.constant_buffer_mask = (1 << Info::MAX_CBUFS) - 1;
-
- auto& cbufs{info.constant_buffer_descriptors};
- cbufs.clear();
- for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
- cbufs.push_back(ConstantBufferDescriptor{.index = i, .count = 1});
+ for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
+ AddConstantBufferDescriptor(info, i, 1);
// The shader can use any possible access size
info.constant_buffer_used_sizes[i] = 0x10'000;
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 9d36bd9eb..a3a09c71c 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -105,6 +105,7 @@ struct ImageDescriptor {
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
struct Info {
+ static constexpr size_t MAX_INDIRECT_CBUFS{14};
static constexpr size_t MAX_CBUFS{18};
static constexpr size_t MAX_SSBOS{32};
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index af05d47d1..190fc6aea 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -18,6 +18,7 @@ set(SHADER_FILES
full_screen_triangle.vert
fxaa.frag
fxaa.vert
+ opengl_convert_s8d24.comp
opengl_copy_bc4.comp
opengl_present.frag
opengl_present.vert
diff --git a/src/video_core/host_shaders/opengl_convert_s8d24.comp b/src/video_core/host_shaders/opengl_convert_s8d24.comp
new file mode 100644
index 000000000..83e1ab176
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_convert_s8d24.comp
@@ -0,0 +1,18 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 430 core
+
+layout(local_size_x = 16, local_size_y = 8) in;
+
+layout(binding = 0, rgba8ui) restrict uniform uimage2D destination;
+layout(location = 0) uniform uvec3 size;
+
+void main() {
+ if (any(greaterThanEqual(gl_GlobalInvocationID, size))) {
+ return;
+ }
+ uvec4 components = imageLoad(destination, ivec2(gl_GlobalInvocationID.xy));
+ imageStore(destination, ivec2(gl_GlobalInvocationID.xy), components.wxyz);
+}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e6f9ece8b..7ab7f0c0a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -520,6 +520,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
+ screen_info.texture.width = image_view->size.width;
+ screen_info.texture.height = image_view->size.height;
screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 8f9a65beb..d12076358 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -409,8 +409,8 @@ ImageBufferMap::~ImageBufferMap() {
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
StateTracker& state_tracker_)
- : device{device_}, state_tracker{state_tracker_},
- util_shaders(program_manager), resolution{Settings::values.resolution_info} {
+ : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager),
+ format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} {
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
@@ -1325,6 +1325,9 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
Framebuffer::~Framebuffer() = default;
+FormatConversionPass::FormatConversionPass(UtilShaders& util_shaders_)
+ : util_shaders{util_shaders_} {}
+
void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies) {
const GLenum dst_target = ImageTarget(dst_image.info);
@@ -1357,6 +1360,12 @@ void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image,
dst_origin.z, region.width, region.height, region.depth,
dst_image.GlFormat(), dst_image.GlType(), nullptr);
}
+
+ // Swap component order of S8D24 to ABGR8 reinterprets
+ if (src_image.info.format == PixelFormat::D24_UNORM_S8_UINT &&
+ dst_image.info.format == PixelFormat::A8B8G8R8_UNORM) {
+ util_shaders.ConvertS8D24(dst_image, copies);
+ }
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 53088b66e..672fa8dde 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -55,13 +55,14 @@ struct FormatProperties {
class FormatConversionPass {
public:
- FormatConversionPass() = default;
+ explicit FormatConversionPass(UtilShaders& util_shaders);
~FormatConversionPass() = default;
void ConvertImage(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
private:
+ UtilShaders& util_shaders;
OGLBuffer intermediate_pbo;
size_t pbo_size{};
};
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index f8f29013a..3a3c213bb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -208,6 +208,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// Framebuffer orientation handling
framebuffer_transform_flags = framebuffer.transform_flags;
framebuffer_crop_rect = framebuffer.crop_rect;
+ framebuffer_width = framebuffer.width;
+ framebuffer_height = framebuffer.height;
const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
screen_info.was_accelerated =
@@ -480,9 +482,12 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
ASSERT_MSG(framebuffer_crop_rect.top == 0, "Unimplemented");
ASSERT_MSG(framebuffer_crop_rect.left == 0, "Unimplemented");
+ f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width);
+ f32 scale_v =
+ static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height);
+
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
- f32 scale_u = 1.f, scale_v = 1.f;
if (framebuffer_crop_rect.GetWidth() > 0) {
scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
static_cast<f32>(screen_info.texture.width);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index aa206878b..ae9558a33 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -137,6 +137,8 @@ private:
/// Used for transforming the framebuffer orientation
Service::android::BufferTransformFlags framebuffer_transform_flags{};
Common::Rectangle<int> framebuffer_crop_rect;
+ u32 framebuffer_width;
+ u32 framebuffer_height;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 897c380b3..04c482a09 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -13,6 +13,7 @@
#include "video_core/host_shaders/astc_decoder_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
+#include "video_core/host_shaders/opengl_convert_s8d24_comp.h"
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -50,7 +51,8 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
- copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
+ copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)),
+ convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) {
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
swizzle_table_buffer.Create();
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
@@ -248,6 +250,26 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
program_manager.RestoreGuestCompute();
}
+void UtilShaders::ConvertS8D24(Image& dst_image, std::span<const ImageCopy> copies) {
+ static constexpr GLuint BINDING_DESTINATION = 0;
+ static constexpr GLuint LOC_SIZE = 0;
+
+ program_manager.BindComputeProgram(convert_s8d24_program.handle);
+ for (const ImageCopy& copy : copies) {
+ ASSERT(copy.src_subresource.base_layer == 0);
+ ASSERT(copy.src_subresource.num_layers == 1);
+ ASSERT(copy.dst_subresource.base_layer == 0);
+ ASSERT(copy.dst_subresource.num_layers == 1);
+
+ glUniform3ui(LOC_SIZE, copy.extent.width, copy.extent.height, copy.extent.depth);
+ glBindImageTexture(BINDING_DESTINATION, dst_image.StorageHandle(),
+ copy.dst_subresource.base_level, GL_TRUE, 0, GL_READ_WRITE, GL_RGBA8UI);
+ glDispatchCompute(Common::DivCeil(copy.extent.width, 16u),
+ Common::DivCeil(copy.extent.height, 8u), copy.extent.depth);
+ }
+ program_manager.RestoreGuestCompute();
+}
+
GLenum StoreFormat(u32 bytes_per_block) {
switch (bytes_per_block) {
case 1:
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 5de95ea7a..5c132e67f 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -39,6 +39,8 @@ public:
void CopyBC4(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
+ void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies);
+
private:
ProgramManager& program_manager;
@@ -49,6 +51,7 @@ private:
OGLProgram block_linear_unswizzle_3d_program;
OGLProgram pitch_unswizzle_program;
OGLProgram copy_bc4_program;
+ OGLProgram convert_s8d24_program;
};
GLenum StoreFormat(u32 bytes_per_block);
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index d893c1952..b866e9103 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -1406,8 +1406,9 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
- f32 scale_u = 1.0f;
- f32 scale_v = 1.0f;
+ f32 scale_u = static_cast<f32>(framebuffer.width) / static_cast<f32>(screen_info.width);
+ f32 scale_v = static_cast<f32>(framebuffer.height) / static_cast<f32>(screen_info.height);
+
// Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
// (e.g. handheld mode) on a 1920x1080 framebuffer.
if (!fsr) {