summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.ci/templates/build-standard.yml2
-rw-r--r--.ci/templates/build-testing.yml2
-rw-r--r--.ci/templates/release.yml29
-rw-r--r--README.md3
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp9
-rw-r--r--src/core/core.h4
-rw-r--r--src/core/hle/kernel/process.cpp29
-rw-r--r--src/core/hle/kernel/process.h11
-rw-r--r--src/core/hle/kernel/svc.cpp12
-rw-r--r--src/video_core/dma_pusher.cpp2
-rw-r--r--src/video_core/engines/kepler_compute.cpp2
-rw-r--r--src/video_core/engines/kepler_memory.cpp2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp261
-rw-r--r--src/video_core/engines/maxwell_3d.h89
-rw-r--r--src/video_core/engines/maxwell_dma.cpp2
-rw-r--r--src/video_core/engines/shader_bytecode.h26
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp178
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp16
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp41
-rw-r--r--src/video_core/renderer_opengl/gl_state.h33
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp6
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp17
-rw-r--r--src/video_core/shader/decode.cpp8
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp69
-rw-r--r--src/video_core/shader/decode/image.cpp6
-rw-r--r--src/video_core/shader/decode/memory.cpp37
-rw-r--r--src/video_core/shader/decode/other.cpp2
-rw-r--r--src/video_core/shader/decode/texture.cpp34
-rw-r--r--src/video_core/shader/decode/xmad.cpp12
-rw-r--r--src/video_core/shader/node.h3
-rw-r--r--src/video_core/shader/node_helper.cpp2
-rw-r--r--src/video_core/shader/shader_ir.cpp118
-rw-r--r--src/video_core/shader/shader_ir.h13
-rw-r--r--src/video_core/shader/track.cpp35
-rw-r--r--src/video_core/texture_cache/texture_cache.h13
38 files changed, 778 insertions, 365 deletions
diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml
index 9975f5c49..6cd209dbf 100644
--- a/.ci/templates/build-standard.yml
+++ b/.ci/templates/build-standard.yml
@@ -3,7 +3,7 @@ jobs:
displayName: 'standard'
pool:
vmImage: ubuntu-latest
- strategy:
+ strategy:
maxParallel: 10
matrix:
windows:
diff --git a/.ci/templates/build-testing.yml b/.ci/templates/build-testing.yml
index 101e52996..278efb6f5 100644
--- a/.ci/templates/build-testing.yml
+++ b/.ci/templates/build-testing.yml
@@ -3,7 +3,7 @@ jobs:
displayName: 'testing'
pool:
vmImage: ubuntu-latest
- strategy:
+ strategy:
maxParallel: 10
matrix:
windows:
diff --git a/.ci/templates/release.yml b/.ci/templates/release.yml
deleted file mode 100644
index 60bebd2aa..000000000
--- a/.ci/templates/release.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-steps:
- - task: DownloadPipelineArtifact@2
- displayName: 'Download Windows Release'
- inputs:
- artifactName: 'yuzu-$(BuildName)-windows-mingw'
- buildType: 'current'
- targetPath: '$(Build.ArtifactStagingDirectory)'
- - task: DownloadPipelineArtifact@2
- displayName: 'Download Linux Release'
- inputs:
- artifactName: 'yuzu-$(BuildName)-linux'
- buildType: 'current'
- targetPath: '$(Build.ArtifactStagingDirectory)'
- - task: DownloadPipelineArtifact@2
- displayName: 'Download Release Point'
- inputs:
- artifactName: 'yuzu-$(BuildName)-release-point'
- buildType: 'current'
- targetPath: '$(Build.ArtifactStagingDirectory)'
- - script: echo '##vso[task.setvariable variable=tagcommit]' && cat $(Build.ArtifactStagingDirectory)/tag-commit.sha
- displayName: 'Calculate Release Point'
- - task: GitHubRelease@0
- inputs:
- gitHubConnection: $(GitHubReleaseConnectionName)
- repositoryName: '$(GitHubReleaseRepoName)'
- action: 'create'
- target: $(variables.tagcommit)
- title: 'yuzu $(BuildName) #$(Build.BuildId)'
- assets: '$(Build.ArtifactStagingDirectory)/*'
diff --git a/README.md b/README.md
index 4b1ea7d7c..32b889063 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,8 @@
-yuzu emulator
+yuzu emulator
=============
[![Travis CI Build Status](https://travis-ci.org/yuzu-emu/yuzu.svg?branch=master)](https://travis-ci.org/yuzu-emu/yuzu)
[![AppVeyor CI Build Status](https://ci.appveyor.com/api/projects/status/77k97svb2usreu68?svg=true)](https://ci.appveyor.com/project/bunnei/yuzu)
+[![Azure Mainline CI Build Status](https://dev.azure.com/yuzu-emu/yuzu/_apis/build/status/yuzu%20mainline?branchName=master)](https://dev.azure.com/yuzu-emu/yuzu/)
yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/).
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index b0ee7821a..97d5c2a8a 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -50,11 +50,14 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_
static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
void* user_data) {
+ auto* const system = static_cast<System*>(user_data);
+
ARM_Interface::ThreadContext ctx{};
- Core::CurrentArmInterface().SaveContext(ctx);
+ system->CurrentArmInterface().SaveContext(ctx);
ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
ctx.pc, ctx.cpu_registers[30]);
- return {};
+
+ return false;
}
ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
@@ -65,7 +68,7 @@ ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
uc_hook hook{};
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1));
- CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1));
+ CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0, -1));
if (GDBStub::IsServerEnabled()) {
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1));
last_bkpt_hit = false;
diff --git a/src/core/core.h b/src/core/core.h
index 11e73278e..8ebb385ac 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -327,10 +327,6 @@ private:
static System s_instance;
};
-inline ARM_Interface& CurrentArmInterface() {
- return System::GetInstance().CurrentArmInterface();
-}
-
inline Kernel::Process* CurrentProcess() {
return System::GetInstance().CurrentProcess();
}
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index db3ab14ce..92169a97b 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -184,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
}
void Process::Run(s32 main_thread_priority, u64 stack_size) {
- // The kernel always ensures that the given stack size is page aligned.
- main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
-
- // Allocate and map the main thread stack
- // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
- // of the user address space.
- const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
- vm_manager
- .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
- 0, main_thread_stack_size, MemoryState::Stack)
- .Unwrap();
+ AllocateMainThreadStack(stack_size);
+ tls_region_address = CreateTLSRegion();
vm_manager.LogLayout();
+
ChangeStatus(ProcessStatus::Running);
SetupMainThread(*this, kernel, main_thread_priority);
@@ -226,6 +218,9 @@ void Process::PrepareForTermination() {
stop_threads(system.Scheduler(2).GetThreadList());
stop_threads(system.Scheduler(3).GetThreadList());
+ FreeTLSRegion(tls_region_address);
+ tls_region_address = 0;
+
ChangeStatus(ProcessStatus::Exited);
}
@@ -325,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) {
WakeupAllWaitingThreads();
}
+void Process::AllocateMainThreadStack(u64 stack_size) {
+ // The kernel always ensures that the given stack size is page aligned.
+ main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
+
+ // Allocate and map the main thread stack
+ const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
+ vm_manager
+ .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
+ 0, main_thread_stack_size, MemoryState::Stack)
+ .Unwrap();
+}
+
} // namespace Kernel
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 3196014da..c2df451f3 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -135,6 +135,11 @@ public:
return mutex;
}
+ /// Gets the address to the process' dedicated TLS region.
+ VAddr GetTLSRegionAddress() const {
+ return tls_region_address;
+ }
+
/// Gets the current status of the process
ProcessStatus GetStatus() const {
return status;
@@ -296,6 +301,9 @@ private:
/// a process signal.
void ChangeStatus(ProcessStatus new_status);
+ /// Allocates the main thread stack for the process, given the stack size in bytes.
+ void AllocateMainThreadStack(u64 stack_size);
+
/// Memory manager for this process.
Kernel::VMManager vm_manager;
@@ -358,6 +366,9 @@ private:
/// variable related facilities.
Mutex mutex;
+ /// Address indicating the location of the process' dedicated TLS region.
+ VAddr tls_region_address = 0;
+
/// Random values for svcGetInfo RandomEntropy
std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{};
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index a46eed3da..1fd1a732a 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -843,9 +843,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
return RESULT_SUCCESS;
case GetInfoType::UserExceptionContextAddr:
- LOG_WARNING(Kernel_SVC,
- "(STUBBED) Attempted to query user exception context address, returned 0");
- *result = 0;
+ *result = process->GetTLSRegionAddress();
return RESULT_SUCCESS;
case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
@@ -1739,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
// Wait for an address (via Address Arbiter)
static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
s64 timeout) {
- LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
- address, type, value, timeout);
+ LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
+ type, value, timeout);
// If the passed address is a kernel virtual address, return invalid memory state.
if (Memory::IsKernelVirtualAddress(address)) {
@@ -1762,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
// Signals to an address (via Address Arbiter)
static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
s32 num_to_wake) {
- LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
- address, type, value, num_to_wake);
+ LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
+ address, type, value, num_to_wake);
// If the passed address is a kernel virtual address, return invalid memory state.
if (Memory::IsKernelVirtualAddress(address)) {
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 3175579cc..bd036cbe8 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
MICROPROFILE_SCOPE(DispatchCalls);
// On entering GPU code, assume all memory may be touched by the ARM core.
- gpu.Maxwell3D().dirty_flags.OnMemoryWrite();
+ gpu.Maxwell3D().dirty.OnMemoryWrite();
dma_pushbuffer_subindex = 0;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 089465a71..08586d33c 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
- system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+ system.GPU().Maxwell3D().dirty.OnMemoryWrite();
}
break;
}
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 0561f676c..44279de00 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
- system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+ system.GPU().Maxwell3D().dirty.OnMemoryWrite();
}
break;
}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 8755b8af4..fe9fc0278 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
+ InitDirtySettings();
InitializeRegisterDefaults();
}
@@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.stencil_back_func_mask = 0xFFFFFFFF;
regs.stencil_back_mask = 0xFFFFFFFF;
+ regs.depth_test_func = Regs::ComparisonOp::Always;
+ regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise;
+ regs.cull.cull_face = Regs::Cull::CullFace::Back;
+
// TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
// register carrying a default value. Assume it's OpenGL's default (1).
regs.point_size = 1.0f;
@@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.rt_separate_frag_data = 1;
}
+#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
+
+void Maxwell3D::InitDirtySettings() {
+ const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
+ const auto start_itr = dirty_pointers.begin() + start;
+ const auto end_itr = start_itr + range;
+ std::fill(start_itr, end_itr, position);
+ };
+ dirty.regs.fill(true);
+
+ // Init Render Targets
+ constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
+ constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
+ constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
+ u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
+ for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
+ set_block(rt_reg, registers_per_rt, rt_dirty_reg);
+ rt_dirty_reg++;
+ }
+ constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
+ dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
+ dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
+ dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
+ constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
+ constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
+ set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
+
+ // Init Vertex Arrays
+ constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
+ constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
+ constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
+ u32 va_reg = DIRTY_REGS_POS(vertex_array);
+ u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
+ for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
+ vertex_reg += vertex_array_size) {
+ set_block(vertex_reg, 3, va_reg);
+ // The divisor concerns vertex array instances
+ dirty_pointers[vertex_reg + 3] = vi_reg;
+ va_reg++;
+ vi_reg++;
+ }
+ constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
+ constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
+ constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
+ va_reg = DIRTY_REGS_POS(vertex_array);
+ for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
+ vertex_reg += vertex_limit_size) {
+ set_block(vertex_reg, vertex_limit_size, va_reg);
+ va_reg++;
+ }
+ constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
+ constexpr u32 vertex_instance_size =
+ sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
+ constexpr u32 vertex_instance_end =
+ vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
+ vi_reg = DIRTY_REGS_POS(vertex_instance);
+ for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
+ vertex_reg += vertex_instance_size) {
+ set_block(vertex_reg, vertex_instance_size, vi_reg);
+ vi_reg++;
+ }
+ set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
+ DIRTY_REGS_POS(vertex_attrib_format));
+
+ // Init Shaders
+ constexpr u32 shader_registers_count =
+ sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
+ set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
+ DIRTY_REGS_POS(shaders));
+
+ // State
+
+ // Viewport
+ constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
+ constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
+ constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
+ set_block(viewport_start, viewport_size, viewport_dirty_reg);
+ constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
+ constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
+ set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
+
+ // Viewport transformation
+ constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
+ constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
+ set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
+
+ // Cullmode
+ constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
+ constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
+ set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
+
+ // Screen y control
+ dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
+
+ // Primitive Restart
+ constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
+ constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
+ set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
+
+ // Depth Test
+ constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
+ dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
+
+ // Stencil Test
+ constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
+
+ // Color Mask
+ constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
+ dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
+ set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
+ color_mask_dirty_reg);
+ // Blend State
+ constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
+ set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
+ blend_state_dirty_reg);
+ dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
+ set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
+ set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
+ blend_state_dirty_reg);
+
+ // Scissor State
+ constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
+ set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
+ scissor_test_dirty_reg);
+
+ // Polygon Offset
+ constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
+ dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
+}
+
void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
// Reset the current macro.
executing_macro = 0;
@@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
const u32 method = method_call.method;
+ if (method == cb_data_state.current) {
+ regs.reg_array[method] = method_call.argument;
+ ProcessCBData(method_call.argument);
+ return;
+ } else if (cb_data_state.current != null_cb_data) {
+ FinishCBData();
+ }
+
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
if (executing_macro != 0) {
@@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
if (regs.reg_array[method] != method_call.argument) {
regs.reg_array[method] = method_call.argument;
- // Color buffers
- constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
- constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
- if (method >= first_rt_reg &&
- method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
- const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
- dirty_flags.color_buffer.set(rt_index);
- }
-
- // Zeta buffer
- constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
- if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
- method == MAXWELL3D_REG_INDEX(zeta_width) ||
- method == MAXWELL3D_REG_INDEX(zeta_height) ||
- (method >= MAXWELL3D_REG_INDEX(zeta) &&
- method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
- dirty_flags.zeta_buffer = true;
- }
-
- // Shader
- constexpr u32 shader_registers_count =
- sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
- if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
- method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
- dirty_flags.shaders = true;
- }
-
- // Vertex format
- if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
- method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
- dirty_flags.vertex_attrib_format = true;
- }
-
- // Vertex buffer
- if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
- method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
- dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
- } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
- method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
- dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
- } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
- method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
- dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
+ const std::size_t dirty_reg = dirty_pointers[method];
+ if (dirty_reg) {
+ dirty.regs[dirty_reg] = true;
+ if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
+ dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
+ dirty.vertex_array_buffers = true;
+ } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
+ dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
+ dirty.vertex_instances = true;
+ } else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
+ dirty_reg < DIRTY_REGS_POS(render_settings)) {
+ dirty.render_settings = true;
+ }
}
}
@@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
- ProcessCBData(method_call.argument);
+ StartCBData(method);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
@@ -261,7 +397,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
- dirty_flags.OnMemoryWrite();
+ dirty.OnMemoryWrite();
}
break;
}
@@ -333,7 +469,6 @@ void Maxwell3D::ProcessQueryGet() {
query_result.timestamp = system.CoreTiming().GetTicks();
memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
}
- dirty_flags.OnMemoryWrite();
break;
}
default:
@@ -405,23 +540,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
}
void Maxwell3D::ProcessCBData(u32 value) {
+ const u32 id = cb_data_state.id;
+ cb_data_state.buffer[id][cb_data_state.counter] = value;
+ // Increment the current buffer position.
+ regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
+ cb_data_state.counter++;
+}
+
+void Maxwell3D::StartCBData(u32 method) {
+ constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
+ cb_data_state.start_pos = regs.const_buffer.cb_pos;
+ cb_data_state.id = method - first_cb_data;
+ cb_data_state.current = method;
+ cb_data_state.counter = 0;
+ ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
+}
+
+void Maxwell3D::FinishCBData() {
// Write the input value to the current const buffer at the current position.
const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
ASSERT(buffer_address != 0);
// Don't allow writing past the end of the buffer.
- ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
+ ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
- const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+ const GPUVAddr address{buffer_address + cb_data_state.start_pos};
+ const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
- u8* ptr{memory_manager.GetPointer(address)};
- rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
- memory_manager.Write<u32>(address, value);
+ const u32 id = cb_data_state.id;
+ memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
+ dirty.OnMemoryWrite();
- dirty_flags.OnMemoryWrite();
-
- // Increment the current buffer position.
- regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
+ cb_data_state.id = null_cb_data;
+ cb_data_state.current = null_cb_data;
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 8d15c8a48..ac300bf76 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1124,23 +1124,77 @@ public:
State state{};
- struct DirtyFlags {
- std::bitset<8> color_buffer{0xFF};
- std::bitset<32> vertex_array{0xFFFFFFFF};
+ struct DirtyRegs {
+ static constexpr std::size_t NUM_REGS = 256;
+ union {
+ struct {
+ bool null_dirty;
+
+ // Vertex Attributes
+ bool vertex_attrib_format;
+
+ // Vertex Arrays
+ std::array<bool, 32> vertex_array;
+
+ bool vertex_array_buffers;
+
+ // Vertex Instances
+ std::array<bool, 32> vertex_instance;
+
+ bool vertex_instances;
+
+ // Render Targets
+ std::array<bool, 8> render_target;
+ bool depth_buffer;
+
+ bool render_settings;
+
+ // Shaders
+ bool shaders;
+
+ // Rasterizer State
+ bool viewport;
+ bool clip_coefficient;
+ bool cull_mode;
+ bool primitive_restart;
+ bool depth_test;
+ bool stencil_test;
+ bool blend_state;
+ bool scissor_test;
+ bool transform_feedback;
+ bool color_mask;
+ bool polygon_offset;
- bool vertex_attrib_format = true;
- bool zeta_buffer = true;
- bool shaders = true;
+ // Complementary
+ bool viewport_transform;
+ bool screen_y_control;
+
+ bool memory_general;
+ };
+ std::array<bool, NUM_REGS> regs;
+ };
+
+ void ResetVertexArrays() {
+ vertex_array.fill(true);
+ vertex_array_buffers = true;
+ }
+
+ void ResetRenderTargets() {
+ depth_buffer = true;
+ render_target.fill(true);
+ render_settings = true;
+ }
void OnMemoryWrite() {
- zeta_buffer = true;
shaders = true;
- color_buffer.set();
- vertex_array.set();
+ memory_general = true;
+ ResetRenderTargets();
+ ResetVertexArrays();
}
- };
- DirtyFlags dirty_flags;
+ } dirty{};
+
+ std::array<u8, Regs::NUM_REGS> dirty_pointers{};
/// Reads a register value located at the input method address
u32 GetRegisterValue(u32 method) const;
@@ -1192,6 +1246,15 @@ private:
/// Interpreter for the macro codes uploaded to the GPU.
MacroInterpreter macro_interpreter;
+ static constexpr u32 null_cb_data = 0xFFFFFFFF;
+ struct {
+ std::array<std::array<u32, 0x4000>, 16> buffer;
+ u32 current{null_cb_data};
+ u32 id{null_cb_data};
+ u32 start_pos{};
+ u32 counter{};
+ } cb_data_state;
+
Upload::State upload_state;
/// Retrieves information about a specific TIC entry from the TIC buffer.
@@ -1200,6 +1263,8 @@ private:
/// Retrieves information about a specific TSC entry from the TSC buffer.
Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
+ void InitDirtySettings();
+
/**
* Call a macro on this engine.
* @param method Method to call
@@ -1223,7 +1288,9 @@ private:
void ProcessSyncPoint();
/// Handles a write to the CB_DATA[i] register.
+ void StartCBData(u32 method);
void ProcessCBData(u32 value);
+ void FinishCBData();
/// Handles a write to the CB_BIND register.
void ProcessCBBind(Regs::ShaderStage stage);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index afb9578d0..b5f57e534 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() {
}
// All copies here update the main memory, so mark all rasterizer states as invalid.
- system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+ system.GPU().Maxwell3D().dirty.OnMemoryWrite();
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 79d469b88..8520a0143 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -931,8 +931,6 @@ union Instruction {
} csetp;
union {
- BitField<35, 4, PredCondition> cond;
- BitField<49, 1, u64> h_and;
BitField<6, 1, u64> ftz;
BitField<45, 2, PredOperation> op;
BitField<3, 3, u64> pred3;
@@ -940,9 +938,21 @@ union Instruction {
BitField<43, 1, u64> negate_a;
BitField<44, 1, u64> abs_a;
BitField<47, 2, HalfType> type_a;
- BitField<31, 1, u64> negate_b;
- BitField<30, 1, u64> abs_b;
- BitField<28, 2, HalfType> type_b;
+ union {
+ BitField<35, 4, PredCondition> cond;
+ BitField<49, 1, u64> h_and;
+ BitField<31, 1, u64> negate_b;
+ BitField<30, 1, u64> abs_b;
+ BitField<28, 2, HalfType> type_b;
+ } reg;
+ union {
+ BitField<56, 1, u64> negate_b;
+ BitField<54, 1, u64> abs_b;
+ } cbuf;
+ union {
+ BitField<49, 4, PredCondition> cond;
+ BitField<53, 1, u64> h_and;
+ } cbuf_and_imm;
BitField<42, 1, u64> neg_pred;
BitField<39, 3, u64> pred39;
} hsetp2;
@@ -1548,7 +1558,9 @@ public:
HFMA2_RC,
HFMA2_RR,
HFMA2_IMM_R,
+ HSETP2_C,
HSETP2_R,
+ HSETP2_IMM,
HSET2_R,
POPC_C,
POPC_R,
@@ -1831,7 +1843,9 @@ private:
INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
- INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"),
+ INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
+ INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
+ INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9cd4cf7b8..c59e687b6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -107,6 +107,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
+ clear_framebuffer.Create();
LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
CheckExtensions();
@@ -126,10 +127,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
- if (!gpu.dirty_flags.vertex_attrib_format) {
+ if (!gpu.dirty.vertex_attrib_format) {
return state.draw.vertex_array;
}
- gpu.dirty_flags.vertex_attrib_format = false;
+ gpu.dirty.vertex_attrib_format = false;
MICROPROFILE_SCOPE(OpenGL_VAO);
@@ -183,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
}
// Rebinding the VAO invalidates the vertex buffer bindings.
- gpu.dirty_flags.vertex_array.set();
+ gpu.dirty.ResetVertexArrays();
state.draw.vertex_array = vao_entry.handle;
return vao_entry.handle;
@@ -191,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
-
- if (gpu.dirty_flags.vertex_array.none())
+ if (!gpu.dirty.vertex_array_buffers)
return;
+ gpu.dirty.vertex_array_buffers = false;
+
+ const auto& regs = gpu.regs;
MICROPROFILE_SCOPE(OpenGL_VB);
// Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (!gpu.dirty_flags.vertex_array[index])
+ if (!gpu.dirty.vertex_array[index])
continue;
+ gpu.dirty.vertex_array[index] = false;
+ gpu.dirty.vertex_instance[index] = false;
const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled())
@@ -226,8 +230,32 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
glVertexArrayBindingDivisor(vao, index, 0);
}
}
+}
+
+void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
+ auto& gpu = system.GPU().Maxwell3D();
+
+ if (!gpu.dirty.vertex_instances)
+ return;
+ gpu.dirty.vertex_instances = false;
- gpu.dirty_flags.vertex_array.reset();
+ const auto& regs = gpu.regs;
+ // Upload all guest vertex arrays sequentially to our buffer
+ for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ if (!gpu.dirty.vertex_instance[index])
+ continue;
+
+ gpu.dirty.vertex_instance[index] = false;
+
+ if (regs.instanced_arrays.IsInstancingEnabled(index) &&
+ regs.vertex_array[index].divisor != 0) {
+ // Enable vertex buffer instancing with the specified divisor.
+ glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
+ } else {
+ // Disable the vertex buffer instancing.
+ glVertexArrayBindingDivisor(vao, index, 0);
+ }
+ }
}
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
@@ -343,7 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
SyncClipEnabled(clip_distances);
- gpu.dirty_flags.shaders = false;
+ gpu.dirty.shaders = false;
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -426,13 +454,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
single_color_target};
- if (fb_config_state == current_framebuffer_config_state &&
- gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
+ if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
// single color targets). This is done because the guest registers may not change but the
// host framebuffer may contain different attachments
return current_depth_stencil_usage;
}
+ gpu.dirty.render_settings = false;
current_framebuffer_config_state = fb_config_state;
texture_cache.GuardRenderTargets(true);
@@ -521,13 +549,65 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
}
+void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
+ bool using_depth_fb, bool using_stencil_fb) {
+ auto& gpu = system.GPU().Maxwell3D();
+ const auto& regs = gpu.regs;
+
+ texture_cache.GuardRenderTargets(true);
+ View color_surface{};
+ if (using_color_fb) {
+ color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
+ }
+ View depth_surface{};
+ if (using_depth_fb || using_stencil_fb) {
+ depth_surface = texture_cache.GetDepthBufferSurface(false);
+ }
+ texture_cache.GuardRenderTargets(false);
+
+ current_state.draw.draw_framebuffer = clear_framebuffer.handle;
+ current_state.ApplyFramebufferState();
+
+ if (color_surface) {
+ color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
+ } else {
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ }
+
+ if (depth_surface) {
+ const auto& params = depth_surface->GetSurfaceParams();
+ switch (params.type) {
+ case VideoCore::Surface::SurfaceType::Depth: {
+ depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+ break;
+ }
+ case VideoCore::Surface::SurfaceType::DepthStencil: {
+ depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+ break;
+ }
+ default: { UNIMPLEMENTED(); }
+ }
+ } else {
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+ 0);
+ }
+}
+
void RasterizerOpenGL::Clear() {
const auto& regs = system.GPU().Maxwell3D().regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
- OpenGLState clear_state;
+ OpenGLState prev_state{OpenGLState::GetCurState()};
+ SCOPE_EXIT({
+ prev_state.AllDirty();
+ prev_state.Apply();
+ });
+
+ OpenGLState clear_state{OpenGLState::GetCurState()};
+ clear_state.SetDefaultViewports();
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
use_color = true;
@@ -547,6 +627,7 @@ void RasterizerOpenGL::Clear() {
// true.
clear_state.depth.test_enabled = true;
clear_state.depth.test_func = GL_ALWAYS;
+ clear_state.depth.write_mask = GL_TRUE;
}
if (regs.clear_buffers.S) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
@@ -583,8 +664,9 @@ void RasterizerOpenGL::Clear() {
return;
}
- const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
- clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
+ ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
+
+ SyncViewport(clear_state);
if (regs.clear_flags.scissor) {
SyncScissorTest(clear_state);
}
@@ -593,21 +675,18 @@ void RasterizerOpenGL::Clear() {
clear_state.EmulateViewportWithScissor();
}
- clear_state.ApplyColorMask();
- clear_state.ApplyDepth();
- clear_state.ApplyStencilTest();
- clear_state.ApplyViewport();
- clear_state.ApplyFramebufferState();
+ clear_state.AllDirty();
+ clear_state.Apply();
if (use_color) {
- glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
+ glClearBufferfv(GL_COLOR, 0, regs.clear_color);
}
- if (clear_depth && clear_stencil) {
+ if (use_depth && use_stencil) {
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
- } else if (clear_depth) {
+ } else if (use_depth) {
glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
- } else if (clear_stencil) {
+ } else if (use_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
}
@@ -663,6 +742,7 @@ void RasterizerOpenGL::DrawArrays() {
// Upload vertex and index data.
SetupVertexBuffer(vao);
+ SetupVertexInstances(vao);
const GLintptr index_buffer_offset = SetupIndexBuffer();
// Setup draw parameters. It will automatically choose what glDraw* method to use.
@@ -689,7 +769,7 @@ void RasterizerOpenGL::DrawArrays() {
if (invalidate) {
// As all cached buffers are invalidated, we need to recheck their state.
- gpu.dirty_flags.vertex_array.set();
+ gpu.dirty.ResetVertexArrays();
}
shader_program_manager->ApplyTo(state);
@@ -702,6 +782,7 @@ void RasterizerOpenGL::DrawArrays() {
params.DispatchDraw();
accelerate_draw = AccelDraw::Disabled;
+ gpu.dirty.memory_general = false;
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -976,10 +1057,11 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
- state.cull.enabled = regs.cull.enabled != 0;
+ const auto& regs = maxwell3d.regs;
+ state.cull.enabled = regs.cull.enabled != 0;
if (state.cull.enabled) {
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
@@ -1012,16 +1094,21 @@ void RasterizerOpenGL::SyncDepthTestState() {
state.depth.test_enabled = regs.depth_test_enable != 0;
state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
- if (!state.depth.test_enabled)
+ if (!state.depth.test_enabled) {
return;
+ }
state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
}
void RasterizerOpenGL::SyncStencilTestState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
- state.stencil.test_enabled = regs.stencil_enable != 0;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ if (!maxwell3d.dirty.stencil_test) {
+ return;
+ }
+ const auto& regs = maxwell3d.regs;
+ state.stencil.test_enabled = regs.stencil_enable != 0;
if (!regs.stencil_enable) {
return;
}
@@ -1050,10 +1137,17 @@ void RasterizerOpenGL::SyncStencilTestState() {
state.stencil.back.action_depth_fail = GL_KEEP;
state.stencil.back.action_depth_pass = GL_KEEP;
}
+ state.MarkDirtyStencilState();
+ maxwell3d.dirty.stencil_test = false;
}
void RasterizerOpenGL::SyncColorMask() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ if (!maxwell3d.dirty.color_mask) {
+ return;
+ }
+ const auto& regs = maxwell3d.regs;
+
const std::size_t count =
regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
for (std::size_t i = 0; i < count; i++) {
@@ -1064,6 +1158,9 @@ void RasterizerOpenGL::SyncColorMask() {
dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
}
+
+ state.MarkDirtyColorMask();
+ maxwell3d.dirty.color_mask = false;
}
void RasterizerOpenGL::SyncMultiSampleState() {
@@ -1078,7 +1175,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
}
void RasterizerOpenGL::SyncBlendState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ if (!maxwell3d.dirty.blend_state) {
+ return;
+ }
+ const auto& regs = maxwell3d.regs;
state.blend_color.red = regs.blend_color.r;
state.blend_color.green = regs.blend_color.g;
@@ -1101,6 +1202,8 @@ void RasterizerOpenGL::SyncBlendState() {
for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
state.blend[i].enabled = false;
}
+ maxwell3d.dirty.blend_state = false;
+ state.MarkDirtyBlendState();
return;
}
@@ -1117,6 +1220,9 @@ void RasterizerOpenGL::SyncBlendState() {
blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
}
+
+ state.MarkDirtyBlendState();
+ maxwell3d.dirty.blend_state = false;
}
void RasterizerOpenGL::SyncLogicOpState() {
@@ -1168,13 +1274,21 @@ void RasterizerOpenGL::SyncPointState() {
}
void RasterizerOpenGL::SyncPolygonOffset() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ if (!maxwell3d.dirty.polygon_offset) {
+ return;
+ }
+ const auto& regs = maxwell3d.regs;
+
state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
state.polygon_offset.units = regs.polygon_offset_units;
state.polygon_offset.factor = regs.polygon_offset_factor;
state.polygon_offset.clamp = regs.polygon_offset_clamp;
+
+ state.MarkDirtyPolygonOffset();
+ maxwell3d.dirty.polygon_offset = false;
}
void RasterizerOpenGL::SyncAlphaTest() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index b2b671230..8b123c48d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -109,6 +109,9 @@ private:
OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,
bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
+ void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
+ bool using_depth_fb, bool using_stencil_fb);
+
/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader);
@@ -227,6 +230,7 @@ private:
GLuint SetupVertexFormat();
void SetupVertexBuffer(GLuint vao);
+ void SetupVertexInstances(GLuint vao);
GLintptr SetupIndexBuffer();
@@ -237,6 +241,8 @@ private:
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;
+ OGLFramebuffer clear_framebuffer;
+
using CachedPageMap = boost::icl::interval_map<u64, int>;
CachedPageMap cached_pages;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 865c191bd..1c90facc3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -628,7 +628,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
- if (!system.GPU().Maxwell3D().dirty_flags.shaders) {
+ if (!system.GPU().Maxwell3D().dirty.shaders) {
return last_shaders[static_cast<std::size_t>(program)];
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 50b616be4..ffe26b241 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -257,10 +257,6 @@ public:
}
private:
- using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
- using OperationDecompilersArray =
- std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
-
void DeclareVertex() {
if (!IsVertexShader(stage))
return;
@@ -1414,14 +1410,10 @@ private:
return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
}
- std::string LogicalAll2(Operation operation) {
+ std::string LogicalAnd2(Operation operation) {
return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
}
- std::string LogicalAny2(Operation operation) {
- return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
- }
-
template <bool with_nan>
std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
@@ -1728,7 +1720,7 @@ private:
return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
}
- static constexpr OperationDecompilersArray operation_decompilers = {
+ static constexpr std::array operation_decompilers = {
&GLSLDecompiler::Assign,
&GLSLDecompiler::Select,
@@ -1812,8 +1804,7 @@ private:
&GLSLDecompiler::LogicalXor,
&GLSLDecompiler::LogicalNegate,
&GLSLDecompiler::LogicalPick2,
- &GLSLDecompiler::LogicalAll2,
- &GLSLDecompiler::LogicalAny2,
+ &GLSLDecompiler::LogicalAnd2,
&GLSLDecompiler::LogicalLessThan<Type::Float>,
&GLSLDecompiler::LogicalEqual<Type::Float>,
@@ -1877,6 +1868,7 @@ private:
&GLSLDecompiler::WorkGroupId<1>,
&GLSLDecompiler::WorkGroupId<2>,
};
+ static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
std::string GetRegister(u32 index) const {
return GetDeclarationWithSuffix(index, "gpr");
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 0eae98afe..f4777d0b0 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -165,6 +165,25 @@ OpenGLState::OpenGLState() {
alpha_test.ref = 0.0f;
}
+void OpenGLState::SetDefaultViewports() {
+ for (auto& item : viewports) {
+ item.x = 0;
+ item.y = 0;
+ item.width = 0;
+ item.height = 0;
+ item.depth_range_near = 0.0f;
+ item.depth_range_far = 1.0f;
+ item.scissor.enabled = false;
+ item.scissor.x = 0;
+ item.scissor.y = 0;
+ item.scissor.width = 0;
+ item.scissor.height = 0;
+ }
+
+ depth_clamp.far_plane = false;
+ depth_clamp.near_plane = false;
+}
+
void OpenGLState::ApplyDefaultState() {
glEnable(GL_BLEND);
glDisable(GL_FRAMEBUFFER_SRGB);
@@ -526,7 +545,7 @@ void OpenGLState::ApplySamplers() const {
}
}
-void OpenGLState::Apply() const {
+void OpenGLState::Apply() {
MICROPROFILE_SCOPE(OpenGL_State);
ApplyFramebufferState();
ApplyVertexArrayState();
@@ -536,19 +555,31 @@ void OpenGLState::Apply() const {
ApplyPointSize();
ApplyFragmentColorClamp();
ApplyMultisample();
+ if (dirty.color_mask) {
+ ApplyColorMask();
+ dirty.color_mask = false;
+ }
ApplyDepthClamp();
- ApplyColorMask();
ApplyViewport();
- ApplyStencilTest();
+ if (dirty.stencil_state) {
+ ApplyStencilTest();
+ dirty.stencil_state = false;
+ }
ApplySRgb();
ApplyCulling();
ApplyDepth();
ApplyPrimitiveRestart();
- ApplyBlending();
+ if (dirty.blend_state) {
+ ApplyBlending();
+ dirty.blend_state = false;
+ }
ApplyLogicOp();
ApplyTextures();
ApplySamplers();
- ApplyPolygonOffset();
+ if (dirty.polygon_offset) {
+ ApplyPolygonOffset();
+ dirty.polygon_offset = false;
+ }
ApplyAlphaTest();
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b0140495d..fdf9a8a12 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -195,8 +195,9 @@ public:
s_rgb_used = false;
}
+ void SetDefaultViewports();
/// Apply this state as the current OpenGL state
- void Apply() const;
+ void Apply();
void ApplyFramebufferState() const;
void ApplyVertexArrayState() const;
@@ -237,11 +238,41 @@ public:
/// Viewport does not affects glClearBuffer so emulate viewport using scissor test
void EmulateViewportWithScissor();
+ void MarkDirtyBlendState() {
+ dirty.blend_state = true;
+ }
+
+ void MarkDirtyStencilState() {
+ dirty.stencil_state = true;
+ }
+
+ void MarkDirtyPolygonOffset() {
+ dirty.polygon_offset = true;
+ }
+
+ void MarkDirtyColorMask() {
+ dirty.color_mask = true;
+ }
+
+ void AllDirty() {
+ dirty.blend_state = true;
+ dirty.stencil_state = true;
+ dirty.polygon_offset = true;
+ dirty.color_mask = true;
+ }
+
private:
static OpenGLState cur_state;
// Workaround for sRGB problems caused by QT not supporting srgb output
static bool s_rgb_used;
+ struct {
+ bool blend_state;
+ bool stencil_state;
+ bool viewport_state;
+ bool polygon_offset;
+ bool color_mask;
+ } dirty{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index b1f6bc7c2..8fcd39a69 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -485,11 +485,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const auto& dst_params{dst_view->GetSurfaceParams()};
OpenGLState prev_state{OpenGLState::GetCurState()};
- SCOPE_EXIT({ prev_state.Apply(); });
+ SCOPE_EXIT({
+ prev_state.AllDirty();
+ prev_state.Apply();
+ });
OpenGLState state;
state.draw.read_framebuffer = src_framebuffer.handle;
state.draw.draw_framebuffer = dst_framebuffer.handle;
+ state.AllDirty();
state.Apply();
u32 buffers{};
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 9ecdddb0d..a05cef3b9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -108,6 +108,7 @@ void RendererOpenGL::SwapBuffers(
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
+ state.AllDirty();
state.Apply();
if (framebuffer) {
@@ -140,6 +141,7 @@ void RendererOpenGL::SwapBuffers(
system.GetPerfStats().BeginSystemFrame();
// Restore the rasterizer state
+ prev_state.AllDirty();
prev_state.Apply();
}
@@ -206,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() {
// Link shaders and get variable locations
shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
state.draw.shader_program = shader.handle;
+ state.AllDirty();
state.Apply();
uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
@@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
// Workaround brigthness problems in SMO by enabling sRGB in the final output
// if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
+ state.AllDirty();
state.Apply();
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Restore default state
state.framebuffer_srgb.enabled = false;
state.texture_units[0].texture = 0;
+ state.AllDirty();
state.Apply();
// Clear sRGB state for the next frame
OpenGLState::ClearsRGBUsed();
@@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() {
GLuint old_read_fb = state.draw.read_framebuffer;
GLuint old_draw_fb = state.draw.draw_framebuffer;
state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
+ state.AllDirty();
state.Apply();
Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() {
screenshot_framebuffer.Release();
state.draw.read_framebuffer = old_read_fb;
state.draw.draw_framebuffer = old_draw_fb;
+ state.AllDirty();
state.Apply();
glDeleteRenderbuffers(1, &renderbuffer);
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 9b2d8e987..d267712c9 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -205,10 +205,6 @@ public:
}
private:
- using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
- using OperationDecompilersArray =
- std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
-
static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
void AllocateBindings() {
@@ -804,12 +800,7 @@ private:
return {};
}
- Id LogicalAll2(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Id LogicalAny2(Operation operation) {
+ Id LogicalAnd2(Operation operation) {
UNIMPLEMENTED();
return {};
}
@@ -1206,7 +1197,7 @@ private:
return {};
}
- static constexpr OperationDecompilersArray operation_decompilers = {
+ static constexpr std::array operation_decompilers = {
&SPIRVDecompiler::Assign,
&SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
@@ -1291,8 +1282,7 @@ private:
&SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
&SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
&SPIRVDecompiler::LogicalPick2,
- &SPIRVDecompiler::LogicalAll2,
- &SPIRVDecompiler::LogicalAny2,
+ &SPIRVDecompiler::LogicalAnd2,
&SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
&SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
@@ -1357,6 +1347,7 @@ private:
&SPIRVDecompiler::WorkGroupId<1>,
&SPIRVDecompiler::WorkGroupId<2>,
};
+ static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
const VKDevice& device;
const ShaderIR& ir;
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 29c8895c5..afffd157f 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -46,12 +46,12 @@ void ShaderIR::Decode() {
coverage_end = shader_info.end;
if (shader_info.decompilable) {
disable_flow_stack = true;
- const auto insert_block = ([this](NodeBlock& nodes, u32 label) {
+ const auto insert_block = [this](NodeBlock& nodes, u32 label) {
if (label == exit_branch) {
return;
}
basic_blocks.insert({label, nodes});
- });
+ };
const auto& blocks = shader_info.blocks;
NodeBlock current_block;
u32 current_label = exit_branch;
@@ -103,7 +103,7 @@ void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
}
void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
- const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node {
+ const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
Node result = n;
if (cond.cc != ConditionCode::T) {
result = Conditional(GetConditionCode(cond.cc), {result});
@@ -117,7 +117,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
result = Conditional(GetPredicate(pred, is_neg), {result});
}
return result;
- });
+ };
if (block.branch.address < 0) {
if (block.branch.kills) {
Node n = Operation(OperationCode::Discard);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index d59d15bd8..ad180d6df 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -23,38 +23,51 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
- Node op_b = [&]() {
- switch (opcode->get().GetId()) {
- case OpCode::Id::HSETP2_R:
- return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
- instr.hsetp2.negate_b);
- default:
- UNREACHABLE();
- return Immediate(0);
- }
- }();
- op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b);
-
- // We can't use the constant predicate as destination.
- ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
- const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);
+ Tegra::Shader::PredCondition cond{};
+ bool h_and{};
+ Node op_b{};
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::HSETP2_C:
+ cond = instr.hsetp2.cbuf_and_imm.cond;
+ h_and = instr.hsetp2.cbuf_and_imm.h_and;
+ op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+ instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
+ break;
+ case OpCode::Id::HSETP2_IMM:
+ cond = instr.hsetp2.cbuf_and_imm.cond;
+ h_and = instr.hsetp2.cbuf_and_imm.h_and;
+ op_b = UnpackHalfImmediate(instr, true);
+ break;
+ case OpCode::Id::HSETP2_R:
+ cond = instr.hsetp2.reg.cond;
+ h_and = instr.hsetp2.reg.h_and;
+ op_b =
+ UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b,
+ instr.hsetp2.reg.negate_b),
+ instr.hsetp2.reg.type_b);
+ break;
+ default:
+ UNREACHABLE();
+ op_b = Immediate(0);
+ }
const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
- const OperationCode pair_combiner =
- instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
-
- const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
- const Node first_pred = Operation(pair_combiner, comparison);
+ const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
- // Set the primary predicate to the result of Predicate OP SecondPredicate
- const Node value = Operation(combiner, first_pred, second_pred);
- SetPredicate(bb, instr.hsetp2.pred3, value);
+ const auto Write = [&](u64 dest, Node src) {
+ SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39));
+ };
- if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
- const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
- SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
+ const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
+ const u64 first = instr.hsetp2.pred0;
+ const u64 second = instr.hsetp2.pred3;
+ if (h_and) {
+ const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
+ Write(first, joined);
+ Write(second, Operation(OperationCode::LogicalNegate, joined));
+ } else {
+ Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u)));
+ Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u)));
}
return pc;
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 24f022cc0..77151a24b 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -95,12 +95,8 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image
const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
Tegra::Shader::ImageType type) {
const Node image_register{GetRegister(reg)};
- const Node base_image{
+ const auto [base_image, cbuf_index, cbuf_offset]{
TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
- const auto cbuf{std::get_if<CbufNode>(&*base_image)};
- const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())};
- const auto cbuf_offset{cbuf_offset_imm->GetValue()};
- const auto cbuf_index{cbuf->GetIndex()};
const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
// If this image has already been used, return the existing mapping.
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 80fc0ccfc..ed108bea8 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Node op_b =
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
- SetTemporal(bb, 0, op_a);
- SetTemporal(bb, 1, op_b);
- SetRegister(bb, instr.gpr0, GetTemporal(0));
- SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
+ SetTemporary(bb, 0, op_a);
+ SetTemporary(bb, 1, op_b);
+ SetRegister(bb, instr.gpr0, GetTemporary(0));
+ SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
break;
}
default:
@@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
}();
for (u32 i = 0; i < count; ++i)
- SetTemporal(bb, i, GetLmem(i * 4));
+ SetTemporary(bb, i, GetLmem(i * 4));
for (u32 i = 0; i < count; ++i)
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
break;
}
default:
@@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- SetTemporal(bb, i, gmem);
+ SetTemporary(bb, i, gmem);
}
for (u32 i = 0; i < count; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
break;
}
@@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
TrackAndGetGlobalMemory(bb, instr, true);
// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
- SetTemporal(bb, 0, real_address_base);
+ SetTemporary(bb, 0, real_address_base);
const u32 count = GetUniformTypeElementsCount(type);
for (u32 i = 0; i < count; ++i) {
- SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
+ SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
}
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
@@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
- bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
+ bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1)));
}
break;
}
@@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB
const auto addr_register{GetRegister(instr.gmem.gpr)};
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
- const Node base_address{
- TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
- const auto cbuf = std::get_if<CbufNode>(&*base_address);
- ASSERT(cbuf != nullptr);
- const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
- ASSERT(cbuf_offset_imm != nullptr);
- const auto cbuf_offset = cbuf_offset_imm->GetValue();
+ const auto [base_address, index, offset] =
+ TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
+ ASSERT(base_address != nullptr);
- bb.push_back(
- Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
+ bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
- const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
+ const GlobalMemoryBase descriptor{index, offset};
const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
auto& usage = entry->second;
if (is_write) {
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 42e3de02f..c0f64d7a0 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -102,7 +102,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
PRECISE, op_a, Immediate(3));
const Node operand =
Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
- branch = Operation(OperationCode::BranchIndirect, convert);
+ branch = Operation(OperationCode::BranchIndirect, operand);
}
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 323be3f14..0b934a069 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta,
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
- SetTemporal(bb, indexer++, value);
+ SetTemporary(bb, indexer++, value);
}
for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
break;
}
@@ -238,10 +238,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
auto params = coords;
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
- SetTemporal(bb, indexer++, value);
+ SetTemporary(bb, indexer++, value);
}
for (u32 i = 0; i < indexer; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
break;
}
@@ -308,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
bool is_array, bool is_shadow) {
const Node sampler_register = GetRegister(reg);
- const Node base_sampler =
+ const auto [base_sampler, cbuf_index, cbuf_offset] =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
- const auto cbuf = std::get_if<CbufNode>(&*base_sampler);
- const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
- ASSERT(cbuf_offset_imm != nullptr);
- const auto cbuf_offset = cbuf_offset_imm->GetValue();
- const auto cbuf_index = cbuf->GetIndex();
+ ASSERT(base_sampler != nullptr);
const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
// If this sampler has already been used, return the existing mapping.
@@ -340,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
// Skip disabled components
continue;
}
- SetTemporal(bb, dest_elem++, components[elem]);
+ SetTemporary(bb, dest_elem++, components[elem]);
}
// After writing values in temporals, move them to the real registers
for (u32 i = 0; i < dest_elem; ++i) {
- SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
}
@@ -357,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component))
continue;
- SetTemporal(bb, dest_elem++, components[component]);
+ SetTemporary(bb, dest_elem++, components[component]);
}
for (u32 i = 0; i < dest_elem; ++i) {
if (i < 2) {
// Write the first two swizzle components to gpr0 and gpr0+1
- SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
+ SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
} else {
ASSERT(instr.texs.HasTwoDestinations());
// Write the rest of the swizzle components to gpr28 and gpr28+1
- SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
+ SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
}
}
}
@@ -395,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
return;
}
- SetTemporal(bb, 0, first_value);
- SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
+ SetTemporary(bb, 0, first_value);
+ SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
- SetRegister(bb, instr.gpr0, GetTemporal(0));
- SetRegister(bb, instr.gpr28, GetTemporal(1));
+ SetRegister(bb, instr.gpr0, GetTemporary(0));
+ SetRegister(bb, instr.gpr28, GetTemporary(1));
}
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 93dee77d1..206961909 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
if (is_psl) {
product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
}
- SetTemporal(bb, 0, product);
- product = GetTemporal(0);
+ SetTemporary(bb, 0, product);
+ product = GetTemporary(0);
const Node original_c = op_c;
const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
@@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
}
}();
- SetTemporal(bb, 1, op_c);
- op_c = GetTemporal(1);
+ SetTemporary(bb, 1, op_c);
+ op_c = GetTemporary(1);
// TODO(Rodrigo): Use an appropiate sign for this operation
Node sum = Operation(OperationCode::IAdd, product, op_c);
- SetTemporal(bb, 2, sum);
- sum = GetTemporal(2);
+ SetTemporary(bb, 2, sum);
+ sum = GetTemporary(2);
if (is_merge) {
const Node a = BitfieldExtract(sum, 0, 16);
const Node b =
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 7427ed896..715184d67 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -101,8 +101,7 @@ enum class OperationCode {
LogicalXor, /// (bool a, bool b) -> bool
LogicalNegate, /// (bool a) -> bool
LogicalPick2, /// (bool2 pair, uint index) -> bool
- LogicalAll2, /// (bool2 a) -> bool
- LogicalAny2, /// (bool2 a) -> bool
+ LogicalAnd2, /// (bool2 a) -> bool
LogicalFLessThan, /// (float a, float b) -> bool
LogicalFEqual, /// (float a, float b) -> bool
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 6fccbbba3..b3dcd291c 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -12,7 +12,7 @@
namespace VideoCommon::Shader {
Node Conditional(Node condition, std::vector<Node> code) {
- return MakeNode<ConditionalNode>(condition, std::move(code));
+ return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
}
Node Comment(std::string text) {
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index caa409788..5e91fe129 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -61,8 +61,17 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
const auto [entry, is_new] = used_cbufs.try_emplace(index);
entry->second.MarkAsUsedIndirect();
- const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset));
- return MakeNode<CbufNode>(index, final_offset);
+ Node final_offset = [&] {
+ // Attempt to inline constant buffer without a variable offset. This is done to allow
+ // tracking LDC calls.
+ if (const auto gpr = std::get_if<GprNode>(&*node)) {
+ if (gpr->GetIndex() == Register::ZeroIndex) {
+ return Immediate(offset);
+ }
+ }
+ return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
+ }();
+ return MakeNode<CbufNode>(index, std::move(final_offset));
}
Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
@@ -80,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) {
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
used_input_attributes.emplace(index);
- return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer);
+ return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
}
Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
@@ -113,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
}
used_output_attributes.insert(index);
- return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer);
+ return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
}
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
@@ -125,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
}
Node ShaderIR::GetLocalMemory(Node address) {
- return MakeNode<LmemNode>(address);
+ return MakeNode<LmemNode>(std::move(address));
}
-Node ShaderIR::GetTemporal(u32 id) {
+Node ShaderIR::GetTemporary(u32 id) {
return GetRegister(Register::ZeroIndex + 1 + id);
}
Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
if (absolute) {
- value = Operation(OperationCode::FAbsolute, NO_PRECISE, value);
+ value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
}
if (negate) {
- value = Operation(OperationCode::FNegate, NO_PRECISE, value);
+ value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
}
return value;
}
@@ -146,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
if (!saturate) {
return value;
}
- const Node positive_zero = Immediate(std::copysignf(0, 1));
- const Node positive_one = Immediate(1.0f);
- return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one);
+
+ Node positive_zero = Immediate(std::copysignf(0, 1));
+ Node positive_one = Immediate(1.0f);
+ return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
+ std::move(positive_one));
}
-Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) {
+Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
switch (size) {
case Register::Size::Byte:
- value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
- Immediate(24));
- value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
- Immediate(24));
+ value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
+ std::move(value), Immediate(24));
+ value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
+ std::move(value), Immediate(24));
return value;
case Register::Size::Short:
- value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
- Immediate(16));
- value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
- Immediate(16));
+ value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
+ std::move(value), Immediate(16));
+ value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
+ std::move(value), Immediate(16));
case Register::Size::Word:
// Default - do nothing
return value;
@@ -179,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b
return value;
}
if (absolute) {
- value = Operation(OperationCode::IAbsolute, NO_PRECISE, value);
+ value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
}
if (negate) {
- value = Operation(OperationCode::INegate, NO_PRECISE, value);
+ value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
}
return value;
}
Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
- const Node value = Immediate(instr.half_imm.PackImmediates());
+ Node value = Immediate(instr.half_imm.PackImmediates());
if (!has_negation) {
return value;
}
- const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
- const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
- return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate);
+ Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
+ Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
+
+ return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
+ std::move(second_negate));
}
Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
- return Operation(OperationCode::HUnpack, type, value);
+ return Operation(OperationCode::HUnpack, type, std::move(value));
}
Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
@@ -207,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
case Tegra::Shader::HalfMerge::H0_H1:
return src;
case Tegra::Shader::HalfMerge::F32:
- return Operation(OperationCode::HMergeF32, src);
+ return Operation(OperationCode::HMergeF32, std::move(src));
case Tegra::Shader::HalfMerge::Mrg_H0:
- return Operation(OperationCode::HMergeH0, dest, src);
+ return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
case Tegra::Shader::HalfMerge::Mrg_H1:
- return Operation(OperationCode::HMergeH1, dest, src);
+ return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
}
UNREACHABLE();
return src;
@@ -219,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
if (absolute) {
- value = Operation(OperationCode::HAbsolute, NO_PRECISE, value);
+ value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
}
if (negate) {
- value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true),
+ value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
GetPredicate(true));
}
return value;
@@ -232,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
if (!saturate) {
return value;
}
- const Node positive_zero = Immediate(std::copysignf(0, 1));
- const Node positive_one = Immediate(1.0f);
- return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one);
+
+ Node positive_zero = Immediate(std::copysignf(0, 1));
+ Node positive_one = Immediate(1.0f);
+ return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
+ std::move(positive_one));
}
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
@@ -262,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
condition == PredCondition::LessEqualWithNan ||
condition == PredCondition::GreaterThanWithNan ||
condition == PredCondition::GreaterEqualWithNan) {
-
predicate = Operation(OperationCode::LogicalOr, predicate,
Operation(OperationCode::LogicalFIsNan, op_a));
predicate = Operation(OperationCode::LogicalOr, predicate,
@@ -291,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
"Unknown predicate comparison operation");
- Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b);
+ Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
+ std::move(op_b));
UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
condition == PredCondition::NotEqualWithNan ||
@@ -321,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition
UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
"Unknown predicate comparison operation");
- const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
-
- return predicate;
+ return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
}
OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
@@ -349,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
}
void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
- bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src));
+ bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
}
void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
- bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src));
+ bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
}
void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
- bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value));
+ bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
}
void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
- bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value));
+ bb.push_back(
+ Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
}
-void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) {
- SetRegister(bb, Register::ZeroIndex + 1 + id, value);
+void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
+ SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
}
void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
if (!sets_cc) {
return;
}
- const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f));
- SetInternalFlag(bb, InternalFlag::Zero, zerop);
+ Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f));
+ SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
}
@@ -381,14 +395,14 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_
if (!sets_cc) {
return;
}
- const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0));
- SetInternalFlag(bb, InternalFlag::Zero, zerop);
+ Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
+ SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
}
Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
- return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset),
- Immediate(bits));
+ return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
+ Immediate(offset), Immediate(bits));
}
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 03c888def..59a083d90 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,13 +5,10 @@
#pragma once
#include <array>
-#include <cstring>
#include <map>
#include <optional>
#include <set>
-#include <string>
#include <tuple>
-#include <variant>
#include <vector>
#include "common/common_types.h"
@@ -210,8 +207,8 @@ private:
Node GetInternalFlag(InternalFlag flag, bool negated = false);
/// Generates a node representing a local memory address
Node GetLocalMemory(Node address);
- /// Generates a temporal, internally it uses a post-RZ register
- Node GetTemporal(u32 id);
+ /// Generates a temporary, internally it uses a post-RZ register
+ Node GetTemporary(u32 id);
/// Sets a register. src value must be a number-evaluated node.
void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
@@ -221,8 +218,8 @@ private:
void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
/// Sets a local memory address. address and value must be a number-evaluated node
void SetLocalMemory(NodeBlock& bb, Node address, Node value);
- /// Sets a temporal. Internally it uses a post-RZ register
- void SetTemporal(NodeBlock& bb, u32 id, Node value);
+ /// Sets a temporary. Internally it uses a post-RZ register
+ void SetTemporary(NodeBlock& bb, u32 id, Node value);
/// Sets internal flags from a float
void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
@@ -328,7 +325,7 @@ private:
void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
Node op_c, Node imm_lut, bool sets_cc);
- Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
+ std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index fc957d980..a53e02253 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -15,56 +15,63 @@ namespace {
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
OperationCode operation_code) {
for (; cursor >= 0; --cursor) {
- const Node node = code.at(cursor);
+ Node node = code.at(cursor);
+
if (const auto operation = std::get_if<OperationNode>(&*node)) {
if (operation->GetCode() == operation_code) {
- return {node, cursor};
+ return {std::move(node), cursor};
}
}
+
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
const auto& conditional_code = conditional->GetCode();
- const auto [found, internal_cursor] = FindOperation(
+ auto [found, internal_cursor] = FindOperation(
conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
if (found) {
- return {found, cursor};
+ return {std::move(found), cursor};
}
}
}
return {};
}
-} // namespace
+} // Anonymous namespace
-Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const {
+std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
+ s64 cursor) const {
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
- // Cbuf found, but it has to be immediate
- return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
+ // Constant buffer found, test if it's an immediate
+ const auto offset = cbuf->GetOffset();
+ if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
+ return {tracked, cbuf->GetIndex(), immediate->GetValue()};
+ }
+ return {};
}
if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
- return nullptr;
+ return {};
}
// Reduce the cursor in one to avoid infinite loops when the instruction sets the same
// register that it uses as operand
const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
if (!source) {
- return nullptr;
+ return {};
}
return TrackCbuf(source, code, new_cursor);
}
if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
- if (const auto found = TrackCbuf((*operation)[i], code, cursor)) {
- // Cbuf found in operand
+ if (auto found = TrackCbuf((*operation)[i], code, cursor); std::get<0>(found)) {
+ // Cbuf found in operand.
return found;
}
}
- return nullptr;
+ return {};
}
if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
const auto& conditional_code = conditional->GetCode();
return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
}
- return nullptr;
+ return {};
}
std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 7f9623c62..a3a3770a7 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -116,10 +116,10 @@ public:
std::lock_guard lock{mutex};
auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty_flags.zeta_buffer) {
+ if (!maxwell3d.dirty.depth_buffer) {
return depth_buffer.view;
}
- maxwell3d.dirty_flags.zeta_buffer = false;
+ maxwell3d.dirty.depth_buffer = false;
const auto& regs{maxwell3d.regs};
const auto gpu_addr{regs.zeta.Address()};
@@ -145,10 +145,10 @@ public:
std::lock_guard lock{mutex};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty_flags.color_buffer[index]) {
+ if (!maxwell3d.dirty.render_target[index]) {
return render_targets[index].view;
}
- maxwell3d.dirty_flags.color_buffer.reset(index);
+ maxwell3d.dirty.render_target[index] = false;
const auto& regs{maxwell3d.regs};
if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
@@ -274,10 +274,11 @@ protected:
auto& maxwell3d = system.GPU().Maxwell3D();
const u32 index = surface->GetRenderTarget();
if (index == DEPTH_RT) {
- maxwell3d.dirty_flags.zeta_buffer = true;
+ maxwell3d.dirty.depth_buffer = true;
} else {
- maxwell3d.dirty_flags.color_buffer.set(index, true);
+ maxwell3d.dirty.render_target[index] = true;
}
+ maxwell3d.dirty.render_settings = true;
}
void Register(TSurface surface) {