diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 20 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | 6 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 2 | ||||
-rw-r--r-- | src/video_core/shader/async_shaders.cpp | 17 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic.cpp | 3 |
5 files changed, 23 insertions, 25 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index b27ee0502..75d9191ff 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -36,8 +36,8 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::v return IocCtrlEventRegister(input, output); case IoctlCommand::IocCtrlEventUnregisterCommand: return IocCtrlEventUnregister(input, output); - case IoctlCommand::IocCtrlEventSignalCommand: - return IocCtrlEventSignal(input, output); + case IoctlCommand::IocCtrlClearEventWaitCommand: + return IocCtrlClearEventWait(input, output); default: UNIMPLEMENTED_MSG("Unimplemented ioctl"); return 0; @@ -154,23 +154,17 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto return NvResult::Success; } -u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) { +u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) { IocCtrlEventSignalParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); - // TODO(Blinkhawk): This is normally called when an NvEvents timeout on WaitSynchronization - // It is believed from RE to cancel the GPU Event. However, better research is required - u32 event_id = params.user_event_id & 0x00FF; - LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id); + u32 event_id = params.event_id & 0x00FF; + LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id); if (event_id >= MaxNvEvents) { return NvResult::BadParameter; } if (events_interface.status[event_id] == EventState::Waiting) { - auto& gpu = system.GPU(); - if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id], - events_interface.assigned_value[event_id])) { - events_interface.LiberateEvent(event_id); - events_interface.events[event_id].writable->Signal(); - } + events_interface.LiberateEvent(event_id); + events_interface.events[event_id].writable->Signal(); } return NvResult::Success; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index 9898623de..f7b04d9f1 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h @@ -31,7 +31,7 @@ private: IocSyncptWaitexCommand = 0xC0100019, IocSyncptReadMaxCommand = 0xC008001A, IocGetConfigCommand = 0xC183001B, - IocCtrlEventSignalCommand = 0xC004001C, + IocCtrlClearEventWaitCommand = 0xC004001C, IocCtrlEventWaitCommand = 0xC010001D, IocCtrlEventWaitAsyncCommand = 0xC010001E, IocCtrlEventRegisterCommand = 0xC004001F, @@ -94,7 +94,7 @@ private: static_assert(sizeof(IocGetConfigParams) == 387, "IocGetConfigParams is incorrect size"); struct IocCtrlEventSignalParams { - u32_le user_event_id; + u32_le event_id; }; static_assert(sizeof(IocCtrlEventSignalParams) == 4, "IocCtrlEventSignalParams is incorrect size"); @@ -142,7 +142,7 @@ private: u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output); - u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output); + u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); EventInterface& events_interface; }; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index d374b73cf..a3c05d1b0 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1893,6 +1893,7 @@ public: ICMP_IMM, FCMP_RR, FCMP_RC, + FCMP_IMMR, MUFU, // Multi-Function Operator RRO_C, // Range Reduction Operator RRO_R, @@ -2205,6 +2206,7 @@ private: INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), + INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index aabd62c5c..39cc3b869 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -20,14 +20,15 @@ AsyncShaders::~AsyncShaders() { } void AsyncShaders::AllocateWorkers() { - // Max worker threads we should allow - constexpr u32 MAX_THREADS = 4; - // Deduce how many threads we can use - const u32 threads_used = std::thread::hardware_concurrency() / 4; - // Always allow at least 1 thread regardless of our settings - const auto max_worker_count = std::max(1U, threads_used); - // Don't use more than MAX_THREADS - const auto num_workers = std::min(max_worker_count, MAX_THREADS); + // Use at least one thread + u32 num_workers = 1; + + // Deduce how many more threads we can use + const u32 thread_count = std::thread::hardware_concurrency(); + if (thread_count >= 8) { + // Increase async workers by 1 for every 2 threads >= 8 + num_workers += 1 + (thread_count - 8) / 2; + } // If we already have workers queued, ignore if (num_workers == worker_threads.size()) { diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 4db329fa5..afef5948d 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -137,7 +137,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::FCMP_RR: - case OpCode::Id::FCMP_RC: { + case OpCode::Id::FCMP_RC: + case OpCode::Id::FCMP_IMMR: { UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); Node op_c = GetRegister(instr.gpr39); Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); |