summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/core/hle/service/ldr/ldr.cpp 16
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp 20
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_ctrl.h 6
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp 2
-rw-r--r-- src/video_core/engines/shader_bytecode.h 2
-rw-r--r-- src/video_core/shader/async_shaders.cpp 17
-rw-r--r-- src/video_core/shader/decode/arithmetic.cpp 3
7 files changed, 39 insertions, 27 deletions
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9ad5bbf0d..eeaca44b6 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -166,7 +166,7 @@ public:
{0, &RelocatableObject::LoadNro, "LoadNro"},
{1, &RelocatableObject::UnloadNro, "UnloadNro"},
{2, &RelocatableObject::LoadNrr, "LoadNrr"},
- {3, nullptr, "UnloadNrr"},
+ {3, &RelocatableObject::UnloadNrr, "UnloadNrr"},
{4, &RelocatableObject::Initialize, "Initialize"},
{10, nullptr, "LoadNrrEx"},
};
@@ -272,6 +272,20 @@ public:
rb.Push(RESULT_SUCCESS);
}
+ void UnloadNrr(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto pid = rp.Pop<u64>();
+ const auto nrr_address = rp.Pop<VAddr>();
+
+ LOG_DEBUG(Service_LDR, "called with pid={}, nrr_address={:016X}", pid, nrr_address);
+
+ nrr.erase(nrr_address);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+
+ rb.Push(RESULT_SUCCESS);
+ }
+
bool ValidateRegionForMap(Kernel::Memory::PageTable& page_table, VAddr start,
std::size_t size) const {
constexpr std::size_t padding_size{4 * Kernel::Memory::PageSize};
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index b27ee0502..75d9191ff 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -36,8 +36,8 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::v
return IocCtrlEventRegister(input, output);
case IoctlCommand::IocCtrlEventUnregisterCommand:
return IocCtrlEventUnregister(input, output);
- case IoctlCommand::IocCtrlEventSignalCommand:
- return IocCtrlEventSignal(input, output);
+ case IoctlCommand::IocCtrlClearEventWaitCommand:
+ return IocCtrlClearEventWait(input, output);
default:
UNIMPLEMENTED_MSG("Unimplemented ioctl");
return 0;
@@ -154,23 +154,17 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto
return NvResult::Success;
}
-u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) {
+u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
IocCtrlEventSignalParams params{};
std::memcpy(&params, input.data(), sizeof(params));
- // TODO(Blinkhawk): This is normally called when an NvEvents timeout on WaitSynchronization
- // It is believed from RE to cancel the GPU Event. However, better research is required
- u32 event_id = params.user_event_id & 0x00FF;
- LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id);
+ u32 event_id = params.event_id & 0x00FF;
+ LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id);
if (event_id >= MaxNvEvents) {
return NvResult::BadParameter;
}
if (events_interface.status[event_id] == EventState::Waiting) {
- auto& gpu = system.GPU();
- if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id],
- events_interface.assigned_value[event_id])) {
- events_interface.LiberateEvent(event_id);
- events_interface.events[event_id].writable->Signal();
- }
+ events_interface.LiberateEvent(event_id);
+ events_interface.events[event_id].writable->Signal();
}
return NvResult::Success;
}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index 9898623de..f7b04d9f1 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -31,7 +31,7 @@ private:
IocSyncptWaitexCommand = 0xC0100019,
IocSyncptReadMaxCommand = 0xC008001A,
IocGetConfigCommand = 0xC183001B,
- IocCtrlEventSignalCommand = 0xC004001C,
+ IocCtrlClearEventWaitCommand = 0xC004001C,
IocCtrlEventWaitCommand = 0xC010001D,
IocCtrlEventWaitAsyncCommand = 0xC010001E,
IocCtrlEventRegisterCommand = 0xC004001F,
@@ -94,7 +94,7 @@ private:
static_assert(sizeof(IocGetConfigParams) == 387, "IocGetConfigParams is incorrect size");
struct IocCtrlEventSignalParams {
- u32_le user_event_id;
+ u32_le event_id;
};
static_assert(sizeof(IocCtrlEventSignalParams) == 4,
"IocCtrlEventSignalParams is incorrect size");
@@ -142,7 +142,7 @@ private:
u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
- u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output);
+ u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);
EventInterface& events_interface;
};
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 85792495f..30f03f845 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -38,7 +38,7 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
namespace NvErrCodes {
constexpr u32 Success{};
-constexpr u32 OutOfMemory{static_cast<u32>(-12)};
+[[maybe_unused]] constexpr u32 OutOfMemory{static_cast<u32>(-12)};
constexpr u32 InvalidInput{static_cast<u32>(-22)};
} // namespace NvErrCodes
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d374b73cf..a3c05d1b0 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1893,6 +1893,7 @@ public:
ICMP_IMM,
FCMP_RR,
FCMP_RC,
+ FCMP_IMMR,
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
@@ -2205,6 +2206,7 @@ private:
INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
+ INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index aabd62c5c..39cc3b869 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -20,14 +20,15 @@ AsyncShaders::~AsyncShaders() {
}
void AsyncShaders::AllocateWorkers() {
- // Max worker threads we should allow
- constexpr u32 MAX_THREADS = 4;
- // Deduce how many threads we can use
- const u32 threads_used = std::thread::hardware_concurrency() / 4;
- // Always allow at least 1 thread regardless of our settings
- const auto max_worker_count = std::max(1U, threads_used);
- // Don't use more than MAX_THREADS
- const auto num_workers = std::min(max_worker_count, MAX_THREADS);
+ // Use at least one thread
+ u32 num_workers = 1;
+
+ // Deduce how many more threads we can use
+ const u32 thread_count = std::thread::hardware_concurrency();
+ if (thread_count >= 8) {
+ // Increase async workers by 1 for every 2 threads >= 8
+ num_workers += 1 + (thread_count - 8) / 2;
+ }
// If we already have workers queued, ignore
if (num_workers == worker_threads.size()) {
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 4db329fa5..afef5948d 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -137,7 +137,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::FCMP_RR:
- case OpCode::Id::FCMP_RC: {
+ case OpCode::Id::FCMP_RC:
+ case OpCode::Id::FCMP_IMMR: {
UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
Node op_c = GetRegister(instr.gpr39);
Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));