summaryrefslogtreecommitdiffstats
path: root/src/video_core/gpu.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/gpu.cpp')
-rw-r--r--src/video_core/gpu.cpp179
1 files changed, 133 insertions, 46 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4bb9256e9..6ab06775f 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -10,6 +10,7 @@
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/frontend/emu_window.h"
+#include "core/hardware_interrupt_manager.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/fermi_2d.h"
@@ -27,15 +28,17 @@ namespace Tegra {
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-GPU::GPU(Core::System& system_, bool is_async_)
+GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
: system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
+ cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_},
maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
fermi_2d{std::make_unique<Engines::Fermi2D>()},
kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
- shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {}
+ shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
+ gpu_thread{system_, is_async_} {}
GPU::~GPU() = default;
@@ -77,31 +80,46 @@ DmaPusher& GPU::DmaPusher() {
return *dma_pusher;
}
+Tegra::CDmaPusher& GPU::CDmaPusher() {
+ return *cdma_pusher;
+}
+
const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}
+const Tegra::CDmaPusher& GPU::CDmaPusher() const {
+ return *cdma_pusher;
+}
+
void GPU::WaitFence(u32 syncpoint_id, u32 value) {
// Synced GPU, is always in sync
if (!is_async) {
return;
}
+ if (syncpoint_id == UINT32_MAX) {
+ // TODO: Research what this does.
+ LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
+ return;
+ }
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock lock{sync_mutex};
- sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; });
+ sync_cv.wait(lock, [=, this] { return syncpoints.at(syncpoint_id).load() >= value; });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
- syncpoints[syncpoint_id]++;
+ auto& syncpoint = syncpoints.at(syncpoint_id);
+ syncpoint++;
std::lock_guard lock{sync_mutex};
sync_cv.notify_all();
- if (!syncpt_interrupts[syncpoint_id].empty()) {
- u32 value = syncpoints[syncpoint_id].load();
- auto it = syncpt_interrupts[syncpoint_id].begin();
- while (it != syncpt_interrupts[syncpoint_id].end()) {
+ auto& interrupt = syncpt_interrupts.at(syncpoint_id);
+ if (!interrupt.empty()) {
+ u32 value = syncpoint.load();
+ auto it = interrupt.begin();
+ while (it != interrupt.end()) {
if (value >= *it) {
TriggerCpuInterrupt(syncpoint_id, *it);
- it = syncpt_interrupts[syncpoint_id].erase(it);
+ it = interrupt.erase(it);
continue;
}
it++;
@@ -110,22 +128,22 @@ void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
}
u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const {
- return syncpoints[syncpoint_id].load();
+ return syncpoints.at(syncpoint_id).load();
}
void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
- auto& interrupt = syncpt_interrupts[syncpoint_id];
+ auto& interrupt = syncpt_interrupts.at(syncpoint_id);
bool contains = std::any_of(interrupt.begin(), interrupt.end(),
[value](u32 in_value) { return in_value == value; });
if (contains) {
return;
}
- syncpt_interrupts[syncpoint_id].emplace_back(value);
+ interrupt.emplace_back(value);
}
bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
std::lock_guard lock{sync_mutex};
- auto& interrupt = syncpt_interrupts[syncpoint_id];
+ auto& interrupt = syncpt_interrupts.at(syncpoint_id);
const auto iter =
std::find_if(interrupt.begin(), interrupt.end(),
[value](u32 interrupt_value) { return value == interrupt_value; });
@@ -182,34 +200,6 @@ void GPU::SyncGuestHost() {
renderer->Rasterizer().SyncGuestHost();
}
-void GPU::OnCommandListEnd() {
- renderer->Rasterizer().ReleaseFences();
-}
-// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
-// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
-// So the values you see in docs might be multiplied by 4.
-enum class BufferMethods {
- BindObject = 0x0,
- Nop = 0x2,
- SemaphoreAddressHigh = 0x4,
- SemaphoreAddressLow = 0x5,
- SemaphoreSequence = 0x6,
- SemaphoreTrigger = 0x7,
- NotifyIntr = 0x8,
- WrcacheFlush = 0x9,
- Unk28 = 0xA,
- UnkCacheFlush = 0xB,
- RefCnt = 0x14,
- SemaphoreAcquire = 0x1A,
- SemaphoreRelease = 0x1B,
- FenceValue = 0x1C,
- FenceAction = 0x1D,
- Unk78 = 0x1E,
- Unk7c = 0x1F,
- Yield = 0x20,
- NonPullerMethods = 0x40,
-};
-
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
WriteLong = 0x2,
@@ -240,8 +230,12 @@ void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32
CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
} else {
for (std::size_t i = 0; i < amount; i++) {
- CallPullerMethod(
- {method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)});
+ CallPullerMethod(MethodCall{
+ method,
+ base_start[i],
+ subchannel,
+ methods_pending - static_cast<u32>(i),
+ });
}
}
}
@@ -268,7 +262,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::UnkCacheFlush:
case BufferMethods::WrcacheFlush:
case BufferMethods::FenceValue:
+ break;
case BufferMethods::FenceAction:
+ ProcessFenceActionMethod();
+ break;
+ case BufferMethods::WaitForInterrupt:
+ ProcessWaitForInterruptMethod();
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
@@ -298,8 +297,7 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
break;
}
default:
- LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented",
- static_cast<u32>(method));
+ LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
break;
}
}
@@ -378,10 +376,28 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
break;
default:
- UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id));
+ UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
+ }
+}
+
+void GPU::ProcessFenceActionMethod() {
+ switch (regs.fence_action.op) {
+ case FenceOperation::Acquire:
+ WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+ break;
+ case FenceOperation::Increment:
+ IncrementSyncPoint(regs.fence_action.syncpoint_id);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
}
}
+void GPU::ProcessWaitForInterruptMethod() {
+ // TODO(bunnei) ImplementMe
+ LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
void GPU::ProcessSemaphoreTriggerMethod() {
const auto semaphoreOperationMask = 0xF;
const auto op =
@@ -443,4 +459,75 @@ void GPU::ProcessSemaphoreAcquire() {
}
}
+void GPU::Start() {
+ gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
+ cpu_context = renderer->GetRenderWindow().CreateSharedContext();
+ cpu_context->MakeCurrent();
+}
+
+void GPU::ObtainContext() {
+ cpu_context->MakeCurrent();
+}
+
+void GPU::ReleaseContext() {
+ cpu_context->DoneCurrent();
+}
+
+void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
+ gpu_thread.SubmitList(std::move(entries));
+}
+
+void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
+ if (!use_nvdec) {
+ return;
+ }
+ // This condition fires when a video stream ends, clear all intermediary data
+ if (entries[0].raw == 0xDEADB33F) {
+ cdma_pusher.reset();
+ return;
+ }
+ if (!cdma_pusher) {
+ cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
+ }
+
+ // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
+ // TODO(ameerj): RE proper async nvdec operation
+ // gpu_thread.SubmitCommandBuffer(std::move(entries));
+
+ cdma_pusher->Push(std::move(entries));
+ cdma_pusher->DispatchCalls();
+}
+
+void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ gpu_thread.SwapBuffers(framebuffer);
+}
+
+void GPU::FlushRegion(VAddr addr, u64 size) {
+ gpu_thread.FlushRegion(addr, size);
+}
+
+void GPU::InvalidateRegion(VAddr addr, u64 size) {
+ gpu_thread.InvalidateRegion(addr, size);
+}
+
+void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+ gpu_thread.FlushAndInvalidateRegion(addr, size);
+}
+
+void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
+ auto& interrupt_manager = system.InterruptManager();
+ interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
+}
+
+void GPU::WaitIdle() const {
+ gpu_thread.WaitIdle();
+}
+
+void GPU::OnCommandListEnd() {
+ if (is_async) {
+ // This command only applies to asynchronous GPU mode
+ gpu_thread.OnCommandListEnd();
+ }
+}
+
} // namespace Tegra