From 139ea93512aeead8a4aee3910a3de86eb109a838 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 5 Nov 2021 15:52:31 +0100
Subject: VideoCore: implement channels on gpu caches.

---
 .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp    | 34 ++++++++++++----------
 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 14 ++++++++-
 src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp  | 34 +++++++++++++++-------
 src/core/hle/service/nvdrv/devices/nvhost_gpu.h    |  9 ++++++
 src/core/hle/service/nvdrv/devices/nvmap.cpp       |  2 +-
 5 files changed, 64 insertions(+), 29 deletions(-)

(limited to 'src/core/hle/service/nvdrv/devices')
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index b1c683511..9946ce624 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,13 +10,17 @@
 #include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
+#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+#include "core/hle/service/nvdrv/nvdrv.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 
 namespace Service::Nvidia::Devices {
 
-nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, NvCore::Container& core)
-    : nvdevice{system_}, container{core}, nvmap{core.GetNvMapFile()} {}
+nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
+    : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()},
+      gmmu{std::make_shared<Tegra::MemoryManager>(system)} {}
 nvhost_as_gpu::~nvhost_as_gpu() = default;
 
 NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -102,9 +106,9 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
 
     const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
     if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) {
-        params.offset = *system.GPU().MemoryManager().AllocateFixed(params.offset, size);
+        params.offset = *(gmmu->AllocateFixed(params.offset, size));
     } else {
-        params.offset = system.GPU().MemoryManager().Allocate(size, params.align);
+        params.offset = gmmu->Allocate(size, params.align);
     }
 
     auto result = NvResult::Success;
@@ -124,8 +128,7 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
     LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset,
               params.pages, params.page_size);
 
-    system.GPU().MemoryManager().Unmap(params.offset,
-                                       static_cast<std::size_t>(params.pages) * params.page_size);
+    gmmu->Unmap(params.offset, static_cast<std::size_t>(params.pages) * params.page_size);
 
     std::memcpy(output.data(), &params, output.size());
     return NvResult::Success;
@@ -148,7 +151,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
             // If nvmap handle is null, we should unmap instead.
             const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
             const auto size{static_cast<u64>(entry.pages) << 0x10};
-            system.GPU().MemoryManager().Unmap(offset, size);
+            gmmu->Unmap(offset, size);
             continue;
         }
 
@@ -162,8 +165,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
         const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
         const auto size{static_cast<u64>(entry.pages) << 0x10};
         const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10};
-        const auto addr{
-            system.GPU().MemoryManager().Map(object->address + map_offset, offset, size)};
+        const auto addr{gmmu->Map(object->address + map_offset, offset, size)};
 
         if (!addr) {
             LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!");
@@ -186,13 +188,12 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
               params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size,
               params.offset);
 
-    auto& gpu = system.GPU();
     if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) {
         if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) {
             const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)};
             const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)};
 
-            if (!gpu.MemoryManager().Map(cpu_addr, gpu_addr, params.mapping_size)) {
+            if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) {
                 LOG_CRITICAL(Service_NVDRV,
                              "remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, "
                              "mapping_size = {}, offset={}",
@@ -238,9 +239,9 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
 
     const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None};
     if (is_alloc) {
-        params.offset = gpu.MemoryManager().MapAllocate(physical_address, size, page_size);
+        params.offset = gmmu->MapAllocate(physical_address, size, page_size);
     } else {
-        params.offset = gpu.MemoryManager().Map(physical_address, params.offset, size);
+        params.offset = gmmu->Map(physical_address, params.offset, size);
     }
 
     auto result = NvResult::Success;
@@ -262,7 +263,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
     LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
 
     if (const auto size{RemoveBufferMap(params.offset)}; size) {
-        system.GPU().MemoryManager().Unmap(params.offset, *size);
+        gmmu->Unmap(params.offset, *size);
     } else {
         LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset);
     }
@@ -274,9 +275,10 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
 NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
     IoctlBindChannel params{};
     std::memcpy(&params, input.data(), input.size());
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}", params.fd);
+    LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
 
-    channel = params.fd;
+    auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
+    gpu_channel_device->channel_state->memory_manager = gmmu;
     return NvResult::Success;
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 67d2f1e87..4ecae3caf 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -13,6 +13,14 @@
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 
+namespace Tegra {
+class MemoryManager;
+} // namespace Tegra
+
+namespace Service::Nvidia {
+class Module;
+}
+
 namespace Service::Nvidia::NvCore {
 class Container;
 class NvMap;
@@ -34,7 +42,7 @@ DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags);
 
 class nvhost_as_gpu final : public nvdevice {
 public:
-    explicit nvhost_as_gpu(Core::System& system_, NvCore::Container& core);
+    explicit nvhost_as_gpu(Core::System& system_, Module& module, NvCore::Container& core);
     ~nvhost_as_gpu() override;
 
     NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -187,9 +195,13 @@ private:
     void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
     std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
 
+    Module& module;
+
     NvCore::Container& container;
     NvCore::NvMap& nvmap;
 
+    std::shared_ptr<Tegra::MemoryManager> gmmu;
+
     // This is expected to be ordered, therefore we must use a map, not unordered_map
     std::map<GPUVAddr, BufferMap> buffer_mappings;
 };
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index cb54ee5a4..38d45cb79 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -11,12 +11,14 @@
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/memory.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/engines/puller.h"
 #include "video_core/gpu.h"
 
 namespace Service::Nvidia::Devices {
 namespace {
-Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) {
-    Tegra::GPU::FenceAction result{};
+Tegra::CommandHeader BuildFenceAction(Tegra::Engines::Puller::FenceOperation op, u32 syncpoint_id) {
+    Tegra::Engines::Puller::FenceAction result{};
     result.op.Assign(op);
     result.syncpoint_id.Assign(syncpoint_id);
     return {result.raw};
@@ -26,7 +28,8 @@ Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoi
 nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
                        NvCore::Container& core_)
     : nvdevice{system_}, events_interface{events_interface_}, core{core_},
-      syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()} {
+      syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
+      channel_state{system.GPU().AllocateChannel()} {
     channel_fence.id = syncpoint_manager.AllocateSyncpoint();
     channel_fence.value = system_.GPU().GetSyncpointValue(channel_fence.id);
     sm_exception_breakpoint_int_report_event =
@@ -180,6 +183,12 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8
                 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
                 params.unk3);
 
+    if (channel_state->initiated) {
+        LOG_CRITICAL(Service_NVDRV, "Already allocated!");
+        return NvResult::AlreadyAllocated;
+    }
+
+    system.GPU().InitChannel(*channel_state);
     channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
 
     params.fence_out = channel_fence;
@@ -206,7 +215,7 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
         {fence.value},
         Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                   Tegra::SubmissionMode::Increasing),
-        BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id),
+        BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
     };
 }
 
@@ -220,7 +229,8 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence
     for (u32 count = 0; count < add_increment; ++count) {
         result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                                       Tegra::SubmissionMode::Increasing));
-        result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id));
+        result.emplace_back(
+            BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
     }
 
     return result;
@@ -247,11 +257,13 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
 
     auto& gpu = system.GPU();
 
+    const auto bind_id = channel_state->bind_id;
+
     params.fence_out.id = channel_fence.id;
 
     if (params.flags.add_wait.Value() &&
         !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
-        gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
+        gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
     }
 
     if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
@@ -262,15 +274,15 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
         params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
     }
 
-    gpu.PushGPUEntries(std::move(entries));
+    gpu.PushGPUEntries(bind_id, std::move(entries));
 
     if (params.flags.add_increment.Value()) {
         if (params.flags.suppress_wfi) {
-            gpu.PushGPUEntries(Tegra::CommandList{
-                BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
+            gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementCommandList(
+                                            params.fence_out, params.AddIncrementValue())});
         } else {
-            gpu.PushGPUEntries(Tegra::CommandList{
-                BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
+            gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementWithWfiCommandList(
+                                            params.fence_out, params.AddIncrementValue())});
         }
     }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 440c0c42d..3a65ed06d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -13,6 +13,12 @@
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "video_core/dma_pusher.h"
 
+namespace Tegra {
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
 namespace Service::Nvidia {
 
 namespace NvCore {
@@ -26,6 +32,7 @@ class EventInterface;
 
 namespace Service::Nvidia::Devices {
 
+class nvhost_as_gpu;
 class nvmap;
 class nvhost_gpu final : public nvdevice {
 public:
@@ -46,6 +53,7 @@ public:
     Kernel::KEvent* QueryEvent(u32 event_id) override;
 
 private:
+    friend class nvhost_as_gpu;
     enum class CtxObjects : u32_le {
         Ctx2D = 0x902D,
         Ctx3D = 0xB197,
@@ -204,6 +212,7 @@ private:
     NvCore::Container& core;
     NvCore::SyncpointManager& syncpoint_manager;
     NvCore::NvMap& nvmap;
+    std::shared_ptr<Tegra::Control::ChannelState> channel_state;
     NvFence channel_fence;
 
     // Events
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 57f58055d..279997e81 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -168,7 +168,7 @@ NvResult nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output)
     IocFromIdParams params;
     std::memcpy(&params, input.data(), sizeof(params));
 
-    LOG_DEBUG(Service_NVDRV, "called, id:{}");
+    LOG_DEBUG(Service_NVDRV, "called, id:{}", params.id);
 
     // Handles and IDs are always the same value in nvmap however IDs can be used globally given the
     // right permissions.
-- 
cgit v1.2.3