From 2c47f8aa1886522898b5b3a73185b5662be3e9f3 Mon Sep 17 00:00:00 2001
From: Feng Chen <vonchenplus@gmail.com>
Date: Thu, 2 Dec 2021 12:19:43 +0800
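Subject: Support multiple videos playing

Replace the single GPU-wide CDmaPusher with a map of pushers keyed by a
per-fd id, so that each nvhost_nvdec/nvhost_vic file descriptor drives
its own CDmaPusher instance. Submit() now receives the DeviceFD, and
GPU::PushCommandBuffer()/GPU::ClearCdmaInstance() take the id of the
instance to use or free. The GPU::CDmaPusher() accessors are removed.
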
---
 .../hle/service/nvdrv/devices/nvhost_nvdec.cpp     | 11 ++++--
 .../service/nvdrv/devices/nvhost_nvdec_common.cpp  |  5 ++-
 .../service/nvdrv/devices/nvhost_nvdec_common.h    |  3 +-
 src/core/hle/service/nvdrv/devices/nvhost_vic.cpp  | 11 ++++--
 src/video_core/gpu.cpp                             | 43 +++++++---------------
 src/video_core/gpu.h                               |  4 +-
 6 files changed, 36 insertions(+), 41 deletions(-)
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index 0d7d4ad03..8e2a16d86 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -21,7 +21,7 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>&
     case 0x0:
         switch (command.cmd) {
         case 0x1:
-            return Submit(input, output);
+            return Submit(fd, input, output);
         case 0x2:
             return GetSyncpoint(input, output);
         case 0x3:
@@ -62,11 +62,16 @@ NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>&
     return NvResult::NotImplemented;
 }
 
-void nvhost_nvdec::OnOpen(DeviceFD fd) {}
+void nvhost_nvdec::OnOpen(DeviceFD fd) {
+    static u32 next_id{};
+    fd_to_id[fd] = next_id++;
+}
 
 void nvhost_nvdec::OnClose(DeviceFD fd) {
     LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
-    system.GPU().ClearCdmaInstance();
+    if (fd_to_id.find(fd) != fd_to_id.end()) {
+        system.GPU().ClearCdmaInstance(fd_to_id[fd]);
+    }
 }
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index e61261f98..8a05f0668 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -59,7 +59,8 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
     return NvResult::Success;
 }
 
-NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
+NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector<u8>& input,
+                                     std::vector<u8>& output) {
     IoctlSubmit params{};
     std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
     LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
@@ -93,7 +94,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
         Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
         system.Memory().ReadBlock(object->addr + cmd_buffer.offset, cmdlist.data(),
                                   cmdlist.size() * sizeof(u32));
-        gpu.PushCommandBuffer(cmdlist);
+        gpu.PushCommandBuffer(fd_to_id[fd], cmdlist);
     }
     std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
     // Some games expect command_buffers to be written back
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
index 351625c17..e28c54df6 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -104,13 +104,14 @@ protected:
 
     /// Ioctl command implementations
     NvResult SetNVMAPfd(const std::vector<u8>& input);
-    NvResult Submit(const std::vector<u8>& input, std::vector<u8>& output);
+    NvResult Submit(DeviceFD fd, const std::vector<u8>& input, std::vector<u8>& output);
     NvResult GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
     NvResult GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
     NvResult MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
     NvResult UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
     NvResult SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);
 
+    std::unordered_map<DeviceFD, u32> fd_to_id{};
     s32_le nvmap_fd{};
     u32_le submit_timeout{};
     std::shared_ptr<nvmap> nvmap_dev;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index eac4dd530..420fe21c8 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -21,7 +21,7 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& i
     case 0x0:
         switch (command.cmd) {
         case 0x1:
-            return Submit(input, output);
+            return Submit(fd, input, output);
         case 0x2:
             return GetSyncpoint(input, output);
         case 0x3:
@@ -62,10 +62,15 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& i
     return NvResult::NotImplemented;
 }
 
-void nvhost_vic::OnOpen(DeviceFD fd) {}
+void nvhost_vic::OnOpen(DeviceFD fd) {
+    static u32 next_id{};
+    fd_to_id[fd] = next_id++;
+}
 
 void nvhost_vic::OnClose(DeviceFD fd) {
-    system.GPU().ClearCdmaInstance();
+    if (fd_to_id.find(fd) != fd_to_id.end()) {
+        system.GPU().ClearCdmaInstance(fd_to_id[fd]);
+    }
 }
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index ab7c21a49..27a47954d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -185,16 +185,6 @@ struct GPU::Impl {
         return *dma_pusher;
     }
 
-    /// Returns a reference to the GPU CDMA pusher.
-    [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() {
-        return *cdma_pusher;
-    }
-
-    /// Returns a const reference to the GPU CDMA pusher.
-    [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const {
-        return *cdma_pusher;
-    }
-
     /// Returns a reference to the underlying renderer.
     [[nodiscard]] VideoCore::RendererBase& Renderer() {
         return *renderer;
@@ -338,25 +328,26 @@ struct GPU::Impl {
     }
 
     /// Push GPU command buffer entries to be processed
-    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
+    void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
         if (!use_nvdec) {
             return;
         }
 
-        if (!cdma_pusher) {
-            cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu);
+        if (cdma_pushers.find(id) == cdma_pushers.end()) {
+            cdma_pushers[id] = std::make_unique<Tegra::CDmaPusher>(gpu);
         }
 
         // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
         // TODO(ameerj): RE proper async nvdec operation
         // gpu_thread.SubmitCommandBuffer(std::move(entries));
-
-        cdma_pusher->ProcessEntries(std::move(entries));
+        cdma_pushers[id]->ProcessEntries(std::move(entries));
     }
 
     /// Frees the CDMAPusher instance to free up resources
-    void ClearCdmaInstance() {
-        cdma_pusher.reset();
+    void ClearCdmaInstance(u32 id) {
+        if (cdma_pushers.find(id) != cdma_pushers.end()) {
+            cdma_pushers.erase(id);
+        }
     }
 
     /// Swap buffers (render frame)
@@ -659,7 +650,7 @@ struct GPU::Impl {
     Core::System& system;
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
     std::unique_ptr<Tegra::DmaPusher> dma_pusher;
-    std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
+    std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
     std::unique_ptr<VideoCore::RendererBase> renderer;
     VideoCore::RasterizerInterface* rasterizer = nullptr;
     const bool use_nvdec;
@@ -811,14 +802,6 @@ const Tegra::DmaPusher& GPU::DmaPusher() const {
     return impl->DmaPusher();
 }
 
-Tegra::CDmaPusher& GPU::CDmaPusher() {
-    return impl->CDmaPusher();
-}
-
-const Tegra::CDmaPusher& GPU::CDmaPusher() const {
-    return impl->CDmaPusher();
-}
-
 VideoCore::RendererBase& GPU::Renderer() {
     return impl->Renderer();
 }
@@ -887,12 +870,12 @@ void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
     impl->PushGPUEntries(std::move(entries));
 }
 
-void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
-    impl->PushCommandBuffer(entries);
+void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
+    impl->PushCommandBuffer(id, entries);
 }
 
-void GPU::ClearCdmaInstance() {
-    impl->ClearCdmaInstance();
+void GPU::ClearCdmaInstance(u32 id) {
+    impl->ClearCdmaInstance(id);
 }
 
 void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index c89a5d693..500411176 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -242,10 +242,10 @@ public:
     void PushGPUEntries(Tegra::CommandList&& entries);
 
     /// Push GPU command buffer entries to be processed
-    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);
+    void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
 
     /// Frees the CDMAPusher instance to free up resources
-    void ClearCdmaInstance();
+    void ClearCdmaInstance(u32 id);
 
     /// Swap buffers (render frame)
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
-- 
cgit v1.2.3


From 5462485cc3835941713b835bce3b671b15d210b7 Mon Sep 17 00:00:00 2001
From: Feng Chen <vonchenplus@gmail.com>
Date: Fri, 3 Dec 2021 12:31:07 +0800
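Subject: Address feedback

Allocate the per-fd id lazily on the first Submit ioctl instead of in
OnOpen(), and keep the id counter as a per-device member (next_id)
rather than a function-local static. In OnClose() and
ClearCdmaInstance(), reuse the iterator returned by find() instead of
looking the key up a second time, and use contains()/insert_or_assign()
when creating a pusher.
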
---
 src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | 16 +++++++++-------
 src/core/hle/service/nvdrv/devices/nvhost_nvdec.h   |  3 +++
 src/core/hle/service/nvdrv/devices/nvhost_vic.cpp   | 13 +++++++------
 src/core/hle/service/nvdrv/devices/nvhost_vic.h     |  3 +++
 src/video_core/gpu.cpp                              |  9 +++++----
 5 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index 8e2a16d86..8314d1ec2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -20,8 +20,12 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>&
     switch (command.group) {
     case 0x0:
         switch (command.cmd) {
-        case 0x1:
+        case 0x1: {
+            if (!fd_to_id.contains(fd)) {
+                fd_to_id[fd] = next_id++;
+            }
             return Submit(fd, input, output);
+        }
         case 0x2:
             return GetSyncpoint(input, output);
         case 0x3:
@@ -62,15 +66,13 @@ NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>&
     return NvResult::NotImplemented;
 }
 
-void nvhost_nvdec::OnOpen(DeviceFD fd) {
-    static u32 next_id{};
-    fd_to_id[fd] = next_id++;
-}
+void nvhost_nvdec::OnOpen(DeviceFD fd) {}
 
 void nvhost_nvdec::OnClose(DeviceFD fd) {
     LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
-    if (fd_to_id.find(fd) != fd_to_id.end()) {
-        system.GPU().ClearCdmaInstance(fd_to_id[fd]);
+    const auto iter = fd_to_id.find(fd);
+    if (iter != fd_to_id.end()) {
+        system.GPU().ClearCdmaInstance(iter->second);
     }
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 523d96e3a..a507c4d0a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -24,6 +24,9 @@ public:
 
     void OnOpen(DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
+
+private:
+    u32 next_id{};
 };
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index 420fe21c8..76b39806f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -21,6 +21,9 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& i
     case 0x0:
         switch (command.cmd) {
         case 0x1:
+            if (!fd_to_id.contains(fd)) {
+                fd_to_id[fd] = next_id++;
+            }
             return Submit(fd, input, output);
         case 0x2:
             return GetSyncpoint(input, output);
@@ -62,14 +65,12 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& i
     return NvResult::NotImplemented;
 }
 
-void nvhost_vic::OnOpen(DeviceFD fd) {
-    static u32 next_id{};
-    fd_to_id[fd] = next_id++;
-}
+void nvhost_vic::OnOpen(DeviceFD fd) {}
 
 void nvhost_vic::OnClose(DeviceFD fd) {
-    if (fd_to_id.find(fd) != fd_to_id.end()) {
-        system.GPU().ClearCdmaInstance(fd_to_id[fd]);
+    const auto iter = fd_to_id.find(fd);
+    if (iter != fd_to_id.end()) {
+        system.GPU().ClearCdmaInstance(iter->second);
     }
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index 6d7fda9d1..c9732c037 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -23,5 +23,8 @@ public:
 
     void OnOpen(DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
+
+private:
+    u32 next_id{};
 };
 } // namespace Service::Nvidia::Devices
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 27a47954d..8788f5148 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -333,8 +333,8 @@ struct GPU::Impl {
             return;
         }
 
-        if (cdma_pushers.find(id) == cdma_pushers.end()) {
-            cdma_pushers[id] = std::make_unique<Tegra::CDmaPusher>(gpu);
+        if (!cdma_pushers.contains(id)) {
+            cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(gpu));
         }
 
         // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
@@ -345,8 +345,9 @@ struct GPU::Impl {
 
     /// Frees the CDMAPusher instance to free up resources
     void ClearCdmaInstance(u32 id) {
-        if (cdma_pushers.find(id) != cdma_pushers.end()) {
-            cdma_pushers.erase(id);
+        const auto iter = cdma_pushers.find(id);
+        if (iter != cdma_pushers.end()) {
+            cdma_pushers.erase(iter);
         }
     }
 
-- 
cgit v1.2.3