From d129905a665ce329089338b4e468da84b3dab5d6 Mon Sep 17 00:00:00 2001
From: David <25727384+ogniK5377@users.noreply.github.com>
Date: Mon, 5 Feb 2018 18:19:31 -0800
Subject: Extra nvdrv support (#162)

* FinishInitalize needed for 3.0.1+ games

* nvdrv:s and nvdrv:t both use NVDRV

* Most settings return 0 on hardware, disabled NV_MEMORY_PROFILER for now.

NVN_THROUGH_OPENGL & NVRM_GPU_PREVENT_USE are a few interesting settings to look at. Carefully choosing settings can help with drawing graphics later on

* Initial /dev/nvhost-gpu support

* ZCullBind

* Stubbed SetErrorNotifier

* Fixed SetErrorNotifier log, Added SetChannelPriority

* Allocate GPFIFO Ex2, Allocate Obj Ctx, Submit GPFIFO

* oops

* Fixed up naming/structs/enums. Used vector instead of array for "gpfifo_entry"

* Added missing fixes

* /dev/nvhost-ctrl-gpu

* unneeded struct

* Forgot u32 in enum class

* Automatic descriptor swapping for ioctls, fixed nvgpu_gpu_get_tpc_masks_args being incorrect size

* nvdrv#QueryEvent

* Renamed logs for nvdrv

* Refactor ioctl so nv_result isn't needed

* /dev/nvhost-as-gpu

* Fixed Log service naming, CtxObjects now u32, renamed all structs, added static_asserts to structs, used INSERT_PADDING_WORDS instead of u32s

* nvdevices now uses "Ioctl" union,

* IoctlGpfifoEntry now uses bit field

* final changes
---
 src/core/hle/service/nvdrv/devices/nvdevice.h      |  12 +-
 .../hle/service/nvdrv/devices/nvdisp_disp0.cpp     |   2 +-
 src/core/hle/service/nvdrv/devices/nvdisp_disp0.h  |   2 +-
 .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp    |  78 ++++++++++-
 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h |  76 ++++++++++-
 src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp |  18 ++-
 src/core/hle/service/nvdrv/devices/nvhost_ctrl.h   |   5 +-
 .../hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp  | 114 ++++++++++++++++
 .../hle/service/nvdrv/devices/nvhost_ctrl_gpu.h    | 130 +++++++++++++++++++
 src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp  | 144 +++++++++++++++++++++
 src/core/hle/service/nvdrv/devices/nvhost_gpu.h    | 139 ++++++++++++++++++++
 src/core/hle/service/nvdrv/devices/nvmap.cpp       |   4 +-
 src/core/hle/service/nvdrv/devices/nvmap.h         |   2 +-
 13 files changed, 708 insertions(+), 18 deletions(-)
 create mode 100644 src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
 create mode 100644 src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
 create mode 100644 src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
 create mode 100644 src/core/hle/service/nvdrv/devices/nvhost_gpu.h

(limited to 'src/core/hle/service/nvdrv/devices')
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index 5ee33b3d6..cdc25b059 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -5,7 +5,9 @@
 #pragma once
 
 #include <vector>
+#include "common/bit_field.h"
 #include "common/common_types.h"
+#include "common/swap.h"
 
 namespace Service {
 namespace Nvidia {
@@ -17,6 +19,14 @@ class nvdevice {
 public:
     nvdevice() = default;
     virtual ~nvdevice() = default;
+    union Ioctl {
+        u32_le raw;                      // the whole command word; device switches dispatch on it
+        BitField<0, 8, u32_le> cmd;      // bits 0-7
+        BitField<8, 8, u32_le> group;    // bits 8-15
+        BitField<16, 14, u32_le> length; // bits 16-29
+        BitField<30, 1, u32_le> is_in;   // bit 30
+        BitField<31, 1, u32_le> is_out;  // bit 31
+    };
 
     /**
      * Handles an ioctl request.
@@ -25,7 +35,7 @@ public:
      * @param output A buffer where the output data will be written to.
      * @returns The result code of the ioctl.
      */
-    virtual u32 ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) = 0;
+    virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) = 0;
 };
 
 } // namespace Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index b65d79f11..4d0ab844c 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -14,7 +14,7 @@ namespace Service {
 namespace Nvidia {
 namespace Devices {
 
-u32 nvdisp_disp0::ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) {
+u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
     UNIMPLEMENTED();
     return 0;
 }
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index f5f9de3f4..f3cfc9925 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -20,7 +20,7 @@ public:
     nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev) : nvdevice(), nvmap_dev(std::move(nvmap_dev)) {}
     ~nvdisp_disp0() = default;
 
-    u32 ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) override;
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
 
     /// Performs a screen flip, drawing the buffer pointed to by the handle.
     void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 9db08339a..11ab25545 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,8 +10,82 @@ namespace Service {
 namespace Nvidia {
 namespace Devices {
 
-u32 nvhost_as_gpu::ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) {
-    UNIMPLEMENTED();
+u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called, command=0x%08x, input_size=0x%zx, output_size=0x%zx",
+              command.raw, input.size(), output.size());
+    // Dispatch on the raw 32-bit ioctl number; unrecognised commands fall through and return 0.
+    switch (static_cast<IoctlCommand>(command.raw)) {
+    case IoctlCommand::IocInitalizeExCommand:
+        return InitalizeEx(input, output);
+    case IoctlCommand::IocAllocateSpaceCommand:
+        return AllocateSpace(input, output);
+    case IoctlCommand::IocMapBufferExCommand:
+        return MapBufferEx(input, output);
+    case IoctlCommand::IocBindChannelCommand:
+        return BindChannel(input, output);
+    case IoctlCommand::IocGetVaRegionsCommand:
+        return GetVARegions(input, output);
+    }
+    return 0;
+}
+
+u32 nvhost_as_gpu::InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlInitalizeEx params{};
+    std::memcpy(&params, input.data(), input.size());
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x%x", params.big_page_size);
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlAllocSpace params{};
+    std::memcpy(&params, input.data(), input.size());
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, pages=%x, page_size=%x, flags=%x", params.pages,
+                params.page_size, params.flags);
+    params.offset = 0xdeadbeef; // TODO(ogniK): Actually allocate space and give a real offset
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlMapBufferEx params{};
+    std::memcpy(&params, input.data(), input.size());
+
+    LOG_WARNING(Service_NVDRV,
+                "(STUBBED) called, flags=%x, nvmap_handle=%x, buffer_offset=%lx, mapping_size=%lx, "
+                "offset=%lx",
+                params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size,
+                params.offset);
+    params.offset = 0x0; // TODO(ogniK): Actually map and give a real offset
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlBindChannel params{};
+    std::memcpy(&params, input.data(), input.size());
+    LOG_DEBUG(Service_NVDRV, "called, fd=%x", params.fd);
+    channel = params.fd;
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlGetVaRegions params{};
+    std::memcpy(&params, input.data(), input.size());
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr=%lx, buf_size=%x", params.buf_addr,
+                params.buf_size);
+
+    params.buf_size = 0x30;
+    params.regions[0].offset = 0x04000000;
+    params.regions[0].page_size = 0x1000;
+    params.regions[0].pages = 0x3fbfff;
+
+    params.regions[1].offset = 0x04000000;
+    params.regions[1].page_size = 0x10000;
+    params.regions[1].pages = 0x1bffff;
+    // TODO(ogniK): This probably can stay stubbed but should add support way way later
+    std::memcpy(output.data(), &params, output.size());
     return 0;
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 01f8861c8..06c256d5d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -6,6 +6,7 @@
 
 #include <vector>
 #include "common/common_types.h"
+#include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 
 namespace Service {
@@ -17,7 +18,80 @@ public:
     nvhost_as_gpu() = default;
     ~nvhost_as_gpu() override = default;
 
-    u32 ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) override;
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
+
+private:
+    enum class IoctlCommand : u32_le {
+        IocInitalizeExCommand = 0x40284109,
+        IocAllocateSpaceCommand = 0xC0184102,
+        IocMapBufferExCommand = 0xC0284106,
+        IocBindChannelCommand = 0x40044101,
+        IocGetVaRegionsCommand = 0xC0404108,
+    };
+
+    struct IoctlInitalizeEx {
+        u32_le big_page_size; // depends on GPU's available_big_page_sizes; 0=default
+        s32_le as_fd;         // ignored; passes 0
+        u32_le flags;         // passes 0
+        u32_le reserved;      // ignored; passes 0
+        u64_le unk0;
+        u64_le unk1;
+        u64_le unk2;
+    };
+    static_assert(sizeof(IoctlInitalizeEx) == 40, "IoctlInitalizeEx is incorrect size");
+
+    struct IoctlAllocSpace {
+        u32_le pages;
+        u32_le page_size;
+        u32_le flags;
+        INSERT_PADDING_WORDS(1);
+        union { // both members share the same 8 bytes
+            u64_le offset; // overwritten by AllocateSpace before the struct is copied back
+            u64_le align;
+        };
+    };
+    static_assert(sizeof(IoctlAllocSpace) == 24, "IoctlAllocSpace is incorrect size");
+
+    struct IoctlMapBufferEx {
+        u32_le flags; // bit0: fixed_offset, bit2: cacheable
+        u32_le kind;  // -1 is default
+        u32_le nvmap_handle;
+        u32_le page_size; // 0 means don't care
+        u64_le buffer_offset;
+        u64_le mapping_size;
+        u64_le offset;
+    };
+    static_assert(sizeof(IoctlMapBufferEx) == 40, "IoctlMapBufferEx is incorrect size");
+
+    struct IoctlBindChannel {
+        u32_le fd;
+    };
+    static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size");
+
+    struct IoctlVaRegion {
+        u64_le offset;
+        u32_le page_size;
+        INSERT_PADDING_WORDS(1);
+        u64_le pages;
+    };
+    static_assert(sizeof(IoctlVaRegion) == 24, "IoctlVaRegion is incorrect size");
+
+    struct IoctlGetVaRegions {
+        u64_le buf_addr; // (contained output user ptr on linux, ignored)
+        u32_le buf_size; // forced to 2*sizeof(struct va_region)
+        u32_le reserved;
+        IoctlVaRegion regions[2];
+    };
+    static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2,
+                  "IoctlGetVaRegions is incorrect size");
+
+    u32 channel{};
+
+    u32 InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 BindChannel(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetVARegions(const std::vector<u8>& input, std::vector<u8>& output);
 };
 
 } // namespace Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index 2078f2187..c0e35237a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -10,12 +10,12 @@ namespace Service {
 namespace Nvidia {
 namespace Devices {
 
-u32 nvhost_ctrl::ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) {
+u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
-    LOG_DEBUG(Service_NVDRV, "called, command=0x%08x, input_size=0x%lx, output_size=0x%lx", command,
-              input.size(), output.size());
+    LOG_DEBUG(Service_NVDRV, "called, command=0x%08x, input_size=0x%zx, output_size=0x%zx",
+              command.raw, input.size(), output.size());
 
-    switch (command) {
-    case IocGetConfigCommand:
+    switch (static_cast<IoctlCommand>(command.raw)) {
+    case IoctlCommand::IocGetConfigCommand:
         return NvOsGetConfigU32(input, output);
     }
     UNIMPLEMENTED();
@@ -23,19 +23,23 @@ u32 nvhost_ctrl::ioctl(u32 command, const std::vector<u8>& input, std::vector<u8
 }
 
 u32 nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output) {
-    IocGetConfigParams params;
+    IocGetConfigParams params{};
     std::memcpy(&params, input.data(), sizeof(params));
     LOG_DEBUG(Service_NVDRV, "called, setting=%s!%s", params.domain_str.data(),
               params.param_str.data());
 
     if (!strcmp(params.domain_str.data(), "nv")) {
         if (!strcmp(params.param_str.data(), "NV_MEMORY_PROFILER")) {
-            params.config_str[0] = '1';
+            params.config_str[0] = '0';
+        } else if (!strcmp(params.param_str.data(), "NVN_THROUGH_OPENGL")) {
+            params.config_str[0] = '0';
+        } else if (!strcmp(params.param_str.data(), "NVRM_GPU_PREVENT_USE")) {
+            params.config_str[0] = '0';
         } else {
-            UNIMPLEMENTED();
+            params.config_str[0] = '0';
         }
     } else {
-        UNIMPLEMENTED();
+        UNIMPLEMENTED(); // unknown domain? Only nv has been seen so far on hardware
     }
     std::memcpy(output.data(), &params, sizeof(params));
     return 0;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index abce35e17..fd02a5e45 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -20,10 +20,10 @@ public:
     nvhost_ctrl() = default;
     ~nvhost_ctrl() override = default;
 
-    u32 ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) override;
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
 
 private:
-    enum IoctlCommands {
+    enum class IoctlCommand : u32_le {
         IocSyncptReadCommand = 0xC0080014,
         IocSyncptIncrCommand = 0x40040015,
         IocSyncptWaitCommand = 0xC00C0016,
@@ -39,6 +39,7 @@ private:
         std::array<char, 0x41> param_str;
         std::array<char, 0x101> config_str;
     };
+    static_assert(sizeof(IocGetConfigParams) == 387, "IocGetConfigParams is incorrect size");
 
     u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
 };
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
new file mode 100644
index 000000000..d7e0b1bbd
--- /dev/null
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -0,0 +1,114 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
+
+namespace Service {
+namespace Nvidia {
+namespace Devices {
+
+u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called, command=0x%08x, input_size=0x%zx, output_size=0x%zx",
+              command.raw, input.size(), output.size());
+    // Dispatch on the raw 32-bit ioctl number; anything unrecognised reaches UNIMPLEMENTED().
+    switch (static_cast<IoctlCommand>(command.raw)) {
+    case IoctlCommand::IocGetCharacteristicsCommand:
+        return GetCharacteristics(input, output);
+    case IoctlCommand::IocGetTPCMasksCommand:
+        return GetTPCMasks(input, output);
+    case IoctlCommand::IocGetActiveSlotMaskCommand:
+        return GetActiveSlotMask(input, output);
+    case IoctlCommand::IocZcullGetCtxSizeCommand:
+        return ZCullGetCtxSize(input, output);
+    case IoctlCommand::IocZcullGetInfo:
+        return ZCullGetInfo(input, output);
+    }
+    UNIMPLEMENTED();
+    return 0;
+}
+
+u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called");
+    IoctlCharacteristics params{};
+    std::memcpy(&params, input.data(), input.size());
+    params.gc.arch = 0x120;
+    params.gc.impl = 0xb;
+    params.gc.rev = 0xa1;
+    params.gc.num_gpc = 0x1;
+    params.gc.l2_cache_size = 0x40000;
+    params.gc.on_board_video_memory_size = 0x0;
+    params.gc.num_tpc_per_gpc = 0x2;
+    params.gc.bus_type = 0x20;
+    params.gc.big_page_size = 0x20000;
+    params.gc.compression_page_size = 0x20000;
+    params.gc.pde_coverage_bit_count = 0x1B;
+    params.gc.available_big_page_sizes = 0x30000;
+    params.gc.gpc_mask = 0x1;
+    params.gc.sm_arch_sm_version = 0x503;
+    params.gc.sm_arch_spa_version = 0x503;
+    params.gc.sm_arch_warp_count = 0x80;
+    params.gc.gpu_va_bit_count = 0x28;
+    params.gc.reserved = 0x0;
+    params.gc.flags = 0x55;
+    params.gc.twod_class = 0x902D;
+    params.gc.threed_class = 0xB197;
+    params.gc.compute_class = 0xB1C0;
+    params.gc.gpfifo_class = 0xB06F;
+    params.gc.inline_to_memory_class = 0xA140;
+    params.gc.dma_copy_class = 0xB0B5;
+    params.gc.max_fbps_count = 0x1;
+    params.gc.fbp_en_mask = 0x0;
+    params.gc.max_ltc_per_fbp = 0x2;
+    params.gc.max_lts_per_ltc = 0x1;
+    params.gc.max_tex_per_tpc = 0x0;
+    params.gc.max_gpc_count = 0x1;
+    params.gc.rop_l2_en_mask_0 = 0x21D70;
+    params.gc.rop_l2_en_mask_1 = 0x0;
+    params.gc.chipname = 0x6230326D67;
+    params.gc.gr_compbit_store_base_hw = 0x0;
+    params.gpu_characteristics_buf_size = 0xA0;
+    params.gpu_characteristics_buf_addr = 0xdeadbeef; // Cannot be 0 (UNUSED)
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlGpuGetTpcMasksArgs params{};
+    std::memcpy(&params, input.data(), input.size());
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, mask_buf_size=0x%x, mask_buf_addr=0x%lx",
+                params.mask_buf_size, params.mask_buf_addr);
+    std::memcpy(output.data(), &params, sizeof(params)); // request is echoed back unmodified
+    return 0;
+}
+
+u32 nvhost_ctrl_gpu::GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called");
+    IoctlActiveSlotMask params{};
+    std::memcpy(&params, input.data(), input.size());
+    params.slot = 0x07;
+    params.mask = 0x01;
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_ctrl_gpu::ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called");
+    IoctlZcullGetCtxSize params{};
+    std::memcpy(&params, input.data(), input.size());
+    params.size = 0x1;
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_ctrl_gpu::ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
+    std::memset(output.data(), 0, output.size());
+    return 0;
+}
+
+} // namespace Devices
+} // namespace Nvidia
+} // namespace Service
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
new file mode 100644
index 000000000..dc0476993
--- /dev/null
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -0,0 +1,130 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+#include "common/swap.h"
+#include "core/hle/service/nvdrv/devices/nvdevice.h"
+
+namespace Service {
+namespace Nvidia {
+namespace Devices {
+
+class nvhost_ctrl_gpu final : public nvdevice {
+public:
+    nvhost_ctrl_gpu() = default;
+    ~nvhost_ctrl_gpu() override = default;
+
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
+
+private:
+    enum class IoctlCommand : u32_le {
+        IocGetCharacteristicsCommand = 0xC0B04705,
+        IocGetTPCMasksCommand = 0xC0184706,
+        IocGetActiveSlotMaskCommand = 0x80084714,
+        IocZcullGetCtxSizeCommand = 0x80044701,
+        IocZcullGetInfo = 0x80284702,
+    };
+
+    struct IoctlGpuCharacteristics {
+        u32_le arch;                       // 0x120 (NVGPU_GPU_ARCH_GM200)
+        u32_le impl;                       // 0xB (NVGPU_GPU_IMPL_GM20B)
+        u32_le rev;                        // 0xA1 (Revision A1)
+        u32_le num_gpc;                    // 0x1
+        u64_le l2_cache_size;              // 0x40000
+        u64_le on_board_video_memory_size; // 0x0 (not used)
+        u32_le num_tpc_per_gpc;            // 0x2
+        u32_le bus_type;                   // 0x20 (NVGPU_GPU_BUS_TYPE_AXI)
+        u32_le big_page_size;              // 0x20000
+        u32_le compression_page_size;      // 0x20000
+        u32_le pde_coverage_bit_count;     // 0x1B
+        u32_le available_big_page_sizes;   // 0x30000
+        u32_le gpc_mask;                   // 0x1
+        u32_le sm_arch_sm_version;         // 0x503 (Maxwell Generation 5.0.3?)
+        u32_le sm_arch_spa_version;        // 0x503 (Maxwell Generation 5.0.3?)
+        u32_le sm_arch_warp_count;         // 0x80
+        u32_le gpu_va_bit_count;           // 0x28
+        u32_le reserved;                   // NULL
+        u64_le flags;                      // 0x55
+        u32_le twod_class;                 // 0x902D (FERMI_TWOD_A)
+        u32_le threed_class;               // 0xB197 (MAXWELL_B)
+        u32_le compute_class;              // 0xB1C0 (MAXWELL_COMPUTE_B)
+        u32_le gpfifo_class;               // 0xB06F (MAXWELL_CHANNEL_GPFIFO_A)
+        u32_le inline_to_memory_class;     // 0xA140 (KEPLER_INLINE_TO_MEMORY_B)
+        u32_le dma_copy_class;             // 0xB0B5 (MAXWELL_DMA_COPY_A)
+        u32_le max_fbps_count;             // 0x1
+        u32_le fbp_en_mask;                // 0x0 (disabled)
+        u32_le max_ltc_per_fbp;            // 0x2
+        u32_le max_lts_per_ltc;            // 0x1
+        u32_le max_tex_per_tpc;            // 0x0 (not supported)
+        u32_le max_gpc_count;              // 0x1
+        u32_le rop_l2_en_mask_0;           // 0x21D70 (fuse_status_opt_rop_l2_fbp_r)
+        u32_le rop_l2_en_mask_1;           // 0x0
+        u64_le chipname;                   // 0x6230326D67 ("gm20b")
+        u64_le gr_compbit_store_base_hw;   // 0x0 (not supported)
+    };
+    static_assert(sizeof(IoctlGpuCharacteristics) == 160,
+                  "IoctlGpuCharacteristics is incorrect size");
+
+    struct IoctlCharacteristics {
+        u64_le gpu_characteristics_buf_size; // must not be NULL, but gets overwritten with
+                                             // 0xA0=max_size
+        u64_le gpu_characteristics_buf_addr; // ignored, but must not be NULL
+        IoctlGpuCharacteristics gc;
+    };
+    static_assert(sizeof(IoctlCharacteristics) == 16 + sizeof(IoctlGpuCharacteristics),
+                  "IoctlCharacteristics is incorrect size");
+
+    struct IoctlGpuGetTpcMasksArgs {
+        /// [in]  TPC mask buffer size reserved by userspace. Should be at least
+        /// sizeof(__u32) * fls(gpc_mask) to receive TPC mask for each GPC.
+        /// [out] full kernel buffer size
+        u32_le mask_buf_size;
+        u32_le reserved;
+
+        /// [in]  pointer to TPC mask buffer. It will receive one 32-bit TPC mask per GPC or 0 if
+        /// GPC is not enabled or not present. This parameter is ignored if mask_buf_size is 0.
+        u64_le mask_buf_addr;
+        u64_le unk; // Unknown trailing field; possibly added by Nintendo?
+    };
+    static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24,
+                  "IoctlGpuGetTpcMasksArgs is incorrect size");
+
+    struct IoctlActiveSlotMask {
+        u32_le slot; // always 0x07
+        u32_le mask;
+    };
+    static_assert(sizeof(IoctlActiveSlotMask) == 8, "IoctlActiveSlotMask is incorrect size");
+
+    struct IoctlZcullGetCtxSize {
+        u32_le size;
+    };
+    static_assert(sizeof(IoctlZcullGetCtxSize) == 4, "IoctlZcullGetCtxSize is incorrect size");
+
+    struct IoctlNvgpuGpuZcullGetInfoArgs {
+        u32_le width_align_pixels;
+        u32_le height_align_pixels;
+        u32_le pixel_squares_by_aliquots;
+        u32_le aliquot_total;
+        u32_le region_byte_multiplier;
+        u32_le region_header_size;
+        u32_le subregion_header_size;
+        u32_le subregion_width_align_pixels;
+        u32_le subregion_height_align_pixels;
+        u32_le subregion_count;
+    };
+    static_assert(sizeof(IoctlNvgpuGpuZcullGetInfoArgs) == 40,
+                  "IoctlNvgpuGpuZcullGetInfoArgs is incorrect size");
+
+    u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
+};
+} // namespace Devices
+} // namespace Nvidia
+} // namespace Service
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
new file mode 100644
index 000000000..229048d37
--- /dev/null
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -0,0 +1,144 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <map>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+
+namespace Service {
+namespace Nvidia {
+namespace Devices {
+
+u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called, command=0x%08x, input_size=0x%zx, output_size=0x%zx",
+              command.raw, input.size(), output.size());
+
+    switch (static_cast<IoctlCommand>(command.raw)) {
+    case IoctlCommand::IocSetNVMAPfdCommand:
+        return SetNVMAPfd(input, output);
+    case IoctlCommand::IocSetClientDataCommand:
+        return SetClientData(input, output);
+    case IoctlCommand::IocGetClientDataCommand:
+        return GetClientData(input, output);
+    case IoctlCommand::IocZCullBind:
+        return ZCullBind(input, output);
+    case IoctlCommand::IocSetErrorNotifierCommand:
+        return SetErrorNotifier(input, output);
+    case IoctlCommand::IocChannelSetPriorityCommand:
+        return SetChannelPriority(input, output);
+    case IoctlCommand::IocAllocGPFIFOEx2Command:
+        return AllocGPFIFOEx2(input, output);
+    case IoctlCommand::IocAllocObjCtxCommand:
+        return AllocateObjectContext(input, output);
+    }
+    // GPFIFO submission is matched on group and command number only, not the encoded length.
+    if (command.group == NVGPU_IOCTL_MAGIC) {
+        if (command.cmd == NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO) {
+            return SubmitGPFIFO(input, output);
+        }
+    }
+
+    UNIMPLEMENTED();
+    return 0;
+}
+
+u32 nvhost_gpu::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlSetNvmapFD params{};
+    std::memcpy(&params, input.data(), input.size());
+    LOG_DEBUG(Service_NVDRV, "called, fd=%x", params.nvmap_fd);
+    nvmap_fd = params.nvmap_fd;
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_gpu::SetClientData(const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called");
+    IoctlClientData params{};
+    std::memcpy(&params, input.data(), input.size());
+    user_data = params.data;
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_gpu::GetClientData(const std::vector<u8>& input, std::vector<u8>& output) {
+    LOG_DEBUG(Service_NVDRV, "called");
+    IoctlClientData params{};
+    std::memcpy(&params, input.data(), input.size());
+    params.data = user_data;
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_gpu::ZCullBind(const std::vector<u8>& input, std::vector<u8>& output) {
+    std::memcpy(&zcull_params, input.data(), input.size());
+    LOG_DEBUG(Service_NVDRV, "called, gpu_va=%lx, mode=%x", zcull_params.gpu_va, zcull_params.mode);
+    std::memcpy(output.data(), &zcull_params, output.size());
+    return 0;
+}
+
+u32 nvhost_gpu::SetErrorNotifier(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlSetErrorNotifier params{};
+    std::memcpy(&params, input.data(), input.size());
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, offset=%lx, size=%lx, mem=%x", params.offset,
+                params.size, params.mem);
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_gpu::SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output) {
+    std::memcpy(&channel_priority, input.data(), input.size());
+    LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority=%x", channel_priority);
+    std::memcpy(output.data(), &channel_priority, output.size());
+    return 0;
+}
+
+u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlAllocGpfifoEx2 params{};
+    std::memcpy(&params, input.data(), input.size());
+    LOG_WARNING(Service_NVDRV,
+                "(STUBBED) called, num_entries=%x, flags=%x, unk0=%x, unk1=%x, unk2=%x, unk3=%x",
+                params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
+                params.unk3);
+    params.fence_out.id = 0;
+    params.fence_out.value = 0;
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlAllocObjCtx params{};
+    std::memcpy(&params, input.data(), input.size());
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, class_num=%x, flags=%x", params.class_num,
+                params.flags);
+    params.obj_id = 0x0;
+    std::memcpy(output.data(), &params, output.size());
+    return 0;
+}
+
+u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
+    if (input.size() < sizeof(IoctlSubmitGpfifo)) {
+        UNIMPLEMENTED(); // request is too short to contain the fixed-size header
+        return 0;
+    }
+    IoctlSubmitGpfifo params{};
+    std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo=%lx, num_entries=%x, flags=%x",
+                params.gpfifo, params.num_entries, params.flags);
+    // TODO(ogniK): Process these. Entries follow the header; copy only as many as input holds.
+    const size_t held = (input.size() - sizeof(IoctlSubmitGpfifo)) / sizeof(IoctlGpfifoEntry);
+    std::vector<IoctlGpfifoEntry> entries(params.num_entries < held ? params.num_entries : held);
+    if (!entries.empty())
+        std::memcpy(entries.data(), input.data() + sizeof(IoctlSubmitGpfifo),
+                    entries.size() * sizeof(IoctlGpfifoEntry));
+    params.fence_out.id = 0;
+    params.fence_out.value = 0;
+    std::memcpy(output.data(), &params,
+                output.size() < sizeof(params) ? output.size() : sizeof(params));
+    return 0;
+}
+
+} // namespace Devices
+} // namespace Nvidia
+} // namespace Service
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
new file mode 100644
index 000000000..4fe2c9ad5
--- /dev/null
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -0,0 +1,139 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+#include "common/swap.h"
+#include "core/hle/service/nvdrv/devices/nvdevice.h"
+
+namespace Service {
+namespace Nvidia {
+namespace Devices {
+constexpr u32 NVGPU_IOCTL_MAGIC = 'H';                 // 0x48, bits 8-15 of the 'H' command words
+constexpr u32 NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO = 0x8; // presumably the submit command number
+
+class nvhost_gpu final : public nvdevice { // HLE ioctl handlers for /dev/nvhost-gpu
+public:
+    nvhost_gpu() = default;
+    ~nvhost_gpu() override = default;
+
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
+
+private:
+    enum class IoctlCommand : u32_le { // bits 8-15: magic 'H'/'G'; bits 16-29: argument size
+        IocSetNVMAPfdCommand = 0x40044801,         // size 0x04 == sizeof(IoctlSetNvmapFD)
+        IocSetClientDataCommand = 0x40084714,      // size 0x08 == sizeof(IoctlClientData)
+        IocGetClientDataCommand = 0x80084715,      // size 0x08 == sizeof(IoctlClientData)
+        IocZCullBind = 0xc010480b,                 // size 0x10 == sizeof(IoctlZCullBind)
+        IocSetErrorNotifierCommand = 0xC018480C,   // size 0x18 == sizeof(IoctlSetErrorNotifier)
+        IocChannelSetPriorityCommand = 0x4004480D, // size 0x04
+        IocAllocGPFIFOEx2Command = 0xC020481A,     // size 0x20 == sizeof(IoctlAllocGpfifoEx2)
+        IocAllocObjCtxCommand = 0xC0104809,        // size 0x10 == sizeof(IoctlAllocObjCtx)
+    };
+
+    enum class CtxObjects : u32_le { // values of IoctlAllocObjCtx::class_num
+        Ctx2D = 0x902D,
+        Ctx3D = 0xB197,
+        CtxCompute = 0xB1C0,
+        CtxKepler = 0xA140,
+        CtxDMA = 0xB0B5,
+        CtxChannelGPFIFO = 0xB06F,
+    };
+
+    struct IoctlSetNvmapFD {
+        u32_le nvmap_fd;
+    };
+    static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
+
+    struct IoctlClientData {
+        u64_le data;
+    };
+    static_assert(sizeof(IoctlClientData) == 8, "IoctlClientData is incorrect size");
+
+    struct IoctlZCullBind {
+        u64_le gpu_va;
+        u32_le mode; // 0=global, 1=no_ctxsw, 2=separate_buffer, 3=part_of_regular_buf
+        INSERT_PADDING_WORDS(1); // brings the struct to the asserted 16 bytes
+    };
+    static_assert(sizeof(IoctlZCullBind) == 16, "IoctlZCullBind is incorrect size");
+
+    struct IoctlSetErrorNotifier {
+        u64_le offset;
+        u64_le size;
+        u32_le mem; // nvmap object handle
+        INSERT_PADDING_WORDS(1); // brings the struct to the asserted 24 bytes
+    };
+    static_assert(sizeof(IoctlSetErrorNotifier) == 24, "IoctlSetErrorNotifier is incorrect size");
+
+    struct IoctlFence { // embedded as fence_out in IoctlAllocGpfifoEx2 and IoctlSubmitGpfifo
+        u32_le id;
+        u32_le value;
+    };
+    static_assert(sizeof(IoctlFence) == 8, "IoctlFence is incorrect size");
+
+    struct IoctlAllocGpfifoEx2 {
+        u32_le num_entries;   // in
+        u32_le flags;         // in
+        u32_le unk0;          // in (1 works)
+        IoctlFence fence_out; // out
+        u32_le unk1;          // in
+        u32_le unk2;          // in
+        u32_le unk3;          // in
+    };
+    static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size");
+
+    struct IoctlAllocObjCtx {
+        u32_le class_num; // 0x902D=2d, 0xB197=3d, 0xB1C0=compute, 0xA140=kepler, 0xB0B5=DMA,
+                          // 0xB06F=channel_gpfifo
+        u32_le flags;
+        u64_le obj_id; // (ignored) used for FREE_OBJ_CTX ioctl, which is not supported
+    };
+    static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size");
+
+    struct IoctlGpfifoEntry { // one 8-byte record of a SubmitGPFIFO request
+        u32_le entry0; // gpu_va_lo
+        union { // every member below aliases the same 32-bit word
+            u32_le entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
+            BitField<0, 8, u32_le> gpu_va_hi; // bits 0-7 of entry1
+            BitField<8, 2, u32_le> unk1;      // bits 8-9 of entry1
+            BitField<10, 21, u32_le> sz;      // bits 10-30 of entry1
+            BitField<31, 1, u32_le> unk2;     // bit 31 of entry1
+        };
+
+        VAddr Address() const { // 40-bit GPU virtual address: gpu_va_hi above the 32-bit entry0
+            return (static_cast<VAddr>(gpu_va_hi) << 32) | entry0;
+        }
+    };
+    static_assert(sizeof(IoctlGpfifoEntry) == 8, "IoctlGpfifoEntry is incorrect size");
+
+    struct IoctlSubmitGpfifo { // header only; the entries follow it in the ioctl input buffer
+        u64_le gpfifo;      // (ignored) guest pointer; SubmitGPFIFO only logs it
+        u32_le num_entries; // number of IoctlGpfifoEntry records following this header
+        u32_le flags;
+        IoctlFence fence_out; // returned fence for others to wait on (stub: id=0, value=0)
+    };
+    static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(IoctlFence),
+                  "submit_gpfifo is incorrect size");
+
+    u32_le nvmap_fd{};             // presumably set by SetNVMAPfd (TODO confirm)
+    u64_le user_data{};            // presumably the IoctlClientData payload (TODO confirm)
+    IoctlZCullBind zcull_params{}; // presumably the last ZCullBind arguments (TODO confirm)
+    u32_le channel_priority{};     // presumably set by SetChannelPriority (TODO confirm)
+
+    u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SetClientData(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetClientData(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 ZCullBind(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SetErrorNotifier(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
+};
+
+} // namespace Devices
+} // namespace Nvidia
+} // namespace Service
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index f6196da1b..cb3692689 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -21,8 +21,8 @@ VAddr nvmap::GetObjectAddress(u32 handle) const {
     return object->addr;
 }
 
-u32 nvmap::ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) {
-    switch (static_cast<IoctlCommand>(command)) {
+u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
+    switch (static_cast<IoctlCommand>(command.raw)) {
     case IoctlCommand::Create:
         return IocCreate(input, output);
     case IoctlCommand::Alloc:
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index 42e00f370..1591ac8ff 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -24,7 +24,7 @@ public:
     /// Returns the allocated address of an nvmap object given its handle.
     VAddr GetObjectAddress(u32 handle) const;
 
-    u32 ioctl(u32 command, const std::vector<u8>& input, std::vector<u8>& output) override;
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
 
 private:
     // Represents an nvmap object.
-- 
cgit v1.2.3