author: bunnei <bunneidev@gmail.com> 2018-04-25 05:22:24 +0200
committer: GitHub <noreply@github.com> 2018-04-25 05:22:24 +0200
commit: ea3151f475e170eaaec3ded306a0fe5c1e5944db (patch)
tree: 6f7e127c4f58de6071d9a7dbd2af464dbbd14b9b /src/core
parent: Merge pull request #393 from lioncash/loader (diff)
parent: renderer_opengl: Use correct byte order for framebuffer pixel format ABGR8. (diff)
download: yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar
yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.gz
yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.bz2
yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.lz
yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.xz
yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.zst
yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.zip
2 files changed, 50 insertions, 17 deletions
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 291bf066f..ff0420c56 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -325,15 +325,29 @@ u8* GetPhysicalPointer(PAddr address) {
     return target_pointer;
 }
 
-void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) {
-    if (start == 0) {
+void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached) {
+    if (gpu_addr == 0) {
         return;
     }
 
-    u64 num_pages = ((start + size - 1) >> PAGE_BITS) - (start >> PAGE_BITS) + 1;
-    VAddr vaddr = start;
+    // Walk the specified GPU address range one CPU page at a time, translating each GPU address
+    // to its CPU virtual address and marking that CPU page as un/cached. Marking is done at a
+    // granularity of CPU pages, which is why we step by the CPU page size (note: GPU page size is
+    // different). This assumes the specified GPU address region is contiguous as well.
+
+    u64 num_pages = ((gpu_addr + size - 1) >> PAGE_BITS) - (gpu_addr >> PAGE_BITS) + 1;
+    for (unsigned i = 0; i < num_pages; ++i, gpu_addr += PAGE_SIZE) {
+        boost::optional<VAddr> maybe_vaddr =
+            Core::System::GetInstance().GPU().memory_manager->GpuToCpuAddress(gpu_addr);
+        // The GPU <-> CPU virtual memory mapping is not 1:1
+        if (!maybe_vaddr) {
+            LOG_ERROR(HW_Memory,
+                      "Trying to flush a cached region to an invalid physical address %08X",
+                      gpu_addr);
+            continue;
+        }
+        VAddr vaddr = *maybe_vaddr;
 
-    for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
         PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
 
         if (cached) {
@@ -347,6 +361,10 @@ void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) {
                 page_type = PageType::RasterizerCachedMemory;
                 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
                 break;
+            case PageType::RasterizerCachedMemory:
+                // There can be more than one GPU region mapped per CPU region, so it's common that
+                // this area is already marked as cached.
+                break;
             default:
                 UNREACHABLE();
             }
@@ -357,6 +375,10 @@ void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) {
                 // It is not necessary for a process to have this region mapped into its address
                 // space, for example, a system module need not have a VRAM mapping.
                 break;
+            case PageType::Memory:
+                // There can be more than one GPU region mapped per CPU region, so it's common that
+                // this area is already unmarked as cached.
+                break;
             case PageType::RasterizerCachedMemory: {
                 u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK);
                 if (pointer == nullptr) {
@@ -394,19 +416,29 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
 
         VAddr overlap_start = std::max(start, region_start);
         VAddr overlap_end = std::min(end, region_end);
+
+        std::vector<Tegra::GPUVAddr> gpu_addresses =
+            Core::System::GetInstance().GPU().memory_manager->CpuToGpuAddress(overlap_start);
+
+        if (gpu_addresses.empty()) {
+            return;
+        }
+
         u64 overlap_size = overlap_end - overlap_start;
 
-        auto* rasterizer = VideoCore::g_renderer->Rasterizer();
-        switch (mode) {
-        case FlushMode::Flush:
-            rasterizer->FlushRegion(overlap_start, overlap_size);
-            break;
-        case FlushMode::Invalidate:
-            rasterizer->InvalidateRegion(overlap_start, overlap_size);
-            break;
-        case FlushMode::FlushAndInvalidate:
-            rasterizer->FlushAndInvalidateRegion(overlap_start, overlap_size);
-            break;
+        for (const auto& gpu_address : gpu_addresses) {
+            auto* rasterizer = VideoCore::g_renderer->Rasterizer();
+            switch (mode) {
+            case FlushMode::Flush:
+                rasterizer->FlushRegion(gpu_address, overlap_size);
+                break;
+            case FlushMode::Invalidate:
+                rasterizer->InvalidateRegion(gpu_address, overlap_size);
+                break;
+            case FlushMode::FlushAndInvalidate:
+                rasterizer->FlushAndInvalidateRegion(gpu_address, overlap_size);
+                break;
+            }
         }
     };
 
diff --git a/src/core/memory.h b/src/core/memory.h
index e9b8ca873..3f56a2c6a 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -14,6 +14,7 @@
 #include <boost/optional.hpp>
 #include "common/common_types.h"
 #include "core/memory_hook.h"
+#include "video_core/memory_manager.h"
 
 namespace Kernel {
 class Process;
@@ -258,7 +259,7 @@ enum class FlushMode {
 /**
  * Mark each page touching the region as cached.
  */
-void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached);
+void RasterizerMarkRegionCached(Tegra::GPUVAddr start, u64 size, bool cached);
 
 /**
  * Flushes and invalidates any externally cached rasterizer resources touching the given virtual
author	bunnei <bunneidev@gmail.com>	2018-04-25 05:22:24 +0200
committer	GitHub <noreply@github.com>	2018-04-25 05:22:24 +0200
commit	ea3151f475e170eaaec3ded306a0fe5c1e5944db (patch)
tree	6f7e127c4f58de6071d9a7dbd2af464dbbd14b9b /src/core
parent	Merge pull request #393 from lioncash/loader (diff)
parent	renderer_opengl: Use correct byte order for framebuffer pixel format ABGR8. (diff)
download	yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.gz yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.bz2 yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.lz yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.xz yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.zst yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.zip