summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/dma_pusher.cpp7
-rw-r--r--src/video_core/dma_pusher.h1
-rw-r--r--src/video_core/engines/kepler_memory.cpp15
-rw-r--r--src/video_core/engines/maxwell_3d.cpp42
-rw-r--r--src/video_core/engines/maxwell_dma.cpp37
-rw-r--r--src/video_core/gpu.cpp14
-rw-r--r--src/video_core/gpu.h17
-rw-r--r--src/video_core/gpu_asynch.cpp6
-rw-r--r--src/video_core/gpu_asynch.h6
-rw-r--r--src/video_core/gpu_synch.cpp6
-rw-r--r--src/video_core/gpu_synch.h6
-rw-r--r--src/video_core/gpu_thread.cpp136
-rw-r--r--src/video_core/gpu_thread.h132
-rw-r--r--src/video_core/memory_manager.cpp477
-rw-r--r--src/video_core/memory_manager.h163
-rw-r--r--src/video_core/morton.cpp324
-rw-r--r--src/video_core/morton.h6
-rw-r--r--src/video_core/rasterizer_cache.h74
-rw-r--r--src/video_core/rasterizer_interface.h9
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp30
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h33
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp43
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h21
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp83
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h9
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp96
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h38
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp62
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h19
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp11
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp30
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h34
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.cpp81
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.h56
-rw-r--r--src/video_core/textures/decoders.cpp32
-rw-r--r--src/video_core/textures/decoders.h13
-rw-r--r--src/video_core/textures/texture.h55
40 files changed, 1376 insertions, 862 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 0c3038c52..14b76680f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -123,6 +123,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
+ renderer_vulkan/vk_sampler_cache.cpp
+ renderer_vulkan/vk_sampler_cache.h
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_stream_buffer.cpp
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index bff1a37ff..8b1bea1ae 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -55,12 +55,9 @@ bool DmaPusher::Step() {
}
// Push buffer non-empty, read a word
- const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
- ASSERT_MSG(address, "Invalid GPU address");
-
command_headers.resize(command_list_header.size);
-
- Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
+ gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
+ command_list_header.size * sizeof(u32));
for (const CommandHeader& command_header : command_headers) {
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 27a36348c..6ab06518f 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -9,7 +9,6 @@
#include "common/bit_field.h"
#include "common/common_types.h"
-#include "video_core/memory_manager.h"
namespace Tegra {
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index aae2a4019..e259bf46b 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -9,6 +9,7 @@
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
namespace Tegra::Engines {
@@ -40,17 +41,13 @@ void KeplerMemory::ProcessData(u32 data) {
ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
- const GPUVAddr address = regs.dest.Address();
- const auto dest_address =
- memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
- ASSERT_MSG(dest_address, "Invalid GPU address");
-
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
- // We do this before actually writing the new data because the destination address might contain
- // a dirty surface that will have to be written back to memory.
- Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
+ // We do this before actually writing the new data because the destination address might
+ // contain a dirty surface that will have to be written back to memory.
+ const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
+ rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
+ memory_manager.Write<u32>(address, data);
- Memory::Write32(*dest_address, data);
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
state.write_offset++;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 144e7fa82..defcfbd3f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
}
void Maxwell3D::ProcessQueryGet() {
- GPUVAddr sequence_address = regs.query.QueryAddress();
+ const GPUVAddr sequence_address{regs.query.QueryAddress()};
// Since the sequence address is given as a GPU VAddr, we have to convert it to an application
// VAddr before writing.
- const auto address = memory_manager.GpuToCpuAddress(sequence_address);
- ASSERT_MSG(address, "Invalid GPU address");
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
// Write the current query sequence to the sequence address.
// TODO(Subv): Find out what happens if you use a long query type but mark it as a short
// query.
- Memory::Write32(*address, sequence);
+ memory_manager.Write<u32>(sequence_address, sequence);
} else {
// Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
// GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -318,7 +316,7 @@ void Maxwell3D::ProcessQueryGet() {
query_result.value = result;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
query_result.timestamp = system.CoreTiming().GetTicks();
- Memory::WriteBlock(*address, &query_result, sizeof(query_result));
+ memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
}
dirty_flags.OnMemoryWrite();
break;
@@ -393,10 +391,12 @@ void Maxwell3D::ProcessCBData(u32 value) {
// Don't allow writing past the end of the buffer.
ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
- const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
- ASSERT_MSG(address, "Invalid GPU address");
+ const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+
+ u8* ptr{memory_manager.GetPointer(address)};
+ rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
+ memory_manager.Write<u32>(address, value);
- Memory::Write32(*address, value);
dirty_flags.OnMemoryWrite();
// Increment the current buffer position.
@@ -404,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
- const GPUVAddr tic_base_address = regs.tic.TICAddress();
-
- const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
- const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
- ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
+ const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
Texture::TICEntry tic_entry;
- Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
+ memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -429,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
}
Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
- const GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
-
- const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
- const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
- ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
+ const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
Texture::TSCEntry tsc_entry;
- Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
+ memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
return tsc_entry;
}
@@ -455,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
- const auto address = memory_manager.GpuToCpuAddress(current_texture);
- ASSERT_MSG(address, "Invalid GPU address");
-
- const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
+ const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};
Texture::FullTextureInfo tex_info{};
// TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -493,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
- const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
- ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
-
- const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
+ const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
Texture::FullTextureInfo tex_info{};
tex_info.index = static_cast<u32>(offset);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 9dfea5999..5cca5c29a 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -9,6 +9,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
@@ -42,11 +43,6 @@ void MaxwellDMA::HandleCopy() {
const GPUVAddr source = regs.src_address.Address();
const GPUVAddr dest = regs.dst_address.Address();
- const auto source_cpu = memory_manager.GpuToCpuAddress(source);
- const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
- ASSERT_MSG(source_cpu, "Invalid source GPU address");
- ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
-
// TODO(Subv): Perform more research and implement all features of this engine.
ASSERT(regs.exec.enable_swizzle == 0);
ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -69,7 +65,7 @@ void MaxwellDMA::HandleCopy() {
// buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
// y_count).
if (!regs.exec.enable_2d) {
- Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count);
+ memory_manager.CopyBlock(dest, source, regs.x_count);
return;
}
@@ -78,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
// rectangle. There is no need to manually flush/invalidate the regions because
// CopyBlock does that for us.
for (u32 line = 0; line < regs.y_count; ++line) {
- const VAddr source_line = *source_cpu + line * regs.src_pitch;
- const VAddr dest_line = *dest_cpu + line * regs.dst_pitch;
- Memory::CopyBlock(dest_line, source_line, regs.x_count);
+ const GPUVAddr source_line = source + line * regs.src_pitch;
+ const GPUVAddr dest_line = dest + line * regs.dst_pitch;
+ memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
}
return;
}
@@ -89,15 +85,28 @@ void MaxwellDMA::HandleCopy() {
const std::size_t copy_size = regs.x_count * regs.y_count;
+ auto source_ptr{memory_manager.GetPointer(source)};
+ auto dst_ptr{memory_manager.GetPointer(dest)};
+
+ if (!source_ptr) {
+ LOG_ERROR(HW_GPU, "source_ptr is invalid");
+ return;
+ }
+
+ if (!dst_ptr) {
+ LOG_ERROR(HW_GPU, "dst_ptr is invalid");
+ return;
+ }
+
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying.
- Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
+ rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
// We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory.
- Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
+ rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
};
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -110,8 +119,8 @@ void MaxwellDMA::HandleCopy() {
copy_size * src_bytes_per_pixel);
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
- regs.src_params.size_x, src_bytes_per_pixel, *source_cpu,
- *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x,
+ regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
+ regs.src_params.BlockHeight(), regs.src_params.pos_x,
regs.src_params.pos_y);
} else {
ASSERT(regs.dst_params.size_z == 1);
@@ -124,7 +133,7 @@ void MaxwellDMA::HandleCopy() {
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
- src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight());
+ src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
}
}
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 08abf8ac9..267a03f2d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,6 +12,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
namespace Tegra {
@@ -274,7 +275,6 @@ void GPU::ProcessSemaphoreTriggerMethod() {
const auto op =
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
if (op == GpuSemaphoreOperation::WriteLong) {
- auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
struct Block {
u32 sequence;
u32 zeros = 0;
@@ -286,11 +286,9 @@ void GPU::ProcessSemaphoreTriggerMethod() {
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
- Memory::WriteBlock(*address, &block, sizeof(block));
+ memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block));
} else {
- const auto address =
- memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
- const u32 word = Memory::Read32(*address);
+ const u32 word{memory_manager->Read<u32>(regs.smaphore_address.SmaphoreAddress())};
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
(op == GpuSemaphoreOperation::AcquireGequal &&
static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
@@ -317,13 +315,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
}
void GPU::ProcessSemaphoreRelease() {
- const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
- Memory::Write32(*address, regs.semaphore_release);
+ memory_manager->Write<u32>(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release);
}
void GPU::ProcessSemaphoreAcquire() {
- const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
- const u32 word = Memory::Read32(*address);
+ const u32 word = memory_manager->Read<u32>(regs.smaphore_address.SmaphoreAddress());
const auto value = regs.semaphore_acquire;
if (word != value) {
regs.acquire_active = true;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 56a203275..c1830ac8d 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -9,7 +9,11 @@
#include "common/common_types.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/dma_pusher.h"
-#include "video_core/memory_manager.h"
+
+using CacheAddr = std::uintptr_t;
+inline CacheAddr ToCacheAddr(const void* host_ptr) {
+ return reinterpret_cast<CacheAddr>(host_ptr);
+}
namespace Core {
class System;
@@ -119,6 +123,8 @@ enum class EngineID {
MAXWELL_DMA_COPY_A = 0xB0B5,
};
+class MemoryManager;
+
class GPU {
public:
explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
@@ -209,13 +215,13 @@ public:
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
- virtual void FlushRegion(VAddr addr, u64 size) = 0;
+ virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
- virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+ virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
- virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+ virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
private:
void ProcessBindMethod(const MethodCall& method_call);
@@ -239,9 +245,8 @@ protected:
private:
std::unique_ptr<Tegra::MemoryManager> memory_manager;
- /// Mapping of command subchannels to their bound engine ids.
+ /// Mapping of command subchannels to their bound engine ids
std::array<EngineID, 8> bound_engines = {};
-
/// 3D engine
std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
/// 2D engine
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index ad0a747e3..8b355cf7b 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -22,15 +22,15 @@ void GPUAsynch::SwapBuffers(
gpu_thread.SwapBuffers(std::move(framebuffer));
}
-void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
+void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
}
-void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
+void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size);
}
-void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index e6a807aba..1dcc61a6c 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -26,9 +26,9 @@ public:
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
- void FlushRegion(VAddr addr, u64 size) override;
- void InvalidateRegion(VAddr addr, u64 size) override;
- void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+ void FlushRegion(CacheAddr addr, u64 size) override;
+ void InvalidateRegion(CacheAddr addr, u64 size) override;
+ void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
private:
GPUThread::ThreadManager gpu_thread;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 4c00b96c7..2cfc900ed 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -22,15 +22,15 @@ void GPUSynch::SwapBuffers(
renderer.SwapBuffers(std::move(framebuffer));
}
-void GPUSynch::FlushRegion(VAddr addr, u64 size) {
+void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
renderer.Rasterizer().FlushRegion(addr, size);
}
-void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
+void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
renderer.Rasterizer().InvalidateRegion(addr, size);
}
-void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
}
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 7d5a241ff..766b5631c 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -21,9 +21,9 @@ public:
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
- void FlushRegion(VAddr addr, u64 size) override;
- void InvalidateRegion(VAddr addr, u64 size) override;
- void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+ void FlushRegion(CacheAddr addr, u64 size) override;
+ void InvalidateRegion(CacheAddr addr, u64 size) override;
+ void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
};
} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c5bdd2a17..086b2f625 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,7 +5,6 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/frontend/scope_acquire_window_context.h"
-#include "core/settings.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
@@ -13,38 +12,13 @@
namespace VideoCommon::GPUThread {
-/// Executes a single GPU thread command
-static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
- Tegra::DmaPusher& dma_pusher) {
- if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
- dma_pusher.Push(std::move(submit_list->entries));
- dma_pusher.DispatchCalls();
- } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
- renderer.SwapBuffers(data->framebuffer);
- } else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
- renderer.Rasterizer().FlushRegion(data->addr, data->size);
- } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
- renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
- } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
- renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
- } else {
- UNREACHABLE();
- }
-}
-
/// Runs the GPU thread
static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
SynchState& state) {
-
MicroProfileOnThreadCreate("GpuThread");
- auto WaitForWakeup = [&]() {
- std::unique_lock<std::mutex> lock{state.signal_mutex};
- state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
- };
-
// Wait for first GPU command before acquiring the window context
- WaitForWakeup();
+ state.WaitForCommands();
// If emulation was stopped during disk shader loading, abort before trying to acquire context
if (!state.is_running) {
@@ -53,100 +27,72 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
+ CommandDataContainer next;
while (state.is_running) {
- if (!state.is_running) {
- return;
- }
-
- {
- // Thread has been woken up, so make the previous write queue the next read queue
- std::lock_guard<std::mutex> lock{state.signal_mutex};
- std::swap(state.push_queue, state.pop_queue);
- }
-
- // Execute all of the GPU commands
- while (!state.pop_queue->empty()) {
- ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
- state.pop_queue->pop();
+ state.WaitForCommands();
+ while (!state.queue.Empty()) {
+ state.queue.Pop(next);
+ if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
+ dma_pusher.Push(std::move(submit_list->entries));
+ dma_pusher.DispatchCalls();
+ } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
+ state.DecrementFramesCounter();
+ renderer.SwapBuffers(std::move(data->framebuffer));
+ } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
+ renderer.Rasterizer().FlushRegion(data->addr, data->size);
+ } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
+ renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
+ } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
+ return;
+ } else {
+ UNREACHABLE();
+ }
}
-
- state.UpdateIdleState();
-
- // Signal that the GPU thread has finished processing commands
- if (state.is_idle) {
- state.idle_condition.notify_one();
- }
-
- // Wait for CPU thread to send more GPU commands
- WaitForWakeup();
}
}
ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
: renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
- std::ref(dma_pusher), std::ref(state)},
- thread_id{thread.get_id()} {}
+ std::ref(dma_pusher), std::ref(state)} {}
ThreadManager::~ThreadManager() {
- {
- // Notify GPU thread that a shutdown is pending
- std::lock_guard<std::mutex> lock{state.signal_mutex};
- state.is_running = false;
- }
-
- state.signal_condition.notify_one();
+ // Notify GPU thread that a shutdown is pending
+ PushCommand(EndProcessingCommand());
thread.join();
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
- if (entries.empty()) {
- return;
- }
-
- PushCommand(SubmitListCommand(std::move(entries)), false, false);
+ PushCommand(SubmitListCommand(std::move(entries)));
}
void ThreadManager::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
+ state.IncrementFramesCounter();
+ PushCommand(SwapBuffersCommand(std::move(framebuffer)));
+ state.WaitForFrames();
}
-void ThreadManager::FlushRegion(VAddr addr, u64 size) {
- // Block the CPU when using accurate emulation
- PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
+void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
+ PushCommand(FlushRegionCommand(addr, size));
}
-void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
- PushCommand(InvalidateRegionCommand(addr, size), true, true);
+void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
+ if (state.queue.Empty()) {
+ // It's quicker to invalidate a single region on the CPU if the queue is already empty
+ renderer.Rasterizer().InvalidateRegion(addr, size);
+ } else {
+ PushCommand(InvalidateRegionCommand(addr, size));
+ }
}
-void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+ // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
InvalidateRegion(addr, size);
}
-void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
- {
- std::lock_guard<std::mutex> lock{state.signal_mutex};
-
- if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
- // Execute the command synchronously on the current thread
- ExecuteCommand(&command_data, renderer, dma_pusher);
- return;
- }
-
- // Push the command to the GPU thread
- state.UpdateIdleState();
- state.push_queue->emplace(command_data);
- }
-
- // Signal the GPU thread that commands are pending
- state.signal_condition.notify_one();
-
- if (wait_for_idle) {
- // Wait for the GPU to be idle (all commands to be executed)
- std::unique_lock<std::mutex> lock{state.idle_mutex};
- state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
- }
+void ThreadManager::PushCommand(CommandData&& command_data) {
+ state.queue.Push(CommandDataContainer(std::move(command_data)));
+ state.SignalCommands();
}
} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index edb148b14..8cd7db1c6 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -13,6 +13,9 @@
#include <thread>
#include <variant>
+#include "common/threadsafe_queue.h"
+#include "video_core/gpu.h"
+
namespace Tegra {
struct FramebufferConfig;
class DmaPusher;
@@ -24,6 +27,9 @@ class RendererBase;
namespace VideoCommon::GPUThread {
+/// Command to signal to the GPU thread that processing has ended
+struct EndProcessingCommand final {};
+
/// Command to signal to the GPU thread that a command list is ready for processing
struct SubmitListCommand final {
explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
@@ -36,59 +42,110 @@ struct SwapBuffersCommand final {
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
: framebuffer{std::move(framebuffer)} {}
- std::optional<const Tegra::FramebufferConfig> framebuffer;
+ std::optional<Tegra::FramebufferConfig> framebuffer;
};
/// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final {
- explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+ explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
- const VAddr addr;
- const u64 size;
+ CacheAddr addr;
+ u64 size;
};
/// Command to signal to the GPU thread to invalidate a region
struct InvalidateRegionCommand final {
- explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+ explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
- const VAddr addr;
- const u64 size;
+ CacheAddr addr;
+ u64 size;
};
/// Command to signal to the GPU thread to flush and invalidate a region
struct FlushAndInvalidateRegionCommand final {
- explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
+ explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
: addr{addr}, size{size} {}
- const VAddr addr;
- const u64 size;
+ CacheAddr addr;
+ u64 size;
};
-using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
- InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+using CommandData =
+ std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+ InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+
+struct CommandDataContainer {
+ CommandDataContainer() = default;
+
+ CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
+
+ CommandDataContainer& operator=(const CommandDataContainer& t) {
+ data = std::move(t.data);
+ return *this;
+ }
+
+ CommandData data;
+};
/// Struct used to synchronize the GPU thread
struct SynchState final {
- std::atomic<bool> is_running{true};
- std::atomic<bool> is_idle{true};
- std::condition_variable signal_condition;
- std::mutex signal_mutex;
- std::condition_variable idle_condition;
- std::mutex idle_mutex;
-
- // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
- // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
- // empty. This allows for efficient thread-safe access, as it does not require any copies.
-
- using CommandQueue = std::queue<CommandData>;
- std::array<CommandQueue, 2> command_queues;
- CommandQueue* push_queue{&command_queues[0]};
- CommandQueue* pop_queue{&command_queues[1]};
-
- void UpdateIdleState() {
- std::lock_guard<std::mutex> lock{idle_mutex};
- is_idle = command_queues[0].empty() && command_queues[1].empty();
+ std::atomic_bool is_running{true};
+ std::atomic_int queued_frame_count{};
+ std::mutex frames_mutex;
+ std::mutex commands_mutex;
+ std::condition_variable commands_condition;
+ std::condition_variable frames_condition;
+
+ void IncrementFramesCounter() {
+ std::lock_guard<std::mutex> lock{frames_mutex};
+ ++queued_frame_count;
+ }
+
+ void DecrementFramesCounter() {
+ {
+ std::lock_guard<std::mutex> lock{frames_mutex};
+ --queued_frame_count;
+
+ if (queued_frame_count) {
+ return;
+ }
+ }
+ frames_condition.notify_one();
}
+
+ void WaitForFrames() {
+ {
+ std::lock_guard<std::mutex> lock{frames_mutex};
+ if (!queued_frame_count) {
+ return;
+ }
+ }
+
+ // Wait for the GPU to be idle (all commands to be executed)
+ {
+ std::unique_lock<std::mutex> lock{frames_mutex};
+ frames_condition.wait(lock, [this] { return !queued_frame_count; });
+ }
+ }
+
+ void SignalCommands() {
+ {
+ std::unique_lock<std::mutex> lock{commands_mutex};
+ if (queue.Empty()) {
+ return;
+ }
+ }
+
+ commands_condition.notify_one();
+ }
+
+ void WaitForCommands() {
+ std::unique_lock<std::mutex> lock{commands_mutex};
+ commands_condition.wait(lock, [this] { return !queue.Empty(); });
+ }
+
+ using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+ CommandQueue queue;
};
/// Class used to manage the GPU thread
@@ -105,22 +162,17 @@ public:
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
- void FlushRegion(VAddr addr, u64 size);
+ void FlushRegion(CacheAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be invalidated
- void InvalidateRegion(VAddr addr, u64 size);
+ void InvalidateRegion(CacheAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
- void FlushAndInvalidateRegion(VAddr addr, u64 size);
+ void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
private:
/// Pushes a command to be executed by the GPU thread
- void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
-
- /// Returns true if this is called by the GPU thread
- bool IsGpuThread() const {
- return std::this_thread::get_id() == thread_id;
- }
+ void PushCommand(CommandData&& command_data);
private:
SynchState state;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 54abe5298..e76b59842 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,181 +5,446 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
namespace Tegra {
MemoryManager::MemoryManager() {
- // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might
- // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with
- // Undertale using 0 for a render target.
- PageSlot(0) = static_cast<u64>(PageStatus::Reserved);
+ std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
+ std::fill(page_table.attributes.begin(), page_table.attributes.end(),
+ Common::PageType::Unmapped);
+ page_table.Resize(address_space_width);
+
+ // Initialize the map with a single free region covering the entire managed space.
+ VirtualMemoryArea initial_vma;
+ initial_vma.size = address_space_end;
+ vma_map.emplace(initial_vma.base, initial_vma);
+
+ UpdatePageTableForVMA(initial_vma);
}
GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
- const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
+ const u64 aligned_size{Common::AlignUp(size, page_size)};
+ const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
- ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+ AllocateMemory(gpu_addr, 0, aligned_size);
- for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
- VAddr& slot{PageSlot(*gpu_addr + offset)};
+ return gpu_addr;
+}
- ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
+ const u64 aligned_size{Common::AlignUp(size, page_size)};
- slot = static_cast<u64>(PageStatus::Allocated);
- }
+ AllocateMemory(gpu_addr, 0, aligned_size);
- return *gpu_addr;
+ return gpu_addr;
}
-GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
- for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
- VAddr& slot{PageSlot(gpu_addr + offset)};
+GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
+ const u64 aligned_size{Common::AlignUp(size, page_size)};
+ const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
- ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+ MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
- slot = static_cast<u64>(PageStatus::Allocated);
- }
+ return gpu_addr;
+}
+
+GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
+ ASSERT((gpu_addr & page_mask) == 0);
+
+ const u64 aligned_size{Common::AlignUp(size, page_size)};
+
+ MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
return gpu_addr;
}
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
- const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)};
+GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
+ ASSERT((gpu_addr & page_mask) == 0);
- ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+ const u64 aligned_size{Common::AlignUp(size, page_size)};
+ const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
+
+ Core::System::GetInstance().Renderer().Rasterizer().FlushAndInvalidateRegion(cache_addr,
+ aligned_size);
+ UnmapRange(gpu_addr, aligned_size);
+
+ return gpu_addr;
+}
- for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
- VAddr& slot{PageSlot(*gpu_addr + offset)};
+GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) {
+ // Find the first Free VMA.
+ const VMAHandle vma_handle{std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) {
+ if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
+ return false;
+ }
- ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+ const VAddr vma_end{vma.second.base + vma.second.size};
+ return vma_end > region_start && vma_end >= region_start + size;
+ })};
- slot = cpu_addr + offset;
+ if (vma_handle == vma_map.end()) {
+ return {};
}
- const MappedRegion region{cpu_addr, *gpu_addr, size};
- mapped_regions.push_back(region);
+ return std::max(region_start, vma_handle->second.base);
+}
- return *gpu_addr;
+bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
+ return (addr >> page_bits) < page_table.pointers.size();
}
-GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
- ASSERT((gpu_addr & PAGE_MASK) == 0);
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) {
+ if (!IsAddressValid(addr)) {
+ return {};
+ }
- if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) {
- // Page has been already mapped. In this case, we must find a new area of memory to use that
- // is different than the specified one. Super Mario Odyssey hits this scenario when changing
- // areas, but we do not want to overwrite the old pages.
- // TODO(bunnei): We need to write a hardware test to confirm this behavior.
+ VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
+ if (cpu_addr) {
+ return cpu_addr + (addr & page_mask);
+ }
- LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr);
+ return {};
+}
- const std::optional<GPUVAddr> new_gpu_addr{
- FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)};
+template <typename T>
+T MemoryManager::Read(GPUVAddr addr) {
+ if (!IsAddressValid(addr)) {
+ return {};
+ }
- ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory");
+ const u8* page_pointer{page_table.pointers[addr >> page_bits]};
+ if (page_pointer) {
+ // NOTE: Avoid adding any extra logic to this fast-path block
+ T value;
+ std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
+ return value;
+ }
- gpu_addr = *new_gpu_addr;
+ switch (page_table.attributes[addr >> page_bits]) {
+ case Common::PageType::Unmapped:
+ LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr);
+ return 0;
+ case Common::PageType::Memory:
+ ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
+ break;
+ default:
+ UNREACHABLE();
}
+ return {};
+}
- for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
- VAddr& slot{PageSlot(gpu_addr + offset)};
+template <typename T>
+void MemoryManager::Write(GPUVAddr addr, T data) {
+ if (!IsAddressValid(addr)) {
+ return;
+ }
- ASSERT(slot == static_cast<u64>(PageStatus::Allocated));
+ u8* page_pointer{page_table.pointers[addr >> page_bits]};
+ if (page_pointer) {
+ // NOTE: Avoid adding any extra logic to this fast-path block
+ std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
+ return;
+ }
- slot = cpu_addr + offset;
+ switch (page_table.attributes[addr >> page_bits]) {
+ case Common::PageType::Unmapped:
+ LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
+ static_cast<u32>(data), addr);
+ return;
+ case Common::PageType::Memory:
+ ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
+ break;
+ default:
+ UNREACHABLE();
}
+}
- const MappedRegion region{cpu_addr, gpu_addr, size};
- mapped_regions.push_back(region);
+template u8 MemoryManager::Read<u8>(GPUVAddr addr);
+template u16 MemoryManager::Read<u16>(GPUVAddr addr);
+template u32 MemoryManager::Read<u32>(GPUVAddr addr);
+template u64 MemoryManager::Read<u64>(GPUVAddr addr);
+template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data);
+template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
+template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
+template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
+
+u8* MemoryManager::GetPointer(GPUVAddr addr) {
+ if (!IsAddressValid(addr)) {
+ return {};
+ }
- return gpu_addr;
+ u8* page_pointer{page_table.pointers[addr >> page_bits]};
+ if (page_pointer) {
+ return page_pointer + (addr & page_mask);
+ }
+
+ LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
+ return {};
}
-GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
- ASSERT((gpu_addr & PAGE_MASK) == 0);
+void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
+ std::memcpy(dest_buffer, GetPointer(src_addr), size);
+}
+void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
+ std::memcpy(GetPointer(dest_addr), src_buffer, size);
+}
- for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
- VAddr& slot{PageSlot(gpu_addr + offset)};
+void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
+ std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
+}
- ASSERT(slot != static_cast<u64>(PageStatus::Allocated) &&
- slot != static_cast<u64>(PageStatus::Unmapped));
+void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
+ VAddr backing_addr) {
+ LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
+ (base + size) * page_size);
+
+ const VAddr end{base + size};
+ ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
+ base + page_table.pointers.size());
+
+ std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
+
+ if (memory == nullptr) {
+ std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
+ std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end,
+ backing_addr);
+ } else {
+ while (base != end) {
+ page_table.pointers[base] = memory;
+ page_table.backing_addr[base] = backing_addr;
+
+ base += 1;
+ memory += page_size;
+ backing_addr += page_size;
+ }
+ }
+}
- slot = static_cast<u64>(PageStatus::Unmapped);
+void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) {
+ ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
+ ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
+ MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr);
+}
+
+void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) {
+ ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
+ ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);
+ MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped);
+}
+
+bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
+ ASSERT(base + size == next.base);
+ if (type != next.type) {
+ return {};
+ }
+ if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) {
+ return {};
+ }
+ if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) {
+ return {};
+ }
+ return true;
+}
+
+MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const {
+ if (target >= address_space_end) {
+ return vma_map.end();
+ } else {
+ return std::prev(vma_map.upper_bound(target));
}
+}
- // Delete the region mappings that are contained within the unmapped region
- mapped_regions.erase(std::remove_if(mapped_regions.begin(), mapped_regions.end(),
- [&](const MappedRegion& region) {
- return region.gpu_addr <= gpu_addr &&
- region.gpu_addr + region.size < gpu_addr + size;
- }),
- mapped_regions.end());
- return gpu_addr;
+MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) {
+ VirtualMemoryArea& vma{vma_handle->second};
+
+ vma.type = VirtualMemoryArea::Type::Allocated;
+ vma.backing_addr = 0;
+ vma.backing_memory = {};
+ UpdatePageTableForVMA(vma);
+
+ return MergeAdjacent(vma_handle);
}
-GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
- for (const auto& region : mapped_regions) {
- const GPUVAddr region_end{region.gpu_addr + region.size};
- if (region_start >= region.gpu_addr && region_start < region_end) {
- return region_end;
- }
+MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset,
+ u64 size) {
+
+ // This is the appropriately sized VMA that will turn into our allocation.
+ VMAIter vma_handle{CarveVMA(target, size)};
+ VirtualMemoryArea& vma{vma_handle->second};
+
+ ASSERT(vma.size == size);
+
+ vma.offset = offset;
+
+ return Allocate(vma_handle);
+}
+
+MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size,
+ VAddr backing_addr) {
+ // This is the appropriately sized VMA that will turn into our allocation.
+ VMAIter vma_handle{CarveVMA(target, size)};
+ VirtualMemoryArea& vma{vma_handle->second};
+
+ ASSERT(vma.size == size);
+
+ vma.type = VirtualMemoryArea::Type::Mapped;
+ vma.backing_memory = memory;
+ vma.backing_addr = backing_addr;
+ UpdatePageTableForVMA(vma);
+
+ return MergeAdjacent(vma_handle);
+}
+
+void MemoryManager::UnmapRange(GPUVAddr target, u64 size) {
+ VMAIter vma{CarveVMARange(target, size)};
+ const VAddr target_end{target + size};
+ const VMAIter end{vma_map.end()};
+
+ // The comparison against the end of the range must be done using addresses since VMAs can be
+ // merged during this process, causing invalidation of the iterators.
+ while (vma != end && vma->second.base < target_end) {
+ // Unmapped ranges return to allocated state and can be reused
+ // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games
+ vma = std::next(Allocate(vma));
}
- return {};
+
+ ASSERT(FindVMA(target)->second.size >= size);
}
-std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
- PageStatus status) {
- GPUVAddr gpu_addr{region_start};
- u64 free_space{};
- align = (align + PAGE_MASK) & ~PAGE_MASK;
-
- while (gpu_addr + free_space < MAX_ADDRESS) {
- if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) {
- free_space += PAGE_SIZE;
- if (free_space >= size) {
- return gpu_addr;
- }
- } else {
- gpu_addr += free_space + PAGE_SIZE;
- free_space = 0;
- gpu_addr = Common::AlignUp(gpu_addr, align);
- }
+MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) {
+ // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given
+ // non-const access to its container.
+ return vma_map.erase(iter, iter); // Erases an empty range of elements
+}
+
+MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
+ ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
+ ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base);
+
+ VMAIter vma_handle{StripIterConstness(FindVMA(base))};
+ if (vma_handle == vma_map.end()) {
+ // Target address is outside the managed range
+ return {};
}
- return {};
+ const VirtualMemoryArea& vma{vma_handle->second};
+ if (vma.type == VirtualMemoryArea::Type::Mapped) {
+ // Region is already allocated
+ return {};
+ }
+
+ const VAddr start_in_vma{base - vma.base};
+ const VAddr end_in_vma{start_in_vma + size};
+
+ ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}",
+ vma.size, end_in_vma);
+
+ if (end_in_vma < vma.size) {
+ // Split VMA at the end of the allocated region
+ SplitVMA(vma_handle, end_in_vma);
+ }
+ if (start_in_vma != 0) {
+ // Split VMA at the start of the allocated region
+ vma_handle = SplitVMA(vma_handle, start_in_vma);
+ }
+
+ return vma_handle;
}
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
- const VAddr base_addr{PageSlot(gpu_addr)};
+MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) {
+ ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
+ ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target);
- if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
- base_addr == static_cast<u64>(PageStatus::Unmapped) ||
- base_addr == static_cast<u64>(PageStatus::Reserved)) {
+ const VAddr target_end{target + size};
+ ASSERT(target_end >= target);
+ ASSERT(size > 0);
+
+ VMAIter begin_vma{StripIterConstness(FindVMA(target))};
+ const VMAIter i_end{vma_map.lower_bound(target_end)};
+ if (std::any_of(begin_vma, i_end, [](const auto& entry) {
+ return entry.second.type == VirtualMemoryArea::Type::Unmapped;
+ })) {
return {};
}
- return base_addr + (gpu_addr & PAGE_MASK);
+ if (target != begin_vma->second.base) {
+ begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
+ }
+
+ VMAIter end_vma{StripIterConstness(FindVMA(target_end))};
+ if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
+ end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
+ }
+
+ return begin_vma;
}
-std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const {
- std::vector<GPUVAddr> results;
- for (const auto& region : mapped_regions) {
- if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
- const u64 offset{cpu_addr - region.cpu_addr};
- results.push_back(region.gpu_addr + offset);
+MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
+ VirtualMemoryArea& old_vma{vma_handle->second};
+ VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA
+
+ // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably
+ // a bug. This restriction might be removed later.
+ ASSERT(offset_in_vma < old_vma.size);
+ ASSERT(offset_in_vma > 0);
+
+ old_vma.size = offset_in_vma;
+ new_vma.base += offset_in_vma;
+ new_vma.size -= offset_in_vma;
+
+ switch (new_vma.type) {
+ case VirtualMemoryArea::Type::Unmapped:
+ break;
+ case VirtualMemoryArea::Type::Allocated:
+ new_vma.offset += offset_in_vma;
+ break;
+ case VirtualMemoryArea::Type::Mapped:
+ new_vma.backing_memory += offset_in_vma;
+ break;
+ }
+
+ ASSERT(old_vma.CanBeMergedWith(new_vma));
+
+ return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
+}
+
+MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) {
+ const VMAIter next_vma{std::next(iter)};
+ if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
+ iter->second.size += next_vma->second.size;
+ vma_map.erase(next_vma);
+ }
+
+ if (iter != vma_map.begin()) {
+ VMAIter prev_vma{std::prev(iter)};
+ if (prev_vma->second.CanBeMergedWith(iter->second)) {
+ prev_vma->second.size += iter->second.size;
+ vma_map.erase(iter);
+ iter = prev_vma;
}
}
- return results;
+
+ return iter;
}
-VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
- auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]};
- if (!block) {
- block = std::make_unique<PageBlock>();
- block->fill(static_cast<VAddr>(PageStatus::Unmapped));
+void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
+ switch (vma.type) {
+ case VirtualMemoryArea::Type::Unmapped:
+ UnmapRegion(vma.base, vma.size);
+ break;
+ case VirtualMemoryArea::Type::Allocated:
+ MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr);
+ break;
+ case VirtualMemoryArea::Type::Mapped:
+ MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr);
+ break;
}
- return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK];
}
} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index fb03497ca..34744bb27 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -1,67 +1,148 @@
-// Copyright 2018 yuzu emulator team
+// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
-#include <array>
-#include <memory>
+#include <map>
#include <optional>
-#include <vector>
#include "common/common_types.h"
+#include "common/page_table.h"
namespace Tegra {
-/// Virtual addresses in the GPU's memory map are 64 bit.
-using GPUVAddr = u64;
+/**
+ * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space
+ * with homogeneous attributes across its extents. In this particular implementation each VMA is
+ * also backed by a single host memory allocation.
+ */
+struct VirtualMemoryArea {
+ enum class Type : u8 {
+ Unmapped,
+ Allocated,
+ Mapped,
+ };
+
+ /// Virtual base address of the region.
+ GPUVAddr base{};
+ /// Size of the region.
+ u64 size{};
+ /// Memory area mapping type.
+ Type type{Type::Unmapped};
+ /// CPU memory mapped address corresponding to this memory area.
+ VAddr backing_addr{};
+ /// Offset into the backing_memory the mapping starts from.
+ std::size_t offset{};
+ /// Pointer backing this VMA.
+ u8* backing_memory{};
+
+ /// Tests if this area can be merged to the right with `next`.
+ bool CanBeMergedWith(const VirtualMemoryArea& next) const;
+};
class MemoryManager final {
public:
MemoryManager();
GPUVAddr AllocateSpace(u64 size, u64 align);
- GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
+ GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
- GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
- GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
- GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
- std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
- std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;
+ GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
+ GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
+ std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr);
+
+ template <typename T>
+ T Read(GPUVAddr addr);
+
+ template <typename T>
+ void Write(GPUVAddr addr, T data);
- static constexpr u64 PAGE_BITS = 16;
- static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
- static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
+ u8* GetPointer(GPUVAddr addr);
+
+ void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
+ void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+ void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
private:
- enum class PageStatus : u64 {
- Unmapped = 0xFFFFFFFFFFFFFFFFULL,
- Allocated = 0xFFFFFFFFFFFFFFFEULL,
- Reserved = 0xFFFFFFFFFFFFFFFDULL,
- };
+ using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
+ using VMAHandle = VMAMap::const_iterator;
+ using VMAIter = VMAMap::iterator;
- std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
- PageStatus status);
- VAddr& PageSlot(GPUVAddr gpu_addr);
-
- static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
- static constexpr u64 PAGE_TABLE_BITS{10};
- static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};
- static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1};
- static constexpr u64 PAGE_BLOCK_BITS{14};
- static constexpr u64 PAGE_BLOCK_SIZE{1 << PAGE_BLOCK_BITS};
- static constexpr u64 PAGE_BLOCK_MASK{PAGE_BLOCK_SIZE - 1};
-
- using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>;
- std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{};
-
- struct MappedRegion {
- VAddr cpu_addr;
- GPUVAddr gpu_addr;
- u64 size;
- };
+ bool IsAddressValid(GPUVAddr addr) const;
+ void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
+ VAddr backing_addr = 0);
+ void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr);
+ void UnmapRegion(GPUVAddr base, u64 size);
+
+ /// Finds the VMA in which the given address is included in, or `vma_map.end()`.
+ VMAHandle FindVMA(GPUVAddr target) const;
+
+ VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size);
+
+ /**
+ * Maps an unmanaged host memory pointer at a given address.
+ *
+ * @param target The guest address to start the mapping at.
+ * @param memory The memory to be mapped.
+ * @param size Size of the mapping.
+ * @param state MemoryState tag to attach to the VMA.
+ */
+ VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
+
+ /// Unmaps a range of addresses, splitting VMAs as necessary.
+ void UnmapRange(GPUVAddr target, u64 size);
+
+ /// Converts a VMAHandle to a mutable VMAIter.
+ VMAIter StripIterConstness(const VMAHandle& iter);
+
+ /// Marks as the specfied VMA as allocated.
+ VMAIter Allocate(VMAIter vma);
+
+ /**
+ * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
+ * the appropriate error checking.
+ */
+ VMAIter CarveVMA(GPUVAddr base, u64 size);
+
+ /**
+ * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
+ * end of the range.
+ */
+ VMAIter CarveVMARange(GPUVAddr base, u64 size);
+
+ /**
+ * Splits a VMA in two, at the specified offset.
+ * @returns the right side of the split, with the original iterator becoming the left side.
+ */
+ VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma);
+
+ /**
+ * Checks for and merges the specified VMA with adjacent ones if possible.
+ * @returns the merged VMA or the original if no merging was possible.
+ */
+ VMAIter MergeAdjacent(VMAIter vma);
+
+ /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
+ void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
+
+ /// Finds a free (unmapped region) of the specified size starting at the specified address.
+ GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size);
+
+private:
+ static constexpr u64 page_bits{16};
+ static constexpr u64 page_size{1 << page_bits};
+ static constexpr u64 page_mask{page_size - 1};
+
+ /// Address space in bits, this is fairly arbitrary but sufficiently large.
+ static constexpr u32 address_space_width{39};
+ /// Start address for mapping, this is fairly arbitrary but must be non-zero.
+ static constexpr GPUVAddr address_space_base{0x100000};
+ /// End of address space, based on address space in bits.
+ static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
- std::vector<MappedRegion> mapped_regions;
+ Common::PageTable page_table{page_bits};
+ VMAMap vma_map;
};
} // namespace Tegra
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index b68f4fb13..3e91cbc83 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -6,7 +6,6 @@
#include <cstring>
#include "common/assert.h"
#include "common/common_types.h"
-#include "core/memory.h"
#include "video_core/morton.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
@@ -16,12 +15,12 @@ namespace VideoCore {
using Surface::GetBytesPerPixel;
using Surface::PixelFormat;
-using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr);
+using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
template <bool morton_to_linear, PixelFormat format>
static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
- u32 tile_width_spacing, u8* buffer, std::size_t buffer_size, VAddr addr) {
+ u32 tile_width_spacing, u8* buffer, u8* addr) {
constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -34,150 +33,146 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
stride, height, depth, block_height, block_depth,
tile_width_spacing);
} else {
- Tegra::Texture::CopySwizzledData(
- (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y,
- depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false,
- block_height, block_depth, tile_width_spacing);
+ Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
+ (height + tile_size_y - 1) / tile_size_y, depth,
+ bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
+ block_height, block_depth, tile_width_spacing);
}
}
static constexpr ConversionArray morton_to_linear_fns = {
- // clang-format off
- MortonCopy<true, PixelFormat::ABGR8U>,
- MortonCopy<true, PixelFormat::ABGR8S>,
- MortonCopy<true, PixelFormat::ABGR8UI>,
- MortonCopy<true, PixelFormat::B5G6R5U>,
- MortonCopy<true, PixelFormat::A2B10G10R10U>,
- MortonCopy<true, PixelFormat::A1B5G5R5U>,
- MortonCopy<true, PixelFormat::R8U>,
- MortonCopy<true, PixelFormat::R8UI>,
- MortonCopy<true, PixelFormat::RGBA16F>,
- MortonCopy<true, PixelFormat::RGBA16U>,
- MortonCopy<true, PixelFormat::RGBA16UI>,
- MortonCopy<true, PixelFormat::R11FG11FB10F>,
- MortonCopy<true, PixelFormat::RGBA32UI>,
- MortonCopy<true, PixelFormat::DXT1>,
- MortonCopy<true, PixelFormat::DXT23>,
- MortonCopy<true, PixelFormat::DXT45>,
- MortonCopy<true, PixelFormat::DXN1>,
- MortonCopy<true, PixelFormat::DXN2UNORM>,
- MortonCopy<true, PixelFormat::DXN2SNORM>,
- MortonCopy<true, PixelFormat::BC7U>,
- MortonCopy<true, PixelFormat::BC6H_UF16>,
- MortonCopy<true, PixelFormat::BC6H_SF16>,
- MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
- MortonCopy<true, PixelFormat::BGRA8>,
- MortonCopy<true, PixelFormat::RGBA32F>,
- MortonCopy<true, PixelFormat::RG32F>,
- MortonCopy<true, PixelFormat::R32F>,
- MortonCopy<true, PixelFormat::R16F>,
- MortonCopy<true, PixelFormat::R16U>,
- MortonCopy<true, PixelFormat::R16S>,
- MortonCopy<true, PixelFormat::R16UI>,
- MortonCopy<true, PixelFormat::R16I>,
- MortonCopy<true, PixelFormat::RG16>,
- MortonCopy<true, PixelFormat::RG16F>,
- MortonCopy<true, PixelFormat::RG16UI>,
- MortonCopy<true, PixelFormat::RG16I>,
- MortonCopy<true, PixelFormat::RG16S>,
- MortonCopy<true, PixelFormat::RGB32F>,
- MortonCopy<true, PixelFormat::RGBA8_SRGB>,
- MortonCopy<true, PixelFormat::RG8U>,
- MortonCopy<true, PixelFormat::RG8S>,
- MortonCopy<true, PixelFormat::RG32UI>,
- MortonCopy<true, PixelFormat::R32UI>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
- MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
- MortonCopy<true, PixelFormat::BGRA8_SRGB>,
- MortonCopy<true, PixelFormat::DXT1_SRGB>,
- MortonCopy<true, PixelFormat::DXT23_SRGB>,
- MortonCopy<true, PixelFormat::DXT45_SRGB>,
- MortonCopy<true, PixelFormat::BC7U_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
- MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
- MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
- MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
- MortonCopy<true, PixelFormat::Z32F>,
- MortonCopy<true, PixelFormat::Z16>,
- MortonCopy<true, PixelFormat::Z24S8>,
- MortonCopy<true, PixelFormat::S8Z24>,
- MortonCopy<true, PixelFormat::Z32FS8>,
- // clang-format on
+ MortonCopy<true, PixelFormat::ABGR8U>,
+ MortonCopy<true, PixelFormat::ABGR8S>,
+ MortonCopy<true, PixelFormat::ABGR8UI>,
+ MortonCopy<true, PixelFormat::B5G6R5U>,
+ MortonCopy<true, PixelFormat::A2B10G10R10U>,
+ MortonCopy<true, PixelFormat::A1B5G5R5U>,
+ MortonCopy<true, PixelFormat::R8U>,
+ MortonCopy<true, PixelFormat::R8UI>,
+ MortonCopy<true, PixelFormat::RGBA16F>,
+ MortonCopy<true, PixelFormat::RGBA16U>,
+ MortonCopy<true, PixelFormat::RGBA16UI>,
+ MortonCopy<true, PixelFormat::R11FG11FB10F>,
+ MortonCopy<true, PixelFormat::RGBA32UI>,
+ MortonCopy<true, PixelFormat::DXT1>,
+ MortonCopy<true, PixelFormat::DXT23>,
+ MortonCopy<true, PixelFormat::DXT45>,
+ MortonCopy<true, PixelFormat::DXN1>,
+ MortonCopy<true, PixelFormat::DXN2UNORM>,
+ MortonCopy<true, PixelFormat::DXN2SNORM>,
+ MortonCopy<true, PixelFormat::BC7U>,
+ MortonCopy<true, PixelFormat::BC6H_UF16>,
+ MortonCopy<true, PixelFormat::BC6H_SF16>,
+ MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
+ MortonCopy<true, PixelFormat::BGRA8>,
+ MortonCopy<true, PixelFormat::RGBA32F>,
+ MortonCopy<true, PixelFormat::RG32F>,
+ MortonCopy<true, PixelFormat::R32F>,
+ MortonCopy<true, PixelFormat::R16F>,
+ MortonCopy<true, PixelFormat::R16U>,
+ MortonCopy<true, PixelFormat::R16S>,
+ MortonCopy<true, PixelFormat::R16UI>,
+ MortonCopy<true, PixelFormat::R16I>,
+ MortonCopy<true, PixelFormat::RG16>,
+ MortonCopy<true, PixelFormat::RG16F>,
+ MortonCopy<true, PixelFormat::RG16UI>,
+ MortonCopy<true, PixelFormat::RG16I>,
+ MortonCopy<true, PixelFormat::RG16S>,
+ MortonCopy<true, PixelFormat::RGB32F>,
+ MortonCopy<true, PixelFormat::RGBA8_SRGB>,
+ MortonCopy<true, PixelFormat::RG8U>,
+ MortonCopy<true, PixelFormat::RG8S>,
+ MortonCopy<true, PixelFormat::RG32UI>,
+ MortonCopy<true, PixelFormat::R32UI>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
+ MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
+ MortonCopy<true, PixelFormat::BGRA8_SRGB>,
+ MortonCopy<true, PixelFormat::DXT1_SRGB>,
+ MortonCopy<true, PixelFormat::DXT23_SRGB>,
+ MortonCopy<true, PixelFormat::DXT45_SRGB>,
+ MortonCopy<true, PixelFormat::BC7U_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
+ MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
+ MortonCopy<true, PixelFormat::Z32F>,
+ MortonCopy<true, PixelFormat::Z16>,
+ MortonCopy<true, PixelFormat::Z24S8>,
+ MortonCopy<true, PixelFormat::S8Z24>,
+ MortonCopy<true, PixelFormat::Z32FS8>,
};
static constexpr ConversionArray linear_to_morton_fns = {
- // clang-format off
- MortonCopy<false, PixelFormat::ABGR8U>,
- MortonCopy<false, PixelFormat::ABGR8S>,
- MortonCopy<false, PixelFormat::ABGR8UI>,
- MortonCopy<false, PixelFormat::B5G6R5U>,
- MortonCopy<false, PixelFormat::A2B10G10R10U>,
- MortonCopy<false, PixelFormat::A1B5G5R5U>,
- MortonCopy<false, PixelFormat::R8U>,
- MortonCopy<false, PixelFormat::R8UI>,
- MortonCopy<false, PixelFormat::RGBA16F>,
- MortonCopy<false, PixelFormat::RGBA16U>,
- MortonCopy<false, PixelFormat::RGBA16UI>,
- MortonCopy<false, PixelFormat::R11FG11FB10F>,
- MortonCopy<false, PixelFormat::RGBA32UI>,
- MortonCopy<false, PixelFormat::DXT1>,
- MortonCopy<false, PixelFormat::DXT23>,
- MortonCopy<false, PixelFormat::DXT45>,
- MortonCopy<false, PixelFormat::DXN1>,
- MortonCopy<false, PixelFormat::DXN2UNORM>,
- MortonCopy<false, PixelFormat::DXN2SNORM>,
- MortonCopy<false, PixelFormat::BC7U>,
- MortonCopy<false, PixelFormat::BC6H_UF16>,
- MortonCopy<false, PixelFormat::BC6H_SF16>,
- // TODO(Subv): Swizzling ASTC formats are not supported
- nullptr,
- MortonCopy<false, PixelFormat::BGRA8>,
- MortonCopy<false, PixelFormat::RGBA32F>,
- MortonCopy<false, PixelFormat::RG32F>,
- MortonCopy<false, PixelFormat::R32F>,
- MortonCopy<false, PixelFormat::R16F>,
- MortonCopy<false, PixelFormat::R16U>,
- MortonCopy<false, PixelFormat::R16S>,
- MortonCopy<false, PixelFormat::R16UI>,
- MortonCopy<false, PixelFormat::R16I>,
- MortonCopy<false, PixelFormat::RG16>,
- MortonCopy<false, PixelFormat::RG16F>,
- MortonCopy<false, PixelFormat::RG16UI>,
- MortonCopy<false, PixelFormat::RG16I>,
- MortonCopy<false, PixelFormat::RG16S>,
- MortonCopy<false, PixelFormat::RGB32F>,
- MortonCopy<false, PixelFormat::RGBA8_SRGB>,
- MortonCopy<false, PixelFormat::RG8U>,
- MortonCopy<false, PixelFormat::RG8S>,
- MortonCopy<false, PixelFormat::RG32UI>,
- MortonCopy<false, PixelFormat::R32UI>,
- nullptr,
- nullptr,
- nullptr,
- MortonCopy<false, PixelFormat::BGRA8_SRGB>,
- MortonCopy<false, PixelFormat::DXT1_SRGB>,
- MortonCopy<false, PixelFormat::DXT23_SRGB>,
- MortonCopy<false, PixelFormat::DXT45_SRGB>,
- MortonCopy<false, PixelFormat::BC7U_SRGB>,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- MortonCopy<false, PixelFormat::Z32F>,
- MortonCopy<false, PixelFormat::Z16>,
- MortonCopy<false, PixelFormat::Z24S8>,
- MortonCopy<false, PixelFormat::S8Z24>,
- MortonCopy<false, PixelFormat::Z32FS8>,
- // clang-format on
+ MortonCopy<false, PixelFormat::ABGR8U>,
+ MortonCopy<false, PixelFormat::ABGR8S>,
+ MortonCopy<false, PixelFormat::ABGR8UI>,
+ MortonCopy<false, PixelFormat::B5G6R5U>,
+ MortonCopy<false, PixelFormat::A2B10G10R10U>,
+ MortonCopy<false, PixelFormat::A1B5G5R5U>,
+ MortonCopy<false, PixelFormat::R8U>,
+ MortonCopy<false, PixelFormat::R8UI>,
+ MortonCopy<false, PixelFormat::RGBA16F>,
+ MortonCopy<false, PixelFormat::RGBA16U>,
+ MortonCopy<false, PixelFormat::RGBA16UI>,
+ MortonCopy<false, PixelFormat::R11FG11FB10F>,
+ MortonCopy<false, PixelFormat::RGBA32UI>,
+ MortonCopy<false, PixelFormat::DXT1>,
+ MortonCopy<false, PixelFormat::DXT23>,
+ MortonCopy<false, PixelFormat::DXT45>,
+ MortonCopy<false, PixelFormat::DXN1>,
+ MortonCopy<false, PixelFormat::DXN2UNORM>,
+ MortonCopy<false, PixelFormat::DXN2SNORM>,
+ MortonCopy<false, PixelFormat::BC7U>,
+ MortonCopy<false, PixelFormat::BC6H_UF16>,
+ MortonCopy<false, PixelFormat::BC6H_SF16>,
+ // TODO(Subv): Swizzling ASTC formats are not supported
+ nullptr,
+ MortonCopy<false, PixelFormat::BGRA8>,
+ MortonCopy<false, PixelFormat::RGBA32F>,
+ MortonCopy<false, PixelFormat::RG32F>,
+ MortonCopy<false, PixelFormat::R32F>,
+ MortonCopy<false, PixelFormat::R16F>,
+ MortonCopy<false, PixelFormat::R16U>,
+ MortonCopy<false, PixelFormat::R16S>,
+ MortonCopy<false, PixelFormat::R16UI>,
+ MortonCopy<false, PixelFormat::R16I>,
+ MortonCopy<false, PixelFormat::RG16>,
+ MortonCopy<false, PixelFormat::RG16F>,
+ MortonCopy<false, PixelFormat::RG16UI>,
+ MortonCopy<false, PixelFormat::RG16I>,
+ MortonCopy<false, PixelFormat::RG16S>,
+ MortonCopy<false, PixelFormat::RGB32F>,
+ MortonCopy<false, PixelFormat::RGBA8_SRGB>,
+ MortonCopy<false, PixelFormat::RG8U>,
+ MortonCopy<false, PixelFormat::RG8S>,
+ MortonCopy<false, PixelFormat::RG32UI>,
+ MortonCopy<false, PixelFormat::R32UI>,
+ nullptr,
+ nullptr,
+ nullptr,
+ MortonCopy<false, PixelFormat::BGRA8_SRGB>,
+ MortonCopy<false, PixelFormat::DXT1_SRGB>,
+ MortonCopy<false, PixelFormat::DXT23_SRGB>,
+ MortonCopy<false, PixelFormat::DXT45_SRGB>,
+ MortonCopy<false, PixelFormat::BC7U_SRGB>,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ MortonCopy<false, PixelFormat::Z32F>,
+ MortonCopy<false, PixelFormat::Z16>,
+ MortonCopy<false, PixelFormat::Z24S8>,
+ MortonCopy<false, PixelFormat::S8Z24>,
+ MortonCopy<false, PixelFormat::Z32FS8>,
};
static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
@@ -191,45 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
return morton_to_linear_fns[static_cast<std::size_t>(format)];
}
-/// 8x8 Z-Order coordinate from 2D coordinates
-static u32 MortonInterleave(u32 x, u32 y) {
- static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
- static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
- return xlut[x % 8] + ylut[y % 8];
-}
-
-/// Calculates the offset of the position of the pixel in Morton order
-static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
- // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
- // of which is composed of four 2x2 subtiles each of which is composed of four texels.
- // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
- // texels are laid out in a 2x2 subtile like this:
- // 2 3
- // 0 1
- //
- // The full 8x8 tile has the texels arranged like this:
- //
- // 42 43 46 47 58 59 62 63
- // 40 41 44 45 56 57 60 61
- // 34 35 38 39 50 51 54 55
- // 32 33 36 37 48 49 52 53
- // 10 11 14 15 26 27 30 31
- // 08 09 12 13 24 25 28 29
- // 02 03 06 07 18 19 22 23
- // 00 01 04 05 16 17 20 21
- //
- // This pattern is what's called Z-order curve, or Morton order.
-
- const unsigned int block_height = 8;
- const unsigned int coarse_x = x & ~7;
-
- u32 i = MortonInterleave(x, y);
-
- const unsigned int offset = coarse_x * block_height;
-
- return (i + offset) * bytes_per_pixel;
-}
-
static u32 MortonInterleave128(u32 x, u32 y) {
// 128x128 Z-Order coordinate from 2D coordinates
static constexpr u32 xlut[] = {
@@ -325,14 +281,14 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
- u8* buffer, std::size_t buffer_size, VAddr addr) {
-
+ u8* buffer, u8* addr) {
GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
- tile_width_spacing, buffer, buffer_size, addr);
+ tile_width_spacing, buffer, addr);
}
-void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
- u8* morton_data, u8* linear_data, bool morton_to_linear) {
+void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
+ u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
+ const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
u8* data_ptrs[2];
for (u32 y = 0; y < height; ++y) {
for (u32 x = 0; x < width; ++x) {
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index 065f59ce3..ee5b45555 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -13,9 +13,9 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
- u8* buffer, std::size_t buffer_size, VAddr addr);
+ u8* buffer, u8* addr);
-void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
- u8* morton_data, u8* linear_data, bool morton_to_linear);
+void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
+ u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
} // namespace VideoCore
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index a7bcf26fb..9fc9f3056 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -4,6 +4,7 @@
#pragma once
+#include <mutex>
#include <set>
#include <unordered_map>
@@ -12,14 +13,26 @@
#include "common/common_types.h"
#include "core/settings.h"
+#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
class RasterizerCacheObject {
public:
+ explicit RasterizerCacheObject(const u8* host_ptr)
+ : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
+
virtual ~RasterizerCacheObject();
+ CacheAddr GetCacheAddr() const {
+ return cache_addr;
+ }
+
+ const u8* GetHostPtr() const {
+ return host_ptr;
+ }
+
/// Gets the address of the shader in guest memory, required for cache management
- virtual VAddr GetAddr() const = 0;
+ virtual VAddr GetCpuAddr() const = 0;
/// Gets the size of the shader in guest memory, required for cache management
virtual std::size_t GetSizeInBytes() const = 0;
@@ -58,6 +71,8 @@ private:
bool is_registered{}; ///< Whether the object is currently registered with the cache
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
+ CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
+ const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
};
template <class T>
@@ -68,7 +83,9 @@ public:
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
/// Write any cached resources overlapping the specified region back to memory
- void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
+ void FlushRegion(CacheAddr addr, std::size_t size) {
+ std::lock_guard<std::recursive_mutex> lock{mutex};
+
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
for (auto& object : objects) {
FlushObject(object);
@@ -76,7 +93,9 @@ public:
}
/// Mark the specified region as being invalidated
- void InvalidateRegion(VAddr addr, u64 size) {
+ void InvalidateRegion(CacheAddr addr, u64 size) {
+ std::lock_guard<std::recursive_mutex> lock{mutex};
+
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
for (auto& object : objects) {
if (!object->IsRegistered()) {
@@ -89,48 +108,60 @@ public:
/// Invalidates everything in the cache
void InvalidateAll() {
+ std::lock_guard<std::recursive_mutex> lock{mutex};
+
while (interval_cache.begin() != interval_cache.end()) {
Unregister(*interval_cache.begin()->second.begin());
}
}
protected:
- /// Tries to get an object from the cache with the specified address
- T TryGet(VAddr addr) const {
+ /// Tries to get an object from the cache with the specified cache address
+ T TryGet(CacheAddr addr) const {
const auto iter = map_cache.find(addr);
if (iter != map_cache.end())
return iter->second;
return nullptr;
}
+ T TryGet(const void* addr) const {
+ const auto iter = map_cache.find(ToCacheAddr(addr));
+ if (iter != map_cache.end())
+ return iter->second;
+ return nullptr;
+ }
+
/// Register an object into the cache
- void Register(const T& object) {
+ virtual void Register(const T& object) {
+ std::lock_guard<std::recursive_mutex> lock{mutex};
+
object->SetIsRegistered(true);
interval_cache.add({GetInterval(object), ObjectSet{object}});
- map_cache.insert({object->GetAddr(), object});
- rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
+ map_cache.insert({object->GetCacheAddr(), object});
+ rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
}
/// Unregisters an object from the cache
- void Unregister(const T& object) {
- object->SetIsRegistered(false);
- rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
- // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
- if (Settings::values.use_accurate_gpu_emulation) {
- FlushObject(object);
- }
+ virtual void Unregister(const T& object) {
+ std::lock_guard<std::recursive_mutex> lock{mutex};
+ object->SetIsRegistered(false);
+ rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
- map_cache.erase(object->GetAddr());
+ map_cache.erase(object->GetCacheAddr());
}
/// Returns a ticks counter used for tracking when cached objects were last modified
u64 GetModifiedTicks() {
+ std::lock_guard<std::recursive_mutex> lock{mutex};
+
return ++modified_ticks;
}
/// Flushes the specified object, updating appropriate cache state as needed
void FlushObject(const T& object) {
+ std::lock_guard<std::recursive_mutex> lock{mutex};
+
if (!object->IsDirty()) {
return;
}
@@ -140,7 +171,7 @@ protected:
private:
/// Returns a list of cached objects from the specified memory region, ordered by access time
- std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
+ std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
if (size == 0) {
return {};
}
@@ -164,17 +195,18 @@ private:
}
using ObjectSet = std::set<T>;
- using ObjectCache = std::unordered_map<VAddr, T>;
- using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
+ using ObjectCache = std::unordered_map<CacheAddr, T>;
+ using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
using ObjectInterval = typename IntervalCache::interval_type;
static auto GetInterval(const T& object) {
- return ObjectInterval::right_open(object->GetAddr(),
- object->GetAddr() + object->GetSizeInBytes());
+ return ObjectInterval::right_open(object->GetCacheAddr(),
+ object->GetCacheAddr() + object->GetSizeInBytes());
}
ObjectCache map_cache;
IntervalCache interval_cache; ///< Cache of objects
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
VideoCore::RasterizerInterface& rasterizer;
+ std::recursive_mutex mutex;
};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 6a1dc9cf6..d7b86df38 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,7 +9,6 @@
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
namespace VideoCore {
@@ -35,14 +34,14 @@ public:
virtual void FlushAll() = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
- virtual void FlushRegion(VAddr addr, u64 size) = 0;
+ virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
- virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+ virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
- virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+ virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
/// Attempt to use a faster method to perform a surface copy
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
@@ -63,7 +62,7 @@ public:
}
/// Increase/decrease the number of object in pages touching the specified region
- virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {}
+ virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
/// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index b3062e5ba..f75c65825 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -13,24 +13,28 @@
namespace OpenGL {
+CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
+ std::size_t alignment, u8* host_ptr)
+ : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
+ host_ptr} {}
+
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
: RasterizerCache{rasterizer}, stream_buffer(size, true) {}
-GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
- std::size_t alignment, bool cache) {
+GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
+ bool cache) {
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
- const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
- ASSERT_MSG(cpu_addr, "Invalid GPU address");
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
cache &= size >= 2048;
+ const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
if (cache) {
- auto entry = TryGet(*cpu_addr);
+ auto entry = TryGet(host_ptr);
if (entry) {
- if (entry->size >= size && entry->alignment == alignment) {
- return entry->offset;
+ if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
+ return entry->GetOffset();
}
Unregister(entry);
}
@@ -39,17 +43,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
AlignBuffer(alignment);
const GLintptr uploaded_offset = buffer_offset;
- Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+ if (!host_ptr) {
+ return uploaded_offset;
+ }
+ std::memcpy(buffer_ptr, host_ptr, size);
buffer_ptr += size;
buffer_offset += size;
if (cache) {
- auto entry = std::make_shared<CachedBufferEntry>();
- entry->offset = uploaded_offset;
- entry->size = size;
- entry->alignment = alignment;
- entry->addr = *cpu_addr;
+ auto entry = std::make_shared<CachedBufferEntry>(
+ *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
Register(entry);
}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index c11acfb79..fc33aa433 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -17,22 +17,39 @@ namespace OpenGL {
class RasterizerOpenGL;
-struct CachedBufferEntry final : public RasterizerCacheObject {
- VAddr GetAddr() const override {
- return addr;
+class CachedBufferEntry final : public RasterizerCacheObject {
+public:
+ explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
+ std::size_t alignment, u8* host_ptr);
+
+ VAddr GetCpuAddr() const override {
+ return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return size;
}
+ std::size_t GetSize() const {
+ return size;
+ }
+
+ GLintptr GetOffset() const {
+ return offset;
+ }
+
+ std::size_t GetAlignment() const {
+ return alignment;
+ }
+
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
- VAddr addr;
- std::size_t size;
- GLintptr offset;
- std::size_t alignment;
+private:
+ VAddr cpu_addr{};
+ std::size_t size{};
+ GLintptr offset{};
+ std::size_t alignment{};
};
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
@@ -41,7 +58,7 @@ public:
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.
- GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
+ GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool cache = true);
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index c7f32feaa..0fbfbad55 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -7,7 +7,6 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
-#include "core/memory.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -15,12 +14,13 @@
namespace OpenGL {
-CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
+CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
+ : cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} {
buffer.Create();
// Bind and unbind the buffer so it gets allocated by the driver
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
- LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
+ LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
}
void CachedGlobalRegion::Reload(u32 size_) {
@@ -35,10 +35,10 @@ void CachedGlobalRegion::Reload(u32 size_) {
// TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
- glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
+ glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
}
-GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
+GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
const auto search{reserve.find(addr)};
if (search == reserve.end()) {
return {};
@@ -46,19 +46,22 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
return search->second;
}
-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
- GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
+ u8* host_ptr) {
+ GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
if (!region) {
// No reserved surface available, create a new one and reserve it
- region = std::make_shared<CachedGlobalRegion>(addr, size);
+ auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+ const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
+ region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
ReserveGlobalRegion(region);
}
region->Reload(size);
return region;
}
-void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
- reserve[region->GetAddr()] = region;
+void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
+ reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
}
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
@@ -69,22 +72,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
auto& gpu{Core::System::GetInstance().GPU()};
- const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
- const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
- cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
- ASSERT(cbuf_addr);
-
- const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
- const auto size = Memory::Read32(*cbuf_addr + 8);
- const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
- ASSERT(actual_addr);
+ auto& memory_manager{gpu.MemoryManager()};
+ const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
+ const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
+ global_region.GetCbufOffset()};
+ const auto actual_addr{memory_manager.Read<u64>(addr)};
+ const auto size{memory_manager.Read<u32>(addr + 8)};
// Look up global region in the cache based on address
- GlobalRegion region = TryGet(*actual_addr);
+ const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
+ GlobalRegion region{TryGet(host_ptr)};
if (!region) {
// No global region found - create a new one
- region = GetUncachedGlobalRegion(*actual_addr, size);
+ region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
Register(region);
}
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 37830bb7c..5a21ab66f 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -27,15 +27,13 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
class CachedGlobalRegion final : public RasterizerCacheObject {
public:
- explicit CachedGlobalRegion(VAddr addr, u32 size);
+ explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
- /// Gets the address of the shader in guest memory, required for cache management
- VAddr GetAddr() const {
- return addr;
+ VAddr GetCpuAddr() const override {
+ return cpu_addr;
}
- /// Gets the size of the shader in guest memory, required for cache management
- std::size_t GetSizeInBytes() const {
+ std::size_t GetSizeInBytes() const override {
return size;
}
@@ -53,9 +51,8 @@ public:
}
private:
- VAddr addr{};
+ VAddr cpu_addr{};
u32 size{};
-
OGLBuffer buffer;
};
@@ -68,11 +65,11 @@ public:
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
private:
- GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
- GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
- void ReserveGlobalRegion(const GlobalRegion& region);
+ GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
+ GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
+ void ReserveGlobalRegion(GlobalRegion region);
- std::unordered_map<VAddr, GlobalRegion> reserve;
+ std::unordered_map<CacheAddr, GlobalRegion> reserve;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index 77d5cedd2..2bcbd3da2 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -40,16 +40,12 @@ GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
return index_offset;
}
-GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
- u32 count) {
+GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
const std::size_t map_size{CalculateQuadSize(count)};
auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
- const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
- ASSERT_MSG(cpu_addr, "Invalid GPU address");
-
- const u8* source{Memory::GetPointer(*cpu_addr)};
+ const u8* source{memory_manager.GetPointer(gpu_addr)};
for (u32 primitive = 0; primitive < count / 4; ++primitive) {
for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
@@ -64,4 +60,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
return index_offset;
}
-} // namespace OpenGL \ No newline at end of file
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
index a8cb88eb5..0e2e7dc36 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.h
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h
@@ -24,7 +24,7 @@ public:
GLintptr MakeQuadArray(u32 first, u32 count);
- GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
+ GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);
private:
OGLBufferCache& buffer_cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 824863561..e06dfe43f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -102,8 +102,9 @@ struct FramebufferCacheKey {
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
ScreenInfo& info)
- : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
- screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
+ : res_cache{*this}, shader_cache{*this, system}, global_cache{*this},
+ emu_window{window}, system{system}, screen_info{info},
+ buffer_cache(*this, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -138,7 +139,7 @@ void RasterizerOpenGL::CheckExtensions() {
}
GLuint RasterizerOpenGL::SetupVertexFormat() {
- auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
if (!gpu.dirty_flags.vertex_attrib_format) {
@@ -207,7 +208,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
}
void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
- auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
if (gpu.dirty_flags.vertex_array.none())
@@ -224,8 +225,8 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
if (!vertex_array.IsEnabled())
continue;
- const Tegra::GPUVAddr start = vertex_array.StartAddress();
- const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+ const GPUVAddr start = vertex_array.StartAddress();
+ const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end > start);
const u64 size = end - start + 1;
@@ -248,7 +249,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
}
DrawParameters RasterizerOpenGL::SetupDraw() {
- const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ const auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
@@ -297,7 +298,7 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
- auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ auto& gpu = system.GPU().Maxwell3D();
BaseBindings base_bindings;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
@@ -413,15 +414,15 @@ void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (!regs.vertex_array[index].IsEnabled())
continue;
- const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress();
- const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+ const GPUVAddr start = regs.vertex_array[index].StartAddress();
+ const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end > start);
size += end - start + 1;
@@ -431,7 +432,7 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
}
std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
return static_cast<std::size_t>(regs.index_array.count) *
static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
@@ -448,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
}
-void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
+void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
const u64 page_start{addr >> Memory::PAGE_BITS};
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
@@ -487,7 +488,7 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
std::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
- auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
@@ -581,7 +582,7 @@ void RasterizerOpenGL::Clear() {
const auto prev_state{state};
SCOPE_EXIT({ prev_state.Apply(); });
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
@@ -672,7 +673,7 @@ void RasterizerOpenGL::DrawArrays() {
return;
MICROPROFILE_SCOPE(OpenGL_Drawing);
- auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
ConfigureFramebuffers(state);
@@ -746,20 +747,26 @@ void RasterizerOpenGL::DrawArrays() {
void RasterizerOpenGL::FlushAll() {}
-void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+ if (!addr || !size) {
+ return;
+ }
res_cache.FlushRegion(addr, size);
}
-void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+ if (!addr || !size) {
+ return;
+ }
res_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
global_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
}
-void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
FlushRegion(addr, size);
InvalidateRegion(addr, size);
}
@@ -781,7 +788,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
+ const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
if (!surface) {
return {};
}
@@ -892,7 +899,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
const Shader& shader, GLuint program_handle,
BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& gpu = Core::System::GetInstance().GPU();
+ const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
const auto& entries = shader->GetShaderEntries().const_buffers;
@@ -971,7 +978,7 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
- const auto& gpu = Core::System::GetInstance().GPU();
+ const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& entries = shader->GetShaderEntries().samplers;
@@ -998,7 +1005,7 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
}
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
const bool geometry_shaders_enabled =
regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
const std::size_t viewport_count =
@@ -1021,7 +1028,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
void RasterizerOpenGL::SyncClipEnabled(
const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{
regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0,
regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0,
@@ -1038,7 +1045,7 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.cull.enabled = regs.cull.enabled != 0;
@@ -1062,14 +1069,14 @@ void RasterizerOpenGL::SyncCullMode() {
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.primitive_restart.enabled = regs.primitive_restart.enabled;
state.primitive_restart.index = regs.primitive_restart.index;
}
void RasterizerOpenGL::SyncDepthTestState() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.depth.test_enabled = regs.depth_test_enable != 0;
state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
@@ -1081,7 +1088,7 @@ void RasterizerOpenGL::SyncDepthTestState() {
}
void RasterizerOpenGL::SyncStencilTestState() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.stencil.test_enabled = regs.stencil_enable != 0;
if (!regs.stencil_enable) {
@@ -1115,7 +1122,7 @@ void RasterizerOpenGL::SyncStencilTestState() {
}
void RasterizerOpenGL::SyncColorMask() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
const std::size_t count =
regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
for (std::size_t i = 0; i < count; i++) {
@@ -1129,18 +1136,18 @@ void RasterizerOpenGL::SyncColorMask() {
}
void RasterizerOpenGL::SyncMultiSampleState() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0;
state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0;
}
void RasterizerOpenGL::SyncFragmentColorClampState() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0;
}
void RasterizerOpenGL::SyncBlendState() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.blend_color.red = regs.blend_color.r;
state.blend_color.green = regs.blend_color.g;
@@ -1182,7 +1189,7 @@ void RasterizerOpenGL::SyncBlendState() {
}
void RasterizerOpenGL::SyncLogicOpState() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.logic_op.enabled = regs.logic_op.enable != 0;
@@ -1196,7 +1203,7 @@ void RasterizerOpenGL::SyncLogicOpState() {
}
void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
const bool geometry_shaders_enabled =
regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
const std::size_t viewport_count =
@@ -1218,17 +1225,17 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
}
void RasterizerOpenGL::SyncTransformFeedback() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
}
void RasterizerOpenGL::SyncPointState() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.point.size = regs.point_size;
}
void RasterizerOpenGL::SyncPolygonOffset() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
@@ -1238,7 +1245,7 @@ void RasterizerOpenGL::SyncPolygonOffset() {
}
void RasterizerOpenGL::CheckAlphaTests() {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& regs = system.GPU().Maxwell3D().regs;
UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
"Alpha Testing is enabled with more than one rendertarget");
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 7e63f8008..30f3e8acb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -57,9 +57,9 @@ public:
void DrawArrays() override;
void Clear() override;
void FlushAll() override;
- void FlushRegion(VAddr addr, u64 size) override;
- void InvalidateRegion(VAddr addr, u64 size) override;
- void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+ void FlushRegion(CacheAddr addr, u64 size) override;
+ void InvalidateRegion(CacheAddr addr, u64 size) override;
+ void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Common::Rectangle<u32>& src_rect,
@@ -67,7 +67,7 @@ public:
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
bool AccelerateDrawBatch(bool is_indexed) override;
- void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
+ void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
@@ -215,6 +215,7 @@ private:
GlobalRegionCacheOpenGL global_cache;
Core::Frontend::EmuWindow& emu_window;
+ Core::System& system;
ScreenInfo& screen_info;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index e9eb6e921..0235317c0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -55,12 +55,11 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
}
}
-void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
+void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
- const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
- addr = cpu_addr ? *cpu_addr : 0;
gpu_addr = gpu_addr_;
+ host_ptr = memory_manager.GetPointer(gpu_addr_);
size_in_bytes = SizeInBytesRaw();
if (IsPixelFormatASTC(pixel_format)) {
@@ -223,7 +222,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
}
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
- u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+ u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
SurfaceParams params{};
@@ -446,7 +445,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
- gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
+ gl_buffer.data() + offset_gl, params.host_ptr + offset);
offset += layer_size;
offset_gl += gl_size;
}
@@ -455,7 +454,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
- gl_buffer.data(), gl_buffer.size(), params.addr + offset);
+ gl_buffer.data(), params.host_ptr + offset);
}
}
@@ -513,9 +512,9 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
"reinterpretation but the texture is tiled.");
}
const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
-
+ auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
- Memory::GetPointer(dst_params.addr + src_params.size_in_bytes));
+ memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -563,8 +562,14 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
}
CachedSurface::CachedSurface(const SurfaceParams& params)
- : params(params), gl_target(SurfaceTargetToGL(params.target)),
- cached_size_in_bytes(params.size_in_bytes) {
+ : params{params}, gl_target{SurfaceTargetToGL(params.target)},
+ cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} {
+
+ const auto optional_cpu_addr{
+ Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
+ ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
+ cpu_addr = *optional_cpu_addr;
+
texture.Create(gl_target);
// TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
@@ -603,19 +608,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
ApplyTextureDefaults(texture.handle, params.max_mip_level);
- OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString());
-
- // Clamp size to mapped GPU memory region
- // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
- // R32F render buffer. We do not yet know if this is a game bug or something else, but this
- // check is necessary to prevent flushing from overwriting unmapped memory.
-
- auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
- const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr};
- if (cached_size_in_bytes > max_size) {
- LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
- cached_size_in_bytes = max_size;
- }
+ OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -633,10 +626,9 @@ void CachedSurface::LoadGLBuffer() {
const u32 bpp = params.GetFormatBpp() / 8;
const u32 copy_size = params.width * bpp;
if (params.pitch == copy_size) {
- std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr),
- params.size_in_bytes_gl);
+ std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
} else {
- const u8* start = Memory::GetPointer(params.addr);
+ const u8* start{params.host_ptr};
u8* write_to = gl_buffer[0].data();
for (u32 h = params.height; h > 0; h--) {
std::memcpy(write_to, start, copy_size);
@@ -680,8 +672,6 @@ void CachedSurface::FlushGLBuffer() {
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
params.height, params.depth, true, true);
- const u8* const texture_src_data = Memory::GetPointer(params.addr);
- ASSERT(texture_src_data);
if (params.is_tiled) {
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
params.block_width, static_cast<u32>(params.target));
@@ -691,9 +681,9 @@ void CachedSurface::FlushGLBuffer() {
const u32 bpp = params.GetFormatBpp() / 8;
const u32 copy_size = params.width * bpp;
if (params.pitch == copy_size) {
- std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes());
+ std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
} else {
- u8* start = Memory::GetPointer(params.addr);
+ u8* start{params.host_ptr};
const u8* read_to = gl_buffer[0].data();
for (u32 h = params.height; h > 0; h--) {
std::memcpy(start, read_to, copy_size);
@@ -927,12 +917,12 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
- if (params.addr == 0 || params.height * params.width == 0) {
+ if (!params.IsValid()) {
return {};
}
// Look up surface in the cache based on address
- Surface surface{TryGet(params.addr)};
+ Surface surface{TryGet(params.host_ptr)};
if (surface) {
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
// Use the cached surface as-is unless it's not synced with memory
@@ -943,7 +933,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
// If surface parameters changed and we care about keeping the previous data, recreate
// the surface from the old one
Surface new_surface{RecreateSurface(surface, params)};
- UnregisterSurface(surface);
+ Unregister(surface);
Register(new_surface);
if (new_surface->IsUploaded()) {
RegisterReinterpretSurface(new_surface);
@@ -951,7 +941,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
return new_surface;
} else {
// Delete the old surface before creating a new one to prevent collisions.
- UnregisterSurface(surface);
+ Unregister(surface);
}
}
@@ -981,14 +971,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
const Surface& dst_surface) {
const auto& init_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
- VAddr address = init_params.addr;
- const std::size_t layer_size = dst_params.LayerMemorySize();
+ auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+ GPUVAddr address{init_params.gpu_addr};
+ const std::size_t layer_size{dst_params.LayerMemorySize()};
for (u32 layer = 0; layer < dst_params.depth; layer++) {
for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
- const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
- const Surface& copy = TryGet(sub_address);
- if (!copy)
+ const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
+ const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
+ if (!copy) {
continue;
+ }
const auto& src_params{copy->GetSurfaceParams()};
const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
@@ -1163,7 +1155,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
const auto& dst_params{dst_surface->GetSurfaceParams()};
// Flush enough memory for both the source and destination surface
- FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize()));
+ FlushRegion(ToCacheAddr(src_params.host_ptr),
+ std::max(src_params.MemorySize(), dst_params.MemorySize()));
LoadSurface(dst_surface);
}
@@ -1215,8 +1208,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
return new_surface;
}
-Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {
- return TryGet(addr);
+Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
+ return TryGet(host_ptr);
}
void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
@@ -1243,9 +1236,9 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar
return {};
}
-static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
- const std::size_t size = params.LayerMemorySize();
- VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
+static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) {
+ const std::size_t size{params.LayerMemorySize()};
+ GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
for (u32 i = 0; i < params.depth; i++) {
if (start == addr) {
return {i};
@@ -1267,7 +1260,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
src_params.height == dst_params.MipHeight(*level) &&
src_params.block_height >= dst_params.MipBlockHeight(*level)) {
const std::optional<u32> slot =
- TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
+ TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
if (slot.has_value()) {
glCopyImageSubData(render_surface->Texture().handle,
SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
@@ -1283,8 +1276,8 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
}
static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
- const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
- const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
+ const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
+ const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
if (bound2 > bound1)
return true;
const auto& dst_params = blitted_surface->GetSurfaceParams();
@@ -1302,12 +1295,12 @@ static bool IsReinterpretInvalidSecond(const Surface render_surface,
bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
Surface intersect) {
if (IsReinterpretInvalid(triggering_surface, intersect)) {
- UnregisterSurface(intersect);
+ Unregister(intersect);
return false;
}
if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
- UnregisterSurface(intersect);
+ Unregister(intersect);
return false;
}
FlushObject(intersect);
@@ -1327,7 +1320,8 @@ void RasterizerCacheOpenGL::SignalPreDrawCall() {
void RasterizerCacheOpenGL::SignalPostDrawCall() {
for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
if (current_color_buffers[i] != nullptr) {
- Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
+ Surface intersect =
+ CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
if (intersect != nullptr) {
PartialReinterpretSurface(current_color_buffers[i], intersect);
texception = true;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 9cf6f50be..c644271d0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -109,6 +109,11 @@ struct SurfaceParams {
return size;
}
+ /// Returns true if the parameters constitute a valid rasterizer surface.
+ bool IsValid() const {
+ return gpu_addr && host_ptr && height && width;
+ }
+
/// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
/// mipmaps.
std::size_t LayerMemorySize() const {
@@ -210,7 +215,7 @@ struct SurfaceParams {
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(
- u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+ u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
@@ -232,7 +237,7 @@ struct SurfaceParams {
}
/// Initializes parameters for caching, should be called after everything has been initialized
- void InitCacheParameters(Tegra::GPUVAddr gpu_addr);
+ void InitCacheParameters(GPUVAddr gpu_addr);
std::string TargetName() const {
switch (target) {
@@ -296,8 +301,8 @@ struct SurfaceParams {
bool is_array;
bool srgb_conversion;
// Parameters used for caching
- VAddr addr;
- Tegra::GPUVAddr gpu_addr;
+ u8* host_ptr;
+ GPUVAddr gpu_addr;
std::size_t size_in_bytes;
std::size_t size_in_bytes_gl;
@@ -345,10 +350,10 @@ class RasterizerOpenGL;
class CachedSurface final : public RasterizerCacheObject {
public:
- CachedSurface(const SurfaceParams& params);
+ explicit CachedSurface(const SurfaceParams& params);
- VAddr GetAddr() const override {
- return params.addr;
+ VAddr GetCpuAddr() const override {
+ return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
@@ -432,6 +437,7 @@ private:
std::size_t memory_size;
bool reinterpreted = false;
bool must_reload = false;
+ VAddr cpu_addr{};
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -449,7 +455,7 @@ public:
Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
/// Tries to find a framebuffer using on the provided CPU address
- Surface TryFindFramebufferSurface(VAddr addr) const;
+ Surface TryFindFramebufferSurface(const u8* host_ptr) const;
/// Copies the contents of one surface to another
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
@@ -506,12 +512,12 @@ private:
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
Surface last_depth_buffer;
- using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
+ using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
static auto GetReinterpretInterval(const Surface& object) {
- return SurfaceInterval::right_open(object->GetAddr() + 1,
- object->GetAddr() + object->GetMemorySize() - 1);
+ return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
+ object->GetCacheAddr() + object->GetMemorySize() - 1);
}
// Reinterpreted surfaces are very fragil as the game may keep rendering into them.
@@ -523,7 +529,7 @@ private:
reinterpret_surface->MarkReinterpreted();
}
- Surface CollideOnReinterpretedSurface(VAddr addr) const {
+ Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
const SurfaceInterval interval{addr};
for (auto& pair :
boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
@@ -532,13 +538,17 @@ private:
return nullptr;
}
+ void Register(const Surface& object) {
+ RasterizerCache<Surface>::Register(object);
+ }
+
/// Unregisters an object from the cache
- void UnregisterSurface(const Surface& object) {
+ void Unregister(const Surface& object) {
if (object->IsReinterpreted()) {
auto interval = GetReinterpretInterval(object);
reinterpreted_surfaces.erase(interval);
}
- Unregister(object);
+ RasterizerCache<Surface>::Unregister(object);
}
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4883e4f62..1f8eca6f0 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -32,19 +32,16 @@ struct UnspecializedShader {
namespace {
/// Gets the address for the specified shader stage program
-VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
- const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
- const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
- const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
- shader_config.offset);
- ASSERT_MSG(address, "Invalid GPU address");
- return *address;
+GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
+ const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
+ const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
+ return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}
/// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(VAddr addr) {
+ProgramCode GetShaderCode(const u8* host_ptr) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
- Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
+ std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
return program_code;
}
@@ -214,12 +211,13 @@ std::set<GLenum> GetSupportedFormats() {
} // namespace
-CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
- ShaderDiskCacheOpenGL& disk_cache,
+CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
+ Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
- ProgramCode&& program_code, ProgramCode&& program_code_b)
- : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
- disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
+ ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
+ : host_ptr{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
+ program_type{program_type}, disk_cache{disk_cache},
+ precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} {
const std::size_t code_size = CalculateProgramSize(program_code);
const std::size_t code_size_b =
@@ -243,12 +241,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro
disk_cache.SaveRaw(raw);
}
-CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
- ShaderDiskCacheOpenGL& disk_cache,
+CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
+ Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
- GLShader::ProgramResult result)
- : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
- disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
+ GLShader::ProgramResult result, u8* host_ptr)
+ : cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type},
+ disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{
+ host_ptr} {
code = std::move(result.first);
entries = result.second;
@@ -271,7 +270,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
}
- LabelGLObject(GL_PROGRAM, program->handle, addr);
+ LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
}
handle = program->handle;
@@ -323,7 +322,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
}
- LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name);
+ LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
return target_program->handle;
};
@@ -486,29 +485,32 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
return last_shaders[static_cast<u32>(program)];
}
- const VAddr program_addr{GetShaderAddress(program)};
+ auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+ const GPUVAddr program_addr{GetShaderAddress(program)};
// Look up shader in the cache based on address
- Shader shader{TryGet(program_addr)};
+ const auto& host_ptr{memory_manager.GetPointer(program_addr)};
+ Shader shader{TryGet(host_ptr)};
if (!shader) {
// No shader found - create a new one
- ProgramCode program_code = GetShaderCode(program_addr);
+ ProgramCode program_code{GetShaderCode(host_ptr)};
ProgramCode program_code_b;
if (program == Maxwell::ShaderProgram::VertexA) {
- program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB));
+ program_code_b = GetShaderCode(
+ memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
}
const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
-
+ const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
const auto found = precompiled_shaders.find(unique_identifier);
if (found != precompiled_shaders.end()) {
shader =
- std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
- precompiled_programs, found->second);
+ std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
+ precompiled_programs, found->second, host_ptr);
} else {
shader = std::make_shared<CachedShader>(
- program_addr, unique_identifier, program, disk_cache, precompiled_programs,
- std::move(program_code), std::move(program_code_b));
+ cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
+ std::move(program_code), std::move(program_code_b), host_ptr);
}
Register(shader);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 97eed192f..fd1c85115 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
class CachedShader final : public RasterizerCacheObject {
public:
- explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
- ShaderDiskCacheOpenGL& disk_cache,
+ explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
+ Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
- ProgramCode&& program_code, ProgramCode&& program_code_b);
+ ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
- explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
- ShaderDiskCacheOpenGL& disk_cache,
+ explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
+ Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
- GLShader::ProgramResult result);
+ GLShader::ProgramResult result, u8* host_ptr);
- VAddr GetAddr() const override {
- return addr;
+ VAddr GetCpuAddr() const override {
+ return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
@@ -91,7 +91,8 @@ private:
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
- VAddr addr{};
+ u8* host_ptr{};
+ VAddr cpu_addr{};
u64 unique_identifier{};
Maxwell::ShaderProgram program_type{};
ShaderDiskCacheOpenGL& disk_cache;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 8b510b6ae..5e3d862c6 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -164,12 +164,13 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
- Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
- Memory::FlushMode::Flush);
+ rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
- VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4,
- Memory::GetPointer(framebuffer_addr),
- gl_framebuffer_data.data(), true);
+ constexpr u32 linear_bpp = 4;
+ VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
+ framebuffer.width, framebuffer.height, bytes_per_pixel,
+ linear_bpp, Memory::GetPointer(framebuffer_addr),
+ gl_framebuffer_data.data());
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 4a33a6c84..eac51ecb3 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -17,6 +17,11 @@
namespace Vulkan {
+CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
+ std::size_t alignment, u8* host_ptr)
+ : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
+ host_ptr} {}
+
VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
@@ -34,19 +39,20 @@ VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
VKBufferCache::~VKBufferCache() = default;
-u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
- bool cache) {
+u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
- ASSERT(cpu_addr);
+ ASSERT_MSG(cpu_addr, "Invalid GPU address");
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
cache &= size >= 2048;
+ const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
if (cache) {
- if (auto entry = TryGet(*cpu_addr); entry) {
- if (entry->size >= size && entry->alignment == alignment) {
- return entry->offset;
+ auto entry = TryGet(host_ptr);
+ if (entry) {
+ if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
+ return entry->GetOffset();
}
Unregister(entry);
}
@@ -55,17 +61,17 @@ u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64
AlignBuffer(alignment);
const u64 uploaded_offset = buffer_offset;
- Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+ if (!host_ptr) {
+ return uploaded_offset;
+ }
+ std::memcpy(buffer_ptr, host_ptr, size);
buffer_ptr += size;
buffer_offset += size;
if (cache) {
- auto entry = std::make_shared<CachedBufferEntry>();
- entry->offset = uploaded_offset;
- entry->size = size;
- entry->alignment = alignment;
- entry->addr = *cpu_addr;
+ auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
+ alignment, host_ptr);
Register(entry);
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index d8e916f31..08b786aad 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -24,22 +24,39 @@ class VKFence;
class VKMemoryManager;
class VKStreamBuffer;
-struct CachedBufferEntry final : public RasterizerCacheObject {
- VAddr GetAddr() const override {
- return addr;
+class CachedBufferEntry final : public RasterizerCacheObject {
+public:
+ explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
+ u8* host_ptr);
+
+ VAddr GetCpuAddr() const override {
+ return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return size;
}
+ std::size_t GetSize() const {
+ return size;
+ }
+
+ u64 GetOffset() const {
+ return offset;
+ }
+
+ std::size_t GetAlignment() const {
+ return alignment;
+ }
+
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
- VAddr addr;
- std::size_t size;
- u64 offset;
- std::size_t alignment;
+private:
+ VAddr cpu_addr{};
+ std::size_t size{};
+ u64 offset{};
+ std::size_t alignment{};
};
class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
@@ -51,8 +68,7 @@ public:
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.
- u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4,
- bool cache = true);
+ u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
new file mode 100644
index 000000000..ed3178f09
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -0,0 +1,81 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <optional>
+#include <unordered_map>
+
+#include "common/assert.h"
+#include "common/cityhash.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_sampler_cache.h"
+#include "video_core/textures/texture.h"
+
+namespace Vulkan {
+
+static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> color) {
+ // TODO(Rodrigo): Manage integer border colors
+ if (color == std::array<float, 4>{0, 0, 0, 0}) {
+ return vk::BorderColor::eFloatTransparentBlack;
+ } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
+ return vk::BorderColor::eFloatOpaqueBlack;
+ } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
+ return vk::BorderColor::eFloatOpaqueWhite;
+ } else {
+ return {};
+ }
+}
+
+std::size_t SamplerCacheKey::Hash() const {
+ static_assert(sizeof(raw) % sizeof(u64) == 0);
+ return static_cast<std::size_t>(
+ Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
+}
+
+bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
+ return raw == rhs.raw;
+}
+
+VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}
+
+VKSamplerCache::~VKSamplerCache() = default;
+
+vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) {
+ const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
+ auto& sampler = entry->second;
+ if (is_cache_miss) {
+ sampler = CreateSampler(tsc);
+ }
+ return *sampler;
+}
+
+UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) {
+ const float max_anisotropy = tsc.GetMaxAnisotropy();
+ const bool has_anisotropy = max_anisotropy > 1.0f;
+
+ const auto border_color = tsc.GetBorderColor();
+ const auto vk_border_color = TryConvertBorderColor(border_color);
+ UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}",
+ border_color[0], border_color[1], border_color[2], border_color[3]);
+
+ constexpr bool unnormalized_coords = false;
+
+ const vk::SamplerCreateInfo sampler_ci(
+ {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
+ MaxwellToVK::Sampler::Filter(tsc.min_filter),
+ MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
+ MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v),
+ MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy,
+ max_anisotropy, tsc.depth_compare_enabled,
+ MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
+ tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
+ unnormalized_coords);
+
+ const auto& dld = device.GetDispatchLoader();
+ const auto dev = device.GetLogical();
+ return dev.createSamplerUnique(sampler_ci, nullptr, dld);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
new file mode 100644
index 000000000..c6394dc87
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -0,0 +1,56 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_map>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/textures/texture.h"
+
+namespace Vulkan {
+
+class VKDevice;
+
+struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
+ std::size_t Hash() const;
+
+ bool operator==(const SamplerCacheKey& rhs) const;
+
+ bool operator!=(const SamplerCacheKey& rhs) const {
+ return !operator==(rhs);
+ }
+};
+
+} // namespace Vulkan
+
+namespace std {
+
+template <>
+struct hash<Vulkan::SamplerCacheKey> {
+ std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+
+} // namespace std
+
+namespace Vulkan {
+
+class VKSamplerCache {
+public:
+ explicit VKSamplerCache(const VKDevice& device);
+ ~VKSamplerCache();
+
+ vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc);
+
+private:
+ UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc);
+
+ const VKDevice& device;
+ std::unordered_map<SamplerCacheKey, UniqueSampler> cache;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index cad7340f5..995d0e068 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -6,7 +6,6 @@
#include <cstring>
#include "common/alignment.h"
#include "common/assert.h"
-#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
@@ -230,18 +229,18 @@ u32 BytesPerPixel(TextureFormat format) {
}
}
-void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
+void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
u32 block_depth, u32 width_spacing) {
CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
(height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
- bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true,
- block_height, block_depth, width_spacing);
+ bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
+ width_spacing);
}
-std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
- u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
- u32 block_height, u32 block_depth, u32 width_spacing) {
+std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
+ u32 width, u32 height, u32 depth, u32 block_height,
+ u32 block_depth, u32 width_spacing) {
std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
width, height, depth, block_height, block_depth, width_spacing);
@@ -249,8 +248,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y
}
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
- u32 block_height) {
+ u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) {
const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
gob_size_x};
for (u32 line = 0; line < subrect_height; ++line) {
@@ -262,17 +260,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
const u32 gob_address =
gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
- const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
- const VAddr dest_addr = swizzled_data + swizzled_offset;
+ u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
+ u8* dest_addr = swizzled_data + swizzled_offset;
- Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel);
+ std::memcpy(dest_addr, source_line, bytes_per_pixel);
}
}
}
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
- u32 block_height, u32 offset_x, u32 offset_y) {
+ u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+ u32 offset_x, u32 offset_y) {
for (u32 line = 0; line < subrect_height; ++line) {
const u32 y2 = line + offset_y;
const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
@@ -282,10 +280,10 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
const u32 x2 = (x + offset_x) * bytes_per_pixel;
const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
- const VAddr dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
- const VAddr source_addr = swizzled_data + swizzled_offset;
+ u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
+ u8* source_addr = swizzled_data + swizzled_offset;
- Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel);
+ std::memcpy(dest_line, source_addr, bytes_per_pixel);
}
}
}
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 65df86890..e078fa274 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -17,14 +17,14 @@ inline std::size_t GetGOBSize() {
}
/// Unswizzles a swizzled texture without changing its format.
-void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
+void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight,
u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
/// Unswizzles a swizzled texture without changing its format.
-std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
- u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
+std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
+ u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight,
u32 block_depth = TICEntry::DefaultBlockHeight,
u32 width_spacing = 0);
@@ -44,12 +44,11 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
/// Copies an untiled subrectangle into a tiled surface.
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
- u32 block_height);
+ u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
/// Copies a tiled subrectangle into a linear surface.
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
- u32 block_height, u32 offset_x, u32 offset_y);
+ u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+ u32 offset_x, u32 offset_y);
} // namespace Tegra::Texture
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 8c278c0e2..93ecc6e31 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -283,31 +283,36 @@ enum class TextureMipmapFilter : u32 {
struct TSCEntry {
union {
- BitField<0, 3, WrapMode> wrap_u;
- BitField<3, 3, WrapMode> wrap_v;
- BitField<6, 3, WrapMode> wrap_p;
- BitField<9, 1, u32> depth_compare_enabled;
- BitField<10, 3, DepthCompareFunc> depth_compare_func;
- BitField<13, 1, u32> srgb_conversion;
- BitField<20, 3, u32> max_anisotropy;
+ struct {
+ union {
+ BitField<0, 3, WrapMode> wrap_u;
+ BitField<3, 3, WrapMode> wrap_v;
+ BitField<6, 3, WrapMode> wrap_p;
+ BitField<9, 1, u32> depth_compare_enabled;
+ BitField<10, 3, DepthCompareFunc> depth_compare_func;
+ BitField<13, 1, u32> srgb_conversion;
+ BitField<20, 3, u32> max_anisotropy;
+ };
+ union {
+ BitField<0, 2, TextureFilter> mag_filter;
+ BitField<4, 2, TextureFilter> min_filter;
+ BitField<6, 2, TextureMipmapFilter> mipmap_filter;
+ BitField<9, 1, u32> cubemap_interface_filtering;
+ BitField<12, 13, u32> mip_lod_bias;
+ };
+ union {
+ BitField<0, 12, u32> min_lod_clamp;
+ BitField<12, 12, u32> max_lod_clamp;
+ BitField<24, 8, u32> srgb_border_color_r;
+ };
+ union {
+ BitField<12, 8, u32> srgb_border_color_g;
+ BitField<20, 8, u32> srgb_border_color_b;
+ };
+ std::array<f32, 4> border_color;
+ };
+ std::array<u8, 0x20> raw;
};
- union {
- BitField<0, 2, TextureFilter> mag_filter;
- BitField<4, 2, TextureFilter> min_filter;
- BitField<6, 2, TextureMipmapFilter> mipmap_filter;
- BitField<9, 1, u32> cubemap_interface_filtering;
- BitField<12, 13, u32> mip_lod_bias;
- };
- union {
- BitField<0, 12, u32> min_lod_clamp;
- BitField<12, 12, u32> max_lod_clamp;
- BitField<24, 8, u32> srgb_border_color_r;
- };
- union {
- BitField<12, 8, u32> srgb_border_color_g;
- BitField<20, 8, u32> srgb_border_color_b;
- };
- std::array<f32, 4> border_color;
float GetMaxAnisotropy() const {
return static_cast<float>(1U << max_anisotropy);
@@ -324,7 +329,7 @@ struct TSCEntry {
float GetLodBias() const {
// Sign extend the 13-bit value.
constexpr u32 mask = 1U << (13 - 1);
- return static_cast<float>((mip_lod_bias ^ mask) - mask) / 256.0f;
+ return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f;
}
std::array<float, 4> GetBorderColor() const {