summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/CMakeLists.txt 1
-rw-r--r-- src/video_core/dirty_flags.cpp 2
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 8
-rw-r--r-- src/video_core/engines/maxwell_3d.h 26
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 127
-rw-r--r-- src/video_core/engines/maxwell_dma.h 2
-rw-r--r-- src/video_core/host1x/syncpoint_manager.cpp 12
-rw-r--r-- src/video_core/memory_manager.cpp 61
-rw-r--r-- src/video_core/memory_manager.h 21
-rw-r--r-- src/video_core/pte_kind.h 264
-rw-r--r-- src/video_core/renderer_base.cpp 8
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 17
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 4
-rw-r--r-- src/video_core/renderer_opengl/gl_state_tracker.cpp 14
-rw-r--r-- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp 10
-rw-r--r-- src/video_core/renderer_vulkan/vk_query_cache.cpp 7
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp 14
-rw-r--r-- src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp 84
-rw-r--r-- src/video_core/renderer_vulkan/vk_staging_buffer_pool.h 3
-rw-r--r-- src/video_core/renderer_vulkan/vk_state_tracker.cpp 12
-rw-r--r-- src/video_core/texture_cache/descriptor_table.h 2
-rw-r--r-- src/video_core/texture_cache/format_lookup_table.cpp 2
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 8
23 files changed, 520 insertions, 189 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 40e6d1ec4..cb8b46edf 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -82,6 +82,7 @@ add_library(video_core STATIC
gpu_thread.h
memory_manager.cpp
memory_manager.h
+ pte_kind.h
query_cache.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index 1039e036f..c2ecc12f5 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -61,7 +61,7 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
}
void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) {
- FillBlock(tables[0], OFF(pipelines), NUM(pipelines) * Maxwell3D::Regs::MaxShaderProgram,
+ FillBlock(tables[0], OFF(pipelines), NUM(pipelines[0]) * Maxwell3D::Regs::MaxShaderProgram,
Shaders);
}
} // Anonymous namespace
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index fdf470913..b1a22b76c 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -74,15 +74,15 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.stencil_front_op.zfail = Regs::StencilOp::Op::Keep_D3D;
regs.stencil_front_op.zpass = Regs::StencilOp::Op::Keep_D3D;
regs.stencil_front_op.func = Regs::ComparisonOp::Always_GL;
- regs.stencil_front_func.func_mask = 0xFFFFFFFF;
- regs.stencil_front_func.mask = 0xFFFFFFFF;
+ regs.stencil_front_func_mask = 0xFFFFFFFF;
+ regs.stencil_front_mask = 0xFFFFFFFF;
regs.stencil_two_side_enable = 1;
regs.stencil_back_op.fail = Regs::StencilOp::Op::Keep_D3D;
regs.stencil_back_op.zfail = Regs::StencilOp::Op::Keep_D3D;
regs.stencil_back_op.zpass = Regs::StencilOp::Op::Keep_D3D;
regs.stencil_back_op.func = Regs::ComparisonOp::Always_GL;
- regs.stencil_back_func.func_mask = 0xFFFFFFFF;
- regs.stencil_back_func.mask = 0xFFFFFFFF;
+ regs.stencil_back_func_mask = 0xFFFFFFFF;
+ regs.stencil_back_mask = 0xFFFFFFFF;
regs.depth_test_func = Regs::ComparisonOp::Always_GL;
regs.gl_front_face = Regs::FrontFace::CounterClockWise;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index efe1073b0..75e3b868d 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -390,7 +390,7 @@ public:
FractionalEven = 2,
};
- enum class OutputPrimitves : u32 {
+ enum class OutputPrimitives : u32 {
Points = 0,
Lines = 1,
Triangles_CW = 2,
@@ -401,7 +401,7 @@ public:
union {
BitField<0, 2, DomainType> domain_type;
BitField<4, 2, Spacing> spacing;
- BitField<8, 2, OutputPrimitves> output_primitives;
+ BitField<8, 2, OutputPrimitives> output_primitives;
};
} params;
@@ -1795,12 +1795,6 @@ public:
ComparisonOp func;
};
- struct StencilFunc {
- s32 ref;
- u32 func_mask;
- u32 mask;
- };
-
struct PsSaturate {
// Opposite of DepthMode
enum class Depth : u32 {
@@ -2737,7 +2731,9 @@ public:
u32 post_z_pixel_imask; ///< 0x0F1C
INSERT_PADDING_BYTES_NOINIT(0x20);
ConstantColorRendering const_color_rendering; ///< 0x0F40
- StencilFunc stencil_back_func; ///< 0x0F54
+ s32 stencil_back_ref; ///< 0x0F54
+ u32 stencil_back_mask; ///< 0x0F58
+ u32 stencil_back_func_mask; ///< 0x0F5C
INSERT_PADDING_BYTES_NOINIT(0x24);
VertexStreamSubstitute vertex_stream_substitute; ///< 0x0F84
u32 line_mode_clip_generated_edge_do_not_draw; ///< 0x0F8C
@@ -2855,7 +2851,9 @@ public:
Blend blend; ///< 0x133C
u32 stencil_enable; ///< 0x1380
StencilOp stencil_front_op; ///< 0x1384
- StencilFunc stencil_front_func; ///< 0x1394
+ s32 stencil_front_ref; ///< 0x1394
+ u32 stencil_front_func_mask; ///< 0x1398 (bit mask, unsigned like stencil_back_func_mask)
+ u32 stencil_front_mask; ///< 0x139C (bit mask, unsigned like stencil_back_mask)
INSERT_PADDING_BYTES_NOINIT(0x4);
u32 draw_auto_start_byte_count; ///< 0x13A4
PsSaturate frag_color_clamp; ///< 0x13A8
@@ -3311,7 +3309,9 @@ ASSERT_REG_POSITION(vpc_perf, 0x0F14);
ASSERT_REG_POSITION(pm_local_trigger, 0x0F18);
ASSERT_REG_POSITION(post_z_pixel_imask, 0x0F1C);
ASSERT_REG_POSITION(const_color_rendering, 0x0F40);
-ASSERT_REG_POSITION(stencil_back_func, 0x0F54);
+ASSERT_REG_POSITION(stencil_back_ref, 0x0F54);
+ASSERT_REG_POSITION(stencil_back_mask, 0x0F58);
+ASSERT_REG_POSITION(stencil_back_func_mask, 0x0F5C);
ASSERT_REG_POSITION(vertex_stream_substitute, 0x0F84);
ASSERT_REG_POSITION(line_mode_clip_generated_edge_do_not_draw, 0x0F8C);
ASSERT_REG_POSITION(color_mask_common, 0x0F90);
@@ -3416,7 +3416,9 @@ ASSERT_REG_POSITION(invalidate_texture_data_cache_lines, 0x1338);
ASSERT_REG_POSITION(blend, 0x133C);
ASSERT_REG_POSITION(stencil_enable, 0x1380);
ASSERT_REG_POSITION(stencil_front_op, 0x1384);
-ASSERT_REG_POSITION(stencil_front_func, 0x1394);
+ASSERT_REG_POSITION(stencil_front_ref, 0x1394);
+ASSERT_REG_POSITION(stencil_front_func_mask, 0x1398);
+ASSERT_REG_POSITION(stencil_front_mask, 0x139C);
ASSERT_REG_POSITION(draw_auto_start_byte_count, 0x13A4);
ASSERT_REG_POSITION(frag_color_clamp, 0x13A8);
ASSERT_REG_POSITION(window_origin, 0x13AC);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 3909d36c1..4eb7a100d 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -56,66 +56,85 @@ void MaxwellDMA::Launch() {
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
- const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
- const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
-
- if (!is_src_pitch && !is_dst_pitch) {
- // If both the source and the destination are in block layout, assert.
- UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
- return;
- }
+ if (launch.multi_line_enable) {
+ const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+ const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+
+ if (!is_src_pitch && !is_dst_pitch) {
+ // If both the source and the destination are in block layout, assert.
+ UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
+ return;
+ }
- if (is_src_pitch && is_dst_pitch) {
- CopyPitchToPitch();
+ if (is_src_pitch && is_dst_pitch) {
+ for (u32 line = 0; line < regs.line_count; ++line) {
+ const GPUVAddr source_line =
+ regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
+ const GPUVAddr dest_line =
+ regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
+ memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
+ }
+ } else {
+ if (!is_src_pitch && is_dst_pitch) {
+ CopyBlockLinearToPitch();
+ } else {
+ CopyPitchToBlockLinear();
+ }
+ }
} else {
- ASSERT(launch.multi_line_enable == 1);
-
- if (!is_src_pitch && is_dst_pitch) {
- CopyBlockLinearToPitch();
+ // TODO: allow multisized components.
+ auto& accelerate = rasterizer->AccessAccelerateDMA();
+ const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
+ if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
+ ASSERT(regs.remap_const.component_size_minus_one == 3);
+ accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
+ std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
+ memory_manager.WriteBlockUnsafe(regs.offset_out,
+ reinterpret_cast<u8*>(tmp_buffer.data()),
+ regs.line_length_in * sizeof(u32));
} else {
- CopyPitchToBlockLinear();
+ auto convert_linear_2_blocklinear_addr = [](u64 address) {
+ return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
+ ((address & 0x180) >> 1) | ((address & 0x20) << 3);
+ };
+ auto src_kind = memory_manager.GetPageKind(regs.offset_in);
+ auto dst_kind = memory_manager.GetPageKind(regs.offset_out);
+ const bool is_src_pitch = IsPitchKind(static_cast<PTEKind>(src_kind));
+ const bool is_dst_pitch = IsPitchKind(static_cast<PTEKind>(dst_kind));
+ if (!is_src_pitch && is_dst_pitch) {
+ std::vector<u8> tmp_buffer(regs.line_length_in);
+ std::vector<u8> dst_buffer(regs.line_length_in);
+ memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+ regs.line_length_in);
+ for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
+ dst_buffer[offset] =
+ tmp_buffer[convert_linear_2_blocklinear_addr(regs.offset_in + offset) -
+ regs.offset_in];
+ }
+ memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
+ } else if (is_src_pitch && !is_dst_pitch) {
+ std::vector<u8> tmp_buffer(regs.line_length_in);
+ std::vector<u8> dst_buffer(regs.line_length_in);
+ memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+ regs.line_length_in);
+ for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
+ dst_buffer[convert_linear_2_blocklinear_addr(regs.offset_out + offset) -
+ regs.offset_out] = tmp_buffer[offset];
+ }
+ memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
+ } else {
+ if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
+ std::vector<u8> tmp_buffer(regs.line_length_in);
+ memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+ regs.line_length_in);
+ memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
+ regs.line_length_in);
+ }
+ }
}
}
- ReleaseSemaphore();
-}
-void MaxwellDMA::CopyPitchToPitch() {
- // When `multi_line_enable` bit is enabled we copy a 2D image of dimensions
- // (line_length_in, line_count).
- // Otherwise the copy is performed as if we were copying a 1D buffer of length line_length_in.
- const bool remap_enabled = regs.launch_dma.remap_enable != 0;
- if (regs.launch_dma.multi_line_enable) {
- UNIMPLEMENTED_IF(remap_enabled);
-
- // Perform a line-by-line copy.
- // We're going to take a subrect of size (line_length_in, line_count) from the source
- // rectangle. There is no need to manually flush/invalidate the regions because CopyBlock
- // does that for us.
- for (u32 line = 0; line < regs.line_count; ++line) {
- const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
- const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
- memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
- }
- return;
- }
- // TODO: allow multisized components.
- auto& accelerate = rasterizer->AccessAccelerateDMA();
- const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
- const bool is_buffer_clear = remap_enabled && is_const_a_dst;
- if (is_buffer_clear) {
- ASSERT(regs.remap_const.component_size_minus_one == 3);
- accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
- std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
- memory_manager.WriteBlockUnsafe(regs.offset_out, reinterpret_cast<u8*>(tmp_buffer.data()),
- regs.line_length_in * sizeof(u32));
- return;
- }
- UNIMPLEMENTED_IF(remap_enabled);
- if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
- std::vector<u8> tmp_buffer(regs.line_length_in);
- memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in);
- memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in);
- }
+ ReleaseSemaphore();
}
void MaxwellDMA::CopyBlockLinearToPitch() {
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index bc48320ce..953e34adc 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -219,8 +219,6 @@ private:
/// registers.
void Launch();
- void CopyPitchToPitch();
-
void CopyBlockLinearToPitch();
void CopyPitchToBlockLinear();
diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp
index 326e8355a..a44fc83d3 100644
--- a/src/video_core/host1x/syncpoint_manager.cpp
+++ b/src/video_core/host1x/syncpoint_manager.cpp
@@ -36,7 +36,17 @@ SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage,
ActionHandle& handle) {
std::unique_lock lk(guard);
- action_storage.erase(handle);
+
+ // Erase the handle only if it is still an element of the list: erasing an
+ // invalid iterator could lead to UB, and this function is not called from a
+ // locked context, so the handle has to be re-validated under the guard.
+ auto it = action_storage.begin();
+ while (it != action_storage.end() && it != handle) {
+ ++it;
+ }
+ if (it != action_storage.end()) {
+ action_storage.erase(it);
+ }
}
void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle) {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index cca401c74..d07b21bd6 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -41,7 +41,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
big_entries.resize(big_page_table_size / 32, 0);
big_page_table_cpu.resize(big_page_table_size);
big_page_continous.resize(big_page_table_size / continous_bits, 0);
+ std::array<PTEKind, 32> kind_values; // fill value: one group of 32 page kinds
+ kind_values.fill(PTEKind::INVALID); // every page starts out with no kind
+ big_kinds.resize(big_page_table_size / 32, kind_values); // indexed by big-page number
entries.resize(page_table_size / 32, 0);
+ kinds.resize(page_table_size / 32, kind_values); // indexed by small-page number, sized like entries
}
MemoryManager::~MemoryManager() = default;
@@ -78,6 +82,41 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
}
}
+PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const { // kind stored for gpu_addr's page
+ const auto big_entry = GetEntry<true>(gpu_addr);
+ const bool in_big_table = big_entry == EntryType::Mapped || big_entry == EntryType::Reserved;
+ if (!in_big_table) [[unlikely]] {
+ return GetKind<false>(gpu_addr);
+ }
+ return GetKind<true>(gpu_addr);
+}
+
+template <bool is_big_page>
+PTEKind MemoryManager::GetKind(size_t position) const {
+ // position is a GPU virtual address; it is reduced to a page number first.
+ // Each table element holds the kinds of 32 consecutive pages.
+ if constexpr (is_big_page) {
+ const size_t page_index = position >> big_page_bits;
+ return big_kinds[page_index / 32][page_index % 32];
+ } else {
+ const size_t page_index = position >> page_bits;
+ return kinds[page_index / 32][page_index % 32];
+ }
+}
+
+template <bool is_big_page>
+void MemoryManager::SetKind(size_t position, PTEKind kind) {
+ // position is a GPU virtual address; it is reduced to a page number first.
+ // Each table element holds the kinds of 32 consecutive pages.
+ if constexpr (is_big_page) {
+ const size_t page_index = position >> big_page_bits;
+ big_kinds[page_index / 32][page_index % 32] = kind;
+ } else {
+ const size_t page_index = position >> page_bits;
+ kinds[page_index / 32][page_index % 32] = kind;
+ }
+}
+
inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const {
const u64 entry_mask = big_page_continous[big_page_index / continous_bits];
const size_t sub_index = big_page_index % continous_bits;
@@ -92,8 +131,8 @@ inline void MemoryManager::SetBigPageContinous(size_t big_page_index, bool value
}
template <MemoryManager::EntryType entry_type>
-GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
- size_t size) {
+GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
+ PTEKind kind) {
u64 remaining_size{size};
if constexpr (entry_type == EntryType::Mapped) {
page_table.ReserveRange(gpu_addr, size);
@@ -102,6 +141,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
const GPUVAddr current_gpu_addr = gpu_addr + offset;
[[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
SetEntry<false>(current_gpu_addr, entry_type);
+ SetKind<false>(current_gpu_addr, kind);
if (current_entry_type != entry_type) {
rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
}
@@ -118,12 +158,13 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
template <MemoryManager::EntryType entry_type>
GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
- size_t size) {
+ size_t size, PTEKind kind) {
u64 remaining_size{size};
for (u64 offset{}; offset < size; offset += big_page_size) {
const GPUVAddr current_gpu_addr = gpu_addr + offset;
[[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
SetEntry<true>(current_gpu_addr, entry_type);
+ SetKind<true>(current_gpu_addr, kind);
if (current_entry_type != entry_type) {
rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
}
@@ -159,19 +200,19 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
rasterizer = rasterizer_;
}
-GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
+GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind,
bool is_big_pages) {
if (is_big_pages) [[likely]] {
- return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
+ return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
}
- return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
+ return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
}
GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
if (is_big_pages) [[likely]] {
- return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
+ return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
}
- return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
+ return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
}
void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
@@ -188,8 +229,8 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
rasterizer->UnmapMemory(*cpu_addr, map_size);
}
- BigPageTableOp<EntryType::Free>(gpu_addr, 0, size);
- PageTableOp<EntryType::Free>(gpu_addr, 0, size);
+ BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
+ PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
}
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index f992e29f3..ab4bc9ec6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -11,6 +11,7 @@
#include "common/common_types.h"
#include "common/multi_level_page_table.h"
#include "common/virtual_buffer.h"
+#include "video_core/pte_kind.h"
namespace VideoCore {
class RasterizerInterface;
@@ -98,7 +99,8 @@ public:
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
std::size_t size) const;
- GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, bool is_big_pages = true);
+ GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
+ PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
void Unmap(GPUVAddr gpu_addr, std::size_t size);
@@ -114,6 +116,8 @@ public:
return gpu_addr < address_space_size;
}
+ PTEKind GetPageKind(GPUVAddr gpu_addr) const;
+
private:
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
@@ -166,10 +170,12 @@ private:
std::vector<u64> big_entries;
template <EntryType entry_type>
- GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+ GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
+ PTEKind kind);
template <EntryType entry_type>
- GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+ GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
+ PTEKind kind);
template <bool is_big_page>
inline EntryType GetEntry(size_t position) const;
@@ -177,6 +183,15 @@ private:
template <bool is_big_page>
inline void SetEntry(size_t position, EntryType entry);
+ std::vector<std::array<PTEKind, 32>> kinds;
+ std::vector<std::array<PTEKind, 32>> big_kinds;
+
+ template <bool is_big_page>
+ inline PTEKind GetKind(size_t position) const;
+
+ template <bool is_big_page>
+ inline void SetKind(size_t position, PTEKind kind);
+
Common::MultiLevelPageTable<u32> page_table;
Common::VirtualBuffer<u32> big_page_table_cpu;
diff --git a/src/video_core/pte_kind.h b/src/video_core/pte_kind.h
new file mode 100644
index 000000000..591d7214b
--- /dev/null
+++ b/src/video_core/pte_kind.h
@@ -0,0 +1,264 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+// https://github.com/NVIDIA/open-gpu-doc/blob/master/manuals/volta/gv100/dev_mmu.ref.txt
+enum class PTEKind : u8 {
+ INVALID = 0xff,
+ PITCH = 0x00,
+ Z16 = 0x01,
+ Z16_2C = 0x02,
+ Z16_MS2_2C = 0x03,
+ Z16_MS4_2C = 0x04,
+ Z16_MS8_2C = 0x05,
+ Z16_MS16_2C = 0x06,
+ Z16_2Z = 0x07,
+ Z16_MS2_2Z = 0x08,
+ Z16_MS4_2Z = 0x09,
+ Z16_MS8_2Z = 0x0a,
+ Z16_MS16_2Z = 0x0b,
+ Z16_2CZ = 0x36,
+ Z16_MS2_2CZ = 0x37,
+ Z16_MS4_2CZ = 0x38,
+ Z16_MS8_2CZ = 0x39,
+ Z16_MS16_2CZ = 0x5f,
+ Z16_4CZ = 0x0c,
+ Z16_MS2_4CZ = 0x0d,
+ Z16_MS4_4CZ = 0x0e,
+ Z16_MS8_4CZ = 0x0f,
+ Z16_MS16_4CZ = 0x10,
+ S8Z24 = 0x11,
+ S8Z24_1Z = 0x12,
+ S8Z24_MS2_1Z = 0x13,
+ S8Z24_MS4_1Z = 0x14,
+ S8Z24_MS8_1Z = 0x15,
+ S8Z24_MS16_1Z = 0x16,
+ S8Z24_2CZ = 0x17,
+ S8Z24_MS2_2CZ = 0x18,
+ S8Z24_MS4_2CZ = 0x19,
+ S8Z24_MS8_2CZ = 0x1a,
+ S8Z24_MS16_2CZ = 0x1b,
+ S8Z24_2CS = 0x1c,
+ S8Z24_MS2_2CS = 0x1d,
+ S8Z24_MS4_2CS = 0x1e,
+ S8Z24_MS8_2CS = 0x1f,
+ S8Z24_MS16_2CS = 0x20,
+ S8Z24_4CSZV = 0x21,
+ S8Z24_MS2_4CSZV = 0x22,
+ S8Z24_MS4_4CSZV = 0x23,
+ S8Z24_MS8_4CSZV = 0x24,
+ S8Z24_MS16_4CSZV = 0x25,
+ V8Z24_MS4_VC12 = 0x26,
+ V8Z24_MS4_VC4 = 0x27,
+ V8Z24_MS8_VC8 = 0x28,
+ V8Z24_MS8_VC24 = 0x29,
+ V8Z24_MS4_VC12_1ZV = 0x2e,
+ V8Z24_MS4_VC4_1ZV = 0x2f,
+ V8Z24_MS8_VC8_1ZV = 0x30,
+ V8Z24_MS8_VC24_1ZV = 0x31,
+ V8Z24_MS4_VC12_2CS = 0x32,
+ V8Z24_MS4_VC4_2CS = 0x33,
+ V8Z24_MS8_VC8_2CS = 0x34,
+ V8Z24_MS8_VC24_2CS = 0x35,
+ V8Z24_MS4_VC12_2CZV = 0x3a,
+ V8Z24_MS4_VC4_2CZV = 0x3b,
+ V8Z24_MS8_VC8_2CZV = 0x3c,
+ V8Z24_MS8_VC24_2CZV = 0x3d,
+ V8Z24_MS4_VC12_2ZV = 0x3e,
+ V8Z24_MS4_VC4_2ZV = 0x3f,
+ V8Z24_MS8_VC8_2ZV = 0x40,
+ V8Z24_MS8_VC24_2ZV = 0x41,
+ V8Z24_MS4_VC12_4CSZV = 0x42,
+ V8Z24_MS4_VC4_4CSZV = 0x43,
+ V8Z24_MS8_VC8_4CSZV = 0x44,
+ V8Z24_MS8_VC24_4CSZV = 0x45,
+ Z24S8 = 0x46,
+ Z24S8_1Z = 0x47,
+ Z24S8_MS2_1Z = 0x48,
+ Z24S8_MS4_1Z = 0x49,
+ Z24S8_MS8_1Z = 0x4a,
+ Z24S8_MS16_1Z = 0x4b,
+ Z24S8_2CS = 0x4c,
+ Z24S8_MS2_2CS = 0x4d,
+ Z24S8_MS4_2CS = 0x4e,
+ Z24S8_MS8_2CS = 0x4f,
+ Z24S8_MS16_2CS = 0x50,
+ Z24S8_2CZ = 0x51,
+ Z24S8_MS2_2CZ = 0x52,
+ Z24S8_MS4_2CZ = 0x53,
+ Z24S8_MS8_2CZ = 0x54,
+ Z24S8_MS16_2CZ = 0x55,
+ Z24S8_4CSZV = 0x56,
+ Z24S8_MS2_4CSZV = 0x57,
+ Z24S8_MS4_4CSZV = 0x58,
+ Z24S8_MS8_4CSZV = 0x59,
+ Z24S8_MS16_4CSZV = 0x5a,
+ Z24V8_MS4_VC12 = 0x5b,
+ Z24V8_MS4_VC4 = 0x5c,
+ Z24V8_MS8_VC8 = 0x5d,
+ Z24V8_MS8_VC24 = 0x5e,
+ YUV_B8C1_2Y = 0x60,
+ YUV_B8C2_2Y = 0x61,
+ YUV_B10C1_2Y = 0x62,
+ YUV_B10C2_2Y = 0x6b,
+ YUV_B12C1_2Y = 0x6c,
+ YUV_B12C2_2Y = 0x6d,
+ Z24V8_MS4_VC12_1ZV = 0x63,
+ Z24V8_MS4_VC4_1ZV = 0x64,
+ Z24V8_MS8_VC8_1ZV = 0x65,
+ Z24V8_MS8_VC24_1ZV = 0x66,
+ Z24V8_MS4_VC12_2CS = 0x67,
+ Z24V8_MS4_VC4_2CS = 0x68,
+ Z24V8_MS8_VC8_2CS = 0x69,
+ Z24V8_MS8_VC24_2CS = 0x6a,
+ Z24V8_MS4_VC12_2CZV = 0x6f,
+ Z24V8_MS4_VC4_2CZV = 0x70,
+ Z24V8_MS8_VC8_2CZV = 0x71,
+ Z24V8_MS8_VC24_2CZV = 0x72,
+ Z24V8_MS4_VC12_2ZV = 0x73,
+ Z24V8_MS4_VC4_2ZV = 0x74,
+ Z24V8_MS8_VC8_2ZV = 0x75,
+ Z24V8_MS8_VC24_2ZV = 0x76,
+ Z24V8_MS4_VC12_4CSZV = 0x77,
+ Z24V8_MS4_VC4_4CSZV = 0x78,
+ Z24V8_MS8_VC8_4CSZV = 0x79,
+ Z24V8_MS8_VC24_4CSZV = 0x7a,
+ ZF32 = 0x7b,
+ ZF32_1Z = 0x7c,
+ ZF32_MS2_1Z = 0x7d,
+ ZF32_MS4_1Z = 0x7e,
+ ZF32_MS8_1Z = 0x7f,
+ ZF32_MS16_1Z = 0x80,
+ ZF32_2CS = 0x81,
+ ZF32_MS2_2CS = 0x82,
+ ZF32_MS4_2CS = 0x83,
+ ZF32_MS8_2CS = 0x84,
+ ZF32_MS16_2CS = 0x85,
+ ZF32_2CZ = 0x86,
+ ZF32_MS2_2CZ = 0x87,
+ ZF32_MS4_2CZ = 0x88,
+ ZF32_MS8_2CZ = 0x89,
+ ZF32_MS16_2CZ = 0x8a,
+ X8Z24_X16V8S8_MS4_VC12 = 0x8b,
+ X8Z24_X16V8S8_MS4_VC4 = 0x8c,
+ X8Z24_X16V8S8_MS8_VC8 = 0x8d,
+ X8Z24_X16V8S8_MS8_VC24 = 0x8e,
+ X8Z24_X16V8S8_MS4_VC12_1CS = 0x8f,
+ X8Z24_X16V8S8_MS4_VC4_1CS = 0x90,
+ X8Z24_X16V8S8_MS8_VC8_1CS = 0x91,
+ X8Z24_X16V8S8_MS8_VC24_1CS = 0x92,
+ X8Z24_X16V8S8_MS4_VC12_1ZV = 0x97,
+ X8Z24_X16V8S8_MS4_VC4_1ZV = 0x98,
+ X8Z24_X16V8S8_MS8_VC8_1ZV = 0x99,
+ X8Z24_X16V8S8_MS8_VC24_1ZV = 0x9a,
+ X8Z24_X16V8S8_MS4_VC12_1CZV = 0x9b,
+ X8Z24_X16V8S8_MS4_VC4_1CZV = 0x9c,
+ X8Z24_X16V8S8_MS8_VC8_1CZV = 0x9d,
+ X8Z24_X16V8S8_MS8_VC24_1CZV = 0x9e,
+ X8Z24_X16V8S8_MS4_VC12_2CS = 0x9f,
+ X8Z24_X16V8S8_MS4_VC4_2CS = 0xa0,
+ X8Z24_X16V8S8_MS8_VC8_2CS = 0xa1,
+ X8Z24_X16V8S8_MS8_VC24_2CS = 0xa2,
+ X8Z24_X16V8S8_MS4_VC12_2CSZV = 0xa3,
+ X8Z24_X16V8S8_MS4_VC4_2CSZV = 0xa4,
+ X8Z24_X16V8S8_MS8_VC8_2CSZV = 0xa5,
+ X8Z24_X16V8S8_MS8_VC24_2CSZV = 0xa6,
+ ZF32_X16V8S8_MS4_VC12 = 0xa7,
+ ZF32_X16V8S8_MS4_VC4 = 0xa8,
+ ZF32_X16V8S8_MS8_VC8 = 0xa9,
+ ZF32_X16V8S8_MS8_VC24 = 0xaa,
+ ZF32_X16V8S8_MS4_VC12_1CS = 0xab,
+ ZF32_X16V8S8_MS4_VC4_1CS = 0xac,
+ ZF32_X16V8S8_MS8_VC8_1CS = 0xad,
+ ZF32_X16V8S8_MS8_VC24_1CS = 0xae,
+ ZF32_X16V8S8_MS4_VC12_1ZV = 0xb3,
+ ZF32_X16V8S8_MS4_VC4_1ZV = 0xb4,
+ ZF32_X16V8S8_MS8_VC8_1ZV = 0xb5,
+ ZF32_X16V8S8_MS8_VC24_1ZV = 0xb6,
+ ZF32_X16V8S8_MS4_VC12_1CZV = 0xb7,
+ ZF32_X16V8S8_MS4_VC4_1CZV = 0xb8,
+ ZF32_X16V8S8_MS8_VC8_1CZV = 0xb9,
+ ZF32_X16V8S8_MS8_VC24_1CZV = 0xba,
+ ZF32_X16V8S8_MS4_VC12_2CS = 0xbb,
+ ZF32_X16V8S8_MS4_VC4_2CS = 0xbc,
+ ZF32_X16V8S8_MS8_VC8_2CS = 0xbd,
+ ZF32_X16V8S8_MS8_VC24_2CS = 0xbe,
+ ZF32_X16V8S8_MS4_VC12_2CSZV = 0xbf,
+ ZF32_X16V8S8_MS4_VC4_2CSZV = 0xc0,
+ ZF32_X16V8S8_MS8_VC8_2CSZV = 0xc1,
+ ZF32_X16V8S8_MS8_VC24_2CSZV = 0xc2,
+ ZF32_X24S8 = 0xc3,
+ ZF32_X24S8_1CS = 0xc4,
+ ZF32_X24S8_MS2_1CS = 0xc5,
+ ZF32_X24S8_MS4_1CS = 0xc6,
+ ZF32_X24S8_MS8_1CS = 0xc7,
+ ZF32_X24S8_MS16_1CS = 0xc8,
+ ZF32_X24S8_2CSZV = 0xce,
+ ZF32_X24S8_MS2_2CSZV = 0xcf,
+ ZF32_X24S8_MS4_2CSZV = 0xd0,
+ ZF32_X24S8_MS8_2CSZV = 0xd1,
+ ZF32_X24S8_MS16_2CSZV = 0xd2,
+ ZF32_X24S8_2CS = 0xd3,
+ ZF32_X24S8_MS2_2CS = 0xd4,
+ ZF32_X24S8_MS4_2CS = 0xd5,
+ ZF32_X24S8_MS8_2CS = 0xd6,
+ ZF32_X24S8_MS16_2CS = 0xd7,
+ S8 = 0x2a,
+ S8_2S = 0x2b,
+ GENERIC_16BX2 = 0xfe,
+ C32_2C = 0xd8,
+ C32_2CBR = 0xd9,
+ C32_2CBA = 0xda,
+ C32_2CRA = 0xdb,
+ C32_2BRA = 0xdc,
+ C32_MS2_2C = 0xdd,
+ C32_MS2_2CBR = 0xde,
+ C32_MS2_4CBRA = 0xcc,
+ C32_MS4_2C = 0xdf,
+ C32_MS4_2CBR = 0xe0,
+ C32_MS4_2CBA = 0xe1,
+ C32_MS4_2CRA = 0xe2,
+ C32_MS4_2BRA = 0xe3,
+ C32_MS4_4CBRA = 0x2c,
+ C32_MS8_MS16_2C = 0xe4,
+ C32_MS8_MS16_2CRA = 0xe5,
+ C64_2C = 0xe6,
+ C64_2CBR = 0xe7,
+ C64_2CBA = 0xe8,
+ C64_2CRA = 0xe9,
+ C64_2BRA = 0xea,
+ C64_MS2_2C = 0xeb,
+ C64_MS2_2CBR = 0xec,
+ C64_MS2_4CBRA = 0xcd,
+ C64_MS4_2C = 0xed,
+ C64_MS4_2CBR = 0xee,
+ C64_MS4_2CBA = 0xef,
+ C64_MS4_2CRA = 0xf0,
+ C64_MS4_2BRA = 0xf1,
+ C64_MS4_4CBRA = 0x2d,
+ C64_MS8_MS16_2C = 0xf2,
+ C64_MS8_MS16_2CRA = 0xf3,
+ C128_2C = 0xf4,
+ C128_2CR = 0xf5,
+ C128_MS2_2C = 0xf6,
+ C128_MS2_2CR = 0xf7,
+ C128_MS4_2C = 0xf8,
+ C128_MS4_2CR = 0xf9,
+ C128_MS8_MS16_2C = 0xfa,
+ C128_MS8_MS16_2CR = 0xfb,
+ X8C24 = 0xfc,
+ PITCH_NO_SWIZZLE = 0xfd,
+ SMSKED_MESSAGE = 0xca,
+ SMHOST_MESSAGE = 0xcb,
+};
+
+constexpr bool IsPitchKind(PTEKind kind) { // true only for PITCH and PITCH_NO_SWIZZLE
+ return kind == PTEKind::PITCH || kind == PTEKind::PITCH_NO_SWIZZLE;
+}
+
+} // namespace Tegra
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 45791aa75..e8761a747 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -1,6 +1,8 @@
// SPDX-FileCopyrightText: 2015 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+#include <thread>
+
#include "common/logging/log.h"
#include "core/frontend/emu_window.h"
#include "video_core/renderer_base.h"
@@ -35,8 +37,12 @@ void RendererBase::RequestScreenshot(void* data, std::function<void(bool)> callb
LOG_ERROR(Render, "A screenshot is already requested or in progress, ignoring the request");
return;
}
+ auto async_callback{[callback = std::move(callback)](bool invert_y) {
+ std::thread t{callback, invert_y};
+ t.detach();
+ }};
renderer_settings.screenshot_bits = data;
- renderer_settings.screenshot_complete_callback = std::move(callback);
+ renderer_settings.screenshot_complete_callback = async_callback;
renderer_settings.screenshot_framebuffer_layout = layout;
renderer_settings.screenshot_requested = true;
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index cce00cea8..e5c09a969 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -658,8 +658,13 @@ void RasterizerOpenGL::SyncDepthClamp() {
}
flags[Dirty::DepthClampEnabled] = false;
- oglEnable(GL_DEPTH_CLAMP, maxwell3d->regs.viewport_clip_control.geometry_clip !=
- Maxwell::ViewportClipControl::GeometryClip::Passthrough);
+ bool depth_clamp_disabled{maxwell3d->regs.viewport_clip_control.geometry_clip ==
+ Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
+ maxwell3d->regs.viewport_clip_control.geometry_clip ==
+ Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
+ maxwell3d->regs.viewport_clip_control.geometry_clip ==
+ Maxwell::ViewportClipControl::GeometryClip::FrustumZ};
+ oglEnable(GL_DEPTH_CLAMP, !depth_clamp_disabled);
}
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
@@ -746,19 +751,19 @@ void RasterizerOpenGL::SyncStencilTestState() {
oglEnable(GL_STENCIL_TEST, regs.stencil_enable);
glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_op.func),
- regs.stencil_front_func.ref, regs.stencil_front_func.func_mask);
+ regs.stencil_front_ref, regs.stencil_front_func_mask);
glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op.fail),
MaxwellToGL::StencilOp(regs.stencil_front_op.zfail),
MaxwellToGL::StencilOp(regs.stencil_front_op.zpass));
- glStencilMaskSeparate(GL_FRONT, regs.stencil_front_func.mask);
+ glStencilMaskSeparate(GL_FRONT, regs.stencil_front_mask);
if (regs.stencil_two_side_enable) {
glStencilFuncSeparate(GL_BACK, MaxwellToGL::ComparisonOp(regs.stencil_back_op.func),
- regs.stencil_back_func.ref, regs.stencil_back_func.mask);
+ regs.stencil_back_ref, regs.stencil_back_func_mask); // compare mask, as in the GL_FRONT call
glStencilOpSeparate(GL_BACK, MaxwellToGL::StencilOp(regs.stencil_back_op.fail),
MaxwellToGL::StencilOp(regs.stencil_back_op.zfail),
MaxwellToGL::StencilOp(regs.stencil_back_op.zpass));
- glStencilMaskSeparate(GL_BACK, regs.stencil_back_func.mask);
+ glStencilMaskSeparate(GL_BACK, regs.stencil_back_mask);
} else {
glStencilFuncSeparate(GL_BACK, GL_ALWAYS, 0, 0xFFFFFFFF);
glStencilOpSeparate(GL_BACK, GL_KEEP, GL_KEEP, GL_KEEP);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 6bdb0b645..609f0a772 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -317,8 +317,8 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
graphics_key.tessellation_primitive.Assign(regs.tessellation.params.domain_type.Value());
graphics_key.tessellation_spacing.Assign(regs.tessellation.params.spacing.Value());
graphics_key.tessellation_clockwise.Assign(
- regs.tessellation.params.output_primitives.Value() !=
- Maxwell::Tessellation::OutputPrimitves::Triangles_CCW);
+ regs.tessellation.params.output_primitives.Value() ==
+ Maxwell::Tessellation::OutputPrimitives::Triangles_CW);
graphics_key.xfb_enabled.Assign(regs.transform_feedback_enabled != 0 ? 1 : 0);
if (graphics_key.xfb_enabled) {
SetXfbState(graphics_key.xfb_state, regs);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index e2c709aac..a359f96f1 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -100,14 +100,12 @@ void SetupDirtyDepthTest(Tables& tables) {
void SetupDirtyStencilTest(Tables& tables) {
static constexpr std::array offsets = {
- OFF(stencil_enable), OFF(stencil_front_op.func),
- OFF(stencil_front_func.ref), OFF(stencil_front_func.func_mask),
- OFF(stencil_front_op.fail), OFF(stencil_front_op.zfail),
- OFF(stencil_front_op.zpass), OFF(stencil_front_func.mask),
- OFF(stencil_two_side_enable), OFF(stencil_back_op.func),
- OFF(stencil_back_func.ref), OFF(stencil_back_func.func_mask),
- OFF(stencil_back_op.fail), OFF(stencil_back_op.zfail),
- OFF(stencil_back_op.zpass), OFF(stencil_back_func.mask)};
+ OFF(stencil_enable), OFF(stencil_front_op.func), OFF(stencil_front_ref),
+ OFF(stencil_front_func_mask), OFF(stencil_front_op.fail), OFF(stencil_front_op.zfail),
+ OFF(stencil_front_op.zpass), OFF(stencil_front_mask), OFF(stencil_two_side_enable),
+ OFF(stencil_back_op.func), OFF(stencil_back_ref), OFF(stencil_back_func_mask),
+ OFF(stencil_back_op.fail), OFF(stencil_back_op.zfail), OFF(stencil_back_op.zpass),
+ OFF(stencil_back_mask)};
for (const auto offset : offsets) {
tables[0][offset] = StencilTest;
}
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index eb7c22fd5..f85ed8e5b 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -63,14 +63,18 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
depth_clamp_disabled.Assign(regs.viewport_clip_control.geometry_clip ==
- Maxwell::ViewportClipControl::GeometryClip::Passthrough);
+ Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
+ regs.viewport_clip_control.geometry_clip ==
+ Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
+ regs.viewport_clip_control.geometry_clip ==
+ Maxwell::ViewportClipControl::GeometryClip::FrustumZ);
ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0);
polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));
patch_control_points_minus_one.Assign(regs.patch_vertices - 1);
tessellation_primitive.Assign(static_cast<u32>(regs.tessellation.params.domain_type.Value()));
tessellation_spacing.Assign(static_cast<u32>(regs.tessellation.params.spacing.Value()));
- tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() !=
- Maxwell::Tessellation::OutputPrimitves::Triangles_CCW);
+ tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() ==
+ Maxwell::Tessellation::OutputPrimitives::Triangles_CW);
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
logic_op.Assign(PackLogicOp(regs.logic_op.op));
topology.Assign(regs.draw.topology);
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 7cb02631c..4b15c0f85 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -59,10 +59,11 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) {
return query_pool == *pool;
});
- ASSERT(it != std::end(pools));
- const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
- usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
+ if (it != std::end(pools)) {
+ const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
+ usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
+ }
}
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 892cd94a3..47dfb45a1 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -772,11 +772,10 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
if (regs.stencil_two_side_enable) {
// Separate values per face
scheduler.Record(
- [front_ref = regs.stencil_front_func.ref,
- front_write_mask = regs.stencil_front_func.mask,
- front_test_mask = regs.stencil_front_func.func_mask,
- back_ref = regs.stencil_back_func.ref, back_write_mask = regs.stencil_back_func.mask,
- back_test_mask = regs.stencil_back_func.func_mask](vk::CommandBuffer cmdbuf) {
+ [front_ref = regs.stencil_front_ref, front_write_mask = regs.stencil_front_mask,
+ front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_ref,
+ back_write_mask = regs.stencil_back_mask,
+ back_test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) {
// Front face
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_BIT, front_ref);
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_BIT, front_write_mask);
@@ -789,9 +788,8 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
});
} else {
// Front face defines both faces
- scheduler.Record([ref = regs.stencil_front_func.ref,
- write_mask = regs.stencil_front_func.mask,
- test_mask = regs.stencil_front_func.func_mask](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([ref = regs.stencil_front_ref, write_mask = regs.stencil_front_mask,
+ test_mask = regs.stencil_front_func_mask](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref);
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask);
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask);
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 7fb256953..06f68d09a 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -26,39 +26,20 @@ using namespace Common::Literals;
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
// Maximum size to put elements in the stream buffer
constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
+// Stream buffer size in bytes
+constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
+constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
constexpr VkMemoryPropertyFlags HOST_FLAGS =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
-static bool IsStreamHeap(VkMemoryHeap heap, size_t staging_buffer_size) noexcept {
- return staging_buffer_size < (heap.size * 2) / 3;
-}
-
-static bool HasLargeDeviceLocalHostVisibleMemory(const VkPhysicalDeviceMemoryProperties& props) {
- const auto flags{VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT};
-
- for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
- const auto& memory_type{props.memoryTypes[type_index]};
-
- if ((memory_type.propertyFlags & flags) != flags) {
- // Memory must be device local and host visible
- continue;
- }
-
- const auto& heap{props.memoryHeaps[memory_type.heapIndex]};
- if (heap.size >= 7168_MiB) {
- // This is the right type of memory
- return true;
- }
- }
-
- return false;
+bool IsStreamHeap(VkMemoryHeap heap) noexcept {
+ return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
}
std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
- VkMemoryPropertyFlags flags,
- size_t staging_buffer_size) noexcept {
+ VkMemoryPropertyFlags flags) noexcept {
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
if (((type_mask >> type_index) & 1) == 0) {
// Memory type is incompatible
@@ -69,7 +50,7 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p
// Memory type doesn't have the flags we want
continue;
}
- if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex], staging_buffer_size)) {
+ if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
// Memory heap is not suitable for streaming
continue;
}
@@ -80,17 +61,17 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p
}
u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
- bool try_device_local, size_t staging_buffer_size) {
+ bool try_device_local) {
std::optional<u32> type;
if (try_device_local) {
// Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
- type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS, staging_buffer_size);
+ type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
if (type) {
return *type;
}
}
// Otherwise try without the DEVICE_LOCAL_BIT
- type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS, staging_buffer_size);
+ type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
if (type) {
return *type;
}
@@ -98,32 +79,20 @@ u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
-size_t Region(size_t iterator, size_t region_size) noexcept {
- return iterator / region_size;
+size_t Region(size_t iterator) noexcept {
+ return iterator / REGION_SIZE;
}
} // Anonymous namespace
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
-
- const auto memory_properties{device.GetPhysical().GetMemoryProperties().memoryProperties};
- if (HasLargeDeviceLocalHostVisibleMemory(memory_properties)) {
- // Possible on many integrated and newer discrete cards
- staging_buffer_size = 1_GiB;
- } else {
- // Well-supported default size used by most Vulkan PC games
- staging_buffer_size = 256_MiB;
- }
-
- region_size = staging_buffer_size / StagingBufferPool::NUM_SYNCS;
-
const vk::Device& dev = device.GetLogical();
stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .size = staging_buffer_size,
+ .size = STREAM_BUFFER_SIZE,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
@@ -148,18 +117,19 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.image = nullptr,
.buffer = *stream_buffer,
};
+ const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties;
VkMemoryAllocateInfo stream_memory_info{
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = make_dedicated ? &dedicated_info : nullptr,
.allocationSize = requirements.size,
- .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true,
- staging_buffer_size),
+ .memoryTypeIndex =
+ FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
};
stream_memory = dev.TryAllocateMemory(stream_memory_info);
if (!stream_memory) {
LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
- stream_memory_info.memoryTypeIndex = FindMemoryTypeIndex(
- memory_properties, requirements.memoryTypeBits, false, staging_buffer_size);
+ stream_memory_info.memoryTypeIndex =
+ FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
stream_memory = dev.AllocateMemory(stream_memory_info);
}
@@ -167,7 +137,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
stream_memory.SetObjectNameEXT("Stream Buffer Memory");
}
stream_buffer.BindMemory(*stream_memory, 0);
- stream_pointer = stream_memory.Map(0, staging_buffer_size);
+ stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
}
StagingBufferPool::~StagingBufferPool() = default;
@@ -188,25 +158,25 @@ void StagingBufferPool::TickFrame() {
}
StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
- if (AreRegionsActive(Region(free_iterator, region_size) + 1,
- std::min(Region(iterator + size, region_size) + 1, NUM_SYNCS))) {
+ if (AreRegionsActive(Region(free_iterator) + 1,
+ std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
// Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload);
}
const u64 current_tick = scheduler.CurrentTick();
- std::fill(sync_ticks.begin() + Region(used_iterator, region_size),
- sync_ticks.begin() + Region(iterator, region_size), current_tick);
+ std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
+ current_tick);
used_iterator = iterator;
free_iterator = std::max(free_iterator, iterator + size);
- if (iterator + size >= staging_buffer_size) {
- std::fill(sync_ticks.begin() + Region(used_iterator, region_size),
- sync_ticks.begin() + NUM_SYNCS, current_tick);
+ if (iterator + size >= STREAM_BUFFER_SIZE) {
+ std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
+ current_tick);
used_iterator = 0;
iterator = 0;
free_iterator = size;
- if (AreRegionsActive(0, Region(size, region_size) + 1)) {
+ if (AreRegionsActive(0, Region(size) + 1)) {
// Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload);
}
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 90c67177f..91dc84da8 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -93,9 +93,6 @@ private:
size_t free_iterator = 0;
std::array<u64, NUM_SYNCS> sync_ticks{};
- size_t staging_buffer_size = 0;
- size_t region_size = 0;
-
StagingBuffersCache device_local_cache;
StagingBuffersCache upload_cache;
StagingBuffersCache download_cache;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index ed98c8370..b87c3be66 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -77,12 +77,12 @@ void SetupDirtyDepthBounds(Tables& tables) {
void SetupDirtyStencilProperties(Tables& tables) {
auto& table = tables[0];
table[OFF(stencil_two_side_enable)] = StencilProperties;
- table[OFF(stencil_front_func.ref)] = StencilProperties;
- table[OFF(stencil_front_func.mask)] = StencilProperties;
- table[OFF(stencil_front_func.func_mask)] = StencilProperties;
- table[OFF(stencil_back_func.ref)] = StencilProperties;
- table[OFF(stencil_back_func.mask)] = StencilProperties;
- table[OFF(stencil_back_func.func_mask)] = StencilProperties;
+ table[OFF(stencil_front_ref)] = StencilProperties;
+ table[OFF(stencil_front_mask)] = StencilProperties;
+ table[OFF(stencil_front_func_mask)] = StencilProperties;
+ table[OFF(stencil_back_ref)] = StencilProperties;
+ table[OFF(stencil_back_mask)] = StencilProperties;
+ table[OFF(stencil_back_func_mask)] = StencilProperties;
}
void SetupDirtyLineWidth(Tables& tables) {
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h
index b18e3838f..ee4240288 100644
--- a/src/video_core/texture_cache/descriptor_table.h
+++ b/src/video_core/texture_cache/descriptor_table.h
@@ -18,7 +18,7 @@ class DescriptorTable {
public:
explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {}
- [[nodiscard]] bool Synchornize(GPUVAddr gpu_addr, u32 limit) {
+ [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) {
[[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) {
return false;
}
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index ad935d386..08aa8ca33 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -150,6 +150,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::D24_UNORM_S8_UINT;
case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
return PixelFormat::D32_FLOAT_S8_UINT;
+ case Hash(TextureFormat::R32_B24G8, FLOAT, UINT, UNORM, UNORM, LINEAR):
+ return PixelFormat::D32_FLOAT_S8_UINT;
case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
return PixelFormat::BC1_RGBA_UNORM;
case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB):
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 413baf730..0e0fd410f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -193,11 +193,11 @@ void TextureCache<P>::SynchronizeGraphicsDescriptors() {
const bool linked_tsc = maxwell3d->regs.sampler_binding == SamplerBinding::ViaHeaderBinding;
const u32 tic_limit = maxwell3d->regs.tex_header.limit;
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit;
- if (channel_state->graphics_sampler_table.Synchornize(maxwell3d->regs.tex_sampler.Address(),
+ if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(),
tsc_limit)) {
channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
}
- if (channel_state->graphics_image_table.Synchornize(maxwell3d->regs.tex_header.Address(),
+ if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(),
tic_limit)) {
channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
}
@@ -209,10 +209,10 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
const u32 tic_limit = kepler_compute->regs.tic.limit;
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
- if (channel_state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+ if (channel_state->compute_sampler_table.Synchronize(tsc_gpu_addr, tsc_limit)) {
channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
}
- if (channel_state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(),
+ if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(),
tic_limit)) {
channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
}