summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt9
-rw-r--r--src/video_core/engines/maxwell_3d.cpp285
-rw-r--r--src/video_core/engines/maxwell_3d.h48
-rw-r--r--src/video_core/engines/maxwell_dma.cpp127
-rw-r--r--src/video_core/engines/maxwell_dma.h2
-rw-r--r--src/video_core/engines/puller.cpp5
-rw-r--r--src/video_core/macro/macro_hle.cpp47
-rw-r--r--src/video_core/macro/macro_interpreter.cpp2
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp2
-rw-r--r--src/video_core/memory_manager.cpp65
-rw-r--r--src/video_core/memory_manager.h21
-rw-r--r--src/video_core/pte_kind.h264
-rw-r--r--src/video_core/rasterizer_interface.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp47
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h4
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp2
-rw-r--r--src/video_core/texture_cache/texture_cache.h8
-rw-r--r--src/video_core/textures/astc.cpp4
-rw-r--r--src/video_core/textures/decoders.cpp2
24 files changed, 647 insertions, 329 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 40e6d1ec4..106991969 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -82,6 +82,7 @@ add_library(video_core STATIC
gpu_thread.h
memory_manager.cpp
memory_manager.h
+ pte_kind.h
query_cache.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h
@@ -278,14 +279,8 @@ if (MSVC)
else()
target_compile_options(video_core PRIVATE
-Werror=conversion
- -Wno-error=sign-conversion
- -Werror=pessimizing-move
- -Werror=redundant-move
- -Werror=type-limits
- $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
- $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
- $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
+ -Wno-sign-conversion
)
endif()
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 89a9d1f5a..f9794dfe4 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -117,10 +117,15 @@ void Maxwell3D::InitializeRegisterDefaults() {
shadow_state = regs;
- mme_inline[MAXWELL3D_REG_INDEX(draw.end)] = true;
- mme_inline[MAXWELL3D_REG_INDEX(draw.begin)] = true;
- mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
- mme_inline[MAXWELL3D_REG_INDEX(index_buffer.count)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(draw.end)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(draw.begin)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.first)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(index_buffer.first)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(index_buffer.count)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(draw_inline_index)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(inline_index_2x16.even)] = true;
+ draw_command[MAXWELL3D_REG_INDEX(inline_index_4x8.index0)] = true;
}
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
@@ -208,25 +213,21 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return ProcessCBBind(3);
case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config):
return ProcessCBBind(4);
- case MAXWELL3D_REG_INDEX(draw.end):
- return DrawArrays();
case MAXWELL3D_REG_INDEX(index_buffer32_first):
regs.index_buffer.count = regs.index_buffer32_first.count;
regs.index_buffer.first = regs.index_buffer32_first.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- return DrawArrays();
+ return ProcessDraw();
case MAXWELL3D_REG_INDEX(index_buffer16_first):
regs.index_buffer.count = regs.index_buffer16_first.count;
regs.index_buffer.first = regs.index_buffer16_first.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- return DrawArrays();
+ return ProcessDraw();
case MAXWELL3D_REG_INDEX(index_buffer8_first):
regs.index_buffer.count = regs.index_buffer8_first.count;
regs.index_buffer.first = regs.index_buffer8_first.first;
dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- // a macro calls this one over and over, should it increase instancing?
- // Used by Hades and likely other Vulkan games.
- return DrawArrays();
+ return ProcessDraw();
case MAXWELL3D_REG_INDEX(topology_override):
use_topology_override = true;
return;
@@ -261,14 +262,13 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
// Execute the current macro.
macro_engine->Execute(macro_positions[entry], parameters);
- if (mme_draw.current_mode != MMEDrawMode::Undefined) {
- FlushMMEInlineDraw();
- }
+
+ ProcessDeferredDraw();
}
void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
- // It is an error to write to a register other than the current macro's ARG register before it
- // has finished execution.
+ // It is an error to write to a register other than the current macro's ARG register before
+ // it has finished execution.
if (executing_macro != 0) {
ASSERT(method == executing_macro + 1);
}
@@ -283,9 +283,33 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
- const u32 argument = ProcessShadowRam(method, method_argument);
- ProcessDirtyRegisters(method, argument);
- ProcessMethodCall(method, argument, method_argument, is_last_call);
+ if (draw_command[method]) {
+ regs.reg_array[method] = method_argument;
+ deferred_draw_method.push_back(method);
+ auto u32_to_u8 = [&](const u32 argument) {
+ inline_index_draw_indexes.push_back(static_cast<u8>(argument & 0x000000ff));
+ inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0x0000ff00) >> 8));
+ inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0x00ff0000) >> 16));
+ inline_index_draw_indexes.push_back(static_cast<u8>((argument & 0xff000000) >> 24));
+ };
+ if (MAXWELL3D_REG_INDEX(draw_inline_index) == method) {
+ u32_to_u8(method_argument);
+ } else if (MAXWELL3D_REG_INDEX(inline_index_2x16.even) == method) {
+ u32_to_u8(regs.inline_index_2x16.even);
+ u32_to_u8(regs.inline_index_2x16.odd);
+ } else if (MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == method) {
+ u32_to_u8(regs.inline_index_4x8.index0);
+ u32_to_u8(regs.inline_index_4x8.index1);
+ u32_to_u8(regs.inline_index_4x8.index2);
+ u32_to_u8(regs.inline_index_4x8.index3);
+ }
+ } else {
+ ProcessDeferredDraw();
+
+ const u32 argument = ProcessShadowRam(method, method_argument);
+ ProcessDirtyRegisters(method, argument);
+ ProcessMethodCall(method, argument, method_argument, is_last_call);
+ }
}
void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
@@ -326,55 +350,6 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
}
}
-void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
- if (mme_draw.current_mode == MMEDrawMode::Undefined) {
- if (mme_draw.gl_begin_consume) {
- mme_draw.current_mode = expected_mode;
- mme_draw.current_count = count;
- mme_draw.instance_count = 1;
- mme_draw.gl_begin_consume = false;
- mme_draw.gl_end_count = 0;
- }
- return;
- } else {
- if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count &&
- mme_draw.instance_mode && mme_draw.gl_begin_consume) {
- mme_draw.instance_count++;
- mme_draw.gl_begin_consume = false;
- return;
- } else {
- FlushMMEInlineDraw();
- }
- }
- // Tail call in case it needs to retry.
- StepInstance(expected_mode, count);
-}
-
-void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
- if (mme_inline[method]) {
- regs.reg_array[method] = method_argument;
- if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) ||
- method == MAXWELL3D_REG_INDEX(index_buffer.count)) {
- const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count)
- ? MMEDrawMode::Array
- : MMEDrawMode::Indexed;
- StepInstance(expected_mode, method_argument);
- } else if (method == MAXWELL3D_REG_INDEX(draw.begin)) {
- mme_draw.instance_mode =
- (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) ||
- (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged);
- mme_draw.gl_begin_consume = true;
- } else {
- mme_draw.gl_end_count++;
- }
- } else {
- if (mme_draw.current_mode != MMEDrawMode::Undefined) {
- FlushMMEInlineDraw();
- }
- CallMethod(method, method_argument, true);
- }
-}
-
void Maxwell3D::ProcessTopologyOverride() {
using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
@@ -404,41 +379,6 @@ void Maxwell3D::ProcessTopologyOverride() {
}
}
-void Maxwell3D::FlushMMEInlineDraw() {
- LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
- regs.vertex_buffer.count);
- ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?");
- ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
-
- // Both instance configuration registers can not be set at the same time.
- ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First ||
- regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged,
- "Illegal combination of instancing parameters");
-
- ProcessTopologyOverride();
-
- const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
- if (ShouldExecute()) {
- rasterizer->Draw(is_indexed, true);
- }
-
- // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
- // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
- // it's possible that it is incorrect and that there is some other register used to specify the
- // drawing mode.
- if (is_indexed) {
- regs.index_buffer.count = 0;
- } else {
- regs.vertex_buffer.count = 0;
- }
- mme_draw.current_mode = MMEDrawMode::Undefined;
- mme_draw.current_count = 0;
- mme_draw.instance_count = 0;
- mme_draw.instance_mode = false;
- mme_draw.gl_begin_consume = false;
- mme_draw.gl_end_count = 0;
-}
-
void Maxwell3D::ProcessMacroUpload(u32 data) {
macro_engine->AddCode(regs.load_mme.instruction_ptr++, data);
}
@@ -473,9 +413,7 @@ void Maxwell3D::ProcessQueryGet() {
switch (regs.report_semaphore.query.operation) {
case Regs::ReportSemaphore::Operation::Release:
- if (regs.report_semaphore.query.release ==
- Regs::ReportSemaphore::Release::AfterAllPreceedingWrites ||
- regs.report_semaphore.query.short_query != 0) {
+ if (regs.report_semaphore.query.short_query != 0) {
const GPUVAddr sequence_address{regs.report_semaphore.Address()};
const u32 payload = regs.report_semaphore.payload;
std::function<void()> operation([this, sequence_address, payload] {
@@ -489,11 +427,10 @@ void Maxwell3D::ProcessQueryGet() {
};
const GPUVAddr sequence_address{regs.report_semaphore.Address()};
const u32 payload = regs.report_semaphore.payload;
- std::function<void()> operation([this, sequence_address, payload] {
+ [this, sequence_address, payload] {
memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks());
memory_manager.Write<u64>(sequence_address, payload);
- });
- rasterizer->SyncOperation(std::move(operation));
+ }();
}
break;
case Regs::ReportSemaphore::Operation::Acquire:
@@ -569,47 +506,11 @@ void Maxwell3D::ProcessCounterReset() {
void Maxwell3D::ProcessSyncPoint() {
const u32 sync_point = regs.sync_info.sync_point.Value();
- const auto condition = regs.sync_info.condition.Value();
- [[maybe_unused]] const u32 cache_flush = regs.sync_info.clean_l2.Value();
- if (condition == Regs::SyncInfo::Condition::RopWritesDone) {
- rasterizer->SignalSyncPoint(sync_point);
- }
-}
-
-void Maxwell3D::DrawArrays() {
- LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
- regs.vertex_buffer.count);
- ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?");
-
- // Both instance configuration registers can not be set at the same time.
- ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First ||
- regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged,
- "Illegal combination of instancing parameters");
-
- ProcessTopologyOverride();
-
- if (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) {
- // Increment the current instance *before* drawing.
- state.current_instance++;
- } else if (regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged) {
- // Reset the current instance to 0.
- state.current_instance = 0;
- }
-
- const bool is_indexed{regs.index_buffer.count && !regs.vertex_buffer.count};
- if (ShouldExecute()) {
- rasterizer->Draw(is_indexed, false);
- }
-
- // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
- // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
- // it's possible that it is incorrect and that there is some other register used to specify the
- // drawing mode.
- if (is_indexed) {
- regs.index_buffer.count = 0;
- } else {
- regs.vertex_buffer.count = 0;
+ const u32 cache_flush = regs.sync_info.clean_l2.Value();
+ if (cache_flush != 0) {
+ rasterizer->InvalidateGPUCache();
}
+ rasterizer->SignalSyncPoint(sync_point);
}
std::optional<u64> Maxwell3D::GetQueryResult() {
@@ -694,4 +595,90 @@ void Maxwell3D::ProcessClearBuffers() {
rasterizer->Clear();
}
+void Maxwell3D::ProcessDraw(u32 instance_count) {
+ LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
+ regs.vertex_buffer.count);
+
+ ASSERT_MSG(!(regs.index_buffer.count && regs.vertex_buffer.count), "Both indexed and direct?");
+
+ // Both instance configuration registers can not be set at the same time.
+ ASSERT_MSG(regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First ||
+ regs.draw.instance_id != Maxwell3D::Regs::Draw::InstanceId::Unchanged,
+ "Illegal combination of instancing parameters");
+
+ ProcessTopologyOverride();
+
+ const bool is_indexed = regs.index_buffer.count && !regs.vertex_buffer.count;
+ if (ShouldExecute()) {
+ rasterizer->Draw(is_indexed, instance_count);
+ }
+
+ if (is_indexed) {
+ regs.index_buffer.count = 0;
+ } else {
+ regs.vertex_buffer.count = 0;
+ }
+}
+
+void Maxwell3D::ProcessDeferredDraw() {
+ if (deferred_draw_method.empty()) {
+ return;
+ }
+
+ enum class DrawMode {
+ Undefined,
+ General,
+ Instance,
+ };
+ DrawMode draw_mode{DrawMode::Undefined};
+ u32 instance_count = 1;
+
+ u32 index = 0;
+ u32 method = 0;
+ u32 method_count = static_cast<u32>(deferred_draw_method.size());
+ for (; index < method_count &&
+ (method = deferred_draw_method[index]) != MAXWELL3D_REG_INDEX(draw.begin);
+ ++index)
+ ;
+
+ if (MAXWELL3D_REG_INDEX(draw.begin) != method) {
+ return;
+ }
+
+ // The minimum number of methods for drawing must be greater than or equal to
+ // 3[draw.begin->vertex(index)count(first)->draw.end] to avoid errors in index mode drawing
+ if ((method_count - index) < 3) {
+ return;
+ }
+ draw_mode = (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) ||
+ (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged)
+ ? DrawMode::Instance
+ : DrawMode::General;
+
+ // Drawing will only begin with draw.begin or index_buffer method, other methods directly
+ // clear
+ if (draw_mode == DrawMode::Undefined) {
+ deferred_draw_method.clear();
+ return;
+ }
+
+ if (draw_mode == DrawMode::Instance) {
+ ASSERT_MSG(deferred_draw_method.size() % 4 == 0, "Instance mode method size error");
+ instance_count = static_cast<u32>(method_count - index) / 4;
+ } else {
+ method = deferred_draw_method[index + 1];
+ if (MAXWELL3D_REG_INDEX(draw_inline_index) == method ||
+ MAXWELL3D_REG_INDEX(inline_index_2x16.even) == method ||
+ MAXWELL3D_REG_INDEX(inline_index_4x8.index0) == method) {
+ regs.index_buffer.count = static_cast<u32>(inline_index_draw_indexes.size() / 4);
+ regs.index_buffer.format = Regs::IndexFormat::UnsignedInt;
+ }
+ }
+
+ ProcessDraw(instance_count);
+
+ deferred_draw_method.clear();
+ inline_index_draw_indexes.clear();
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 75e3b868d..a948fcb14 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1739,14 +1739,11 @@ public:
Footprint_1x1_Virtual = 2,
};
- struct InlineIndex4x8Align {
+ struct InlineIndex4x8 {
union {
BitField<0, 30, u32> count;
BitField<30, 2, u32> start;
};
- };
-
- struct InlineIndex4x8Index {
union {
BitField<0, 8, u32> index0;
BitField<8, 8, u32> index1;
@@ -2836,8 +2833,7 @@ public:
u32 depth_write_enabled; ///< 0x12E8
u32 alpha_test_enabled; ///< 0x12EC
INSERT_PADDING_BYTES_NOINIT(0x10);
- InlineIndex4x8Align inline_index_4x8_align; ///< 0x1300
- InlineIndex4x8Index inline_index_4x8_index; ///< 0x1304
+ InlineIndex4x8 inline_index_4x8; ///< 0x1300
D3DCullMode d3d_cull_mode; ///< 0x1308
ComparisonOp depth_test_func; ///< 0x130C
f32 alpha_test_ref; ///< 0x1310
@@ -3048,8 +3044,6 @@ public:
};
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
-
- u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
};
State state{};
@@ -3064,11 +3058,6 @@ public:
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) override;
- /// Write the value to the register identified by method.
- void CallMethodFromMME(u32 method, u32 method_argument);
-
- void FlushMMEInlineDraw();
-
bool ShouldExecute() const {
return execute_on;
}
@@ -3081,21 +3070,6 @@ public:
return *rasterizer;
}
- enum class MMEDrawMode : u32 {
- Undefined,
- Array,
- Indexed,
- };
-
- struct MMEDrawState {
- MMEDrawMode current_mode{MMEDrawMode::Undefined};
- u32 current_count{};
- u32 instance_count{};
- bool instance_mode{};
- bool gl_begin_consume{};
- u32 gl_end_count{};
- } mme_draw;
-
struct DirtyState {
using Flags = std::bitset<std::numeric_limits<u8>::max()>;
using Table = std::array<u8, Regs::NUM_REGS>;
@@ -3105,6 +3079,8 @@ public:
Tables tables{};
} dirty;
+ std::vector<u8> inline_index_draw_indexes;
+
private:
void InitializeRegisterDefaults();
@@ -3164,14 +3140,12 @@ private:
/// Handles a write to the CB_BIND register.
void ProcessCBBind(size_t stage_index);
- /// Handles a write to the VERTEX_END_GL register, triggering a draw.
- void DrawArrays();
-
/// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
void ProcessTopologyOverride();
- // Handles a instance drawcall from MME
- void StepInstance(MMEDrawMode expected_mode, u32 count);
+ void ProcessDraw(u32 instance_count = 1);
+
+ void ProcessDeferredDraw();
/// Returns a query's value or an empty object if the value will be deferred through a cache.
std::optional<u64> GetQueryResult();
@@ -3184,8 +3158,6 @@ private:
/// Start offsets of each macro in macro_memory
std::array<u32, 0x80> macro_positions{};
- std::array<bool, Regs::NUM_REGS> mme_inline{};
-
/// Macro method that is currently being executed / being fed parameters.
u32 executing_macro = 0;
/// Parameters that have been submitted to the macro call so far.
@@ -3198,6 +3170,9 @@ private:
bool execute_on{true};
bool use_topology_override{false};
+
+ std::array<bool, Regs::NUM_REGS> draw_command{};
+ std::vector<u32> deferred_draw_method;
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -3402,8 +3377,7 @@ ASSERT_REG_POSITION(alpha_to_coverage_dither, 0x12E0);
ASSERT_REG_POSITION(blend_per_target_enabled, 0x12E4);
ASSERT_REG_POSITION(depth_write_enabled, 0x12E8);
ASSERT_REG_POSITION(alpha_test_enabled, 0x12EC);
-ASSERT_REG_POSITION(inline_index_4x8_align, 0x1300);
-ASSERT_REG_POSITION(inline_index_4x8_index, 0x1304);
+ASSERT_REG_POSITION(inline_index_4x8, 0x1300);
ASSERT_REG_POSITION(d3d_cull_mode, 0x1308);
ASSERT_REG_POSITION(depth_test_func, 0x130C);
ASSERT_REG_POSITION(alpha_test_ref, 0x1310);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 3909d36c1..4eb7a100d 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -56,66 +56,85 @@ void MaxwellDMA::Launch() {
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
- const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
- const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
-
- if (!is_src_pitch && !is_dst_pitch) {
- // If both the source and the destination are in block layout, assert.
- UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
- return;
- }
+ if (launch.multi_line_enable) {
+ const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+ const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+
+ if (!is_src_pitch && !is_dst_pitch) {
+ // If both the source and the destination are in block layout, assert.
+ UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
+ return;
+ }
- if (is_src_pitch && is_dst_pitch) {
- CopyPitchToPitch();
+ if (is_src_pitch && is_dst_pitch) {
+ for (u32 line = 0; line < regs.line_count; ++line) {
+ const GPUVAddr source_line =
+ regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
+ const GPUVAddr dest_line =
+ regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
+ memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
+ }
+ } else {
+ if (!is_src_pitch && is_dst_pitch) {
+ CopyBlockLinearToPitch();
+ } else {
+ CopyPitchToBlockLinear();
+ }
+ }
} else {
- ASSERT(launch.multi_line_enable == 1);
-
- if (!is_src_pitch && is_dst_pitch) {
- CopyBlockLinearToPitch();
+ // TODO: allow multisized components.
+ auto& accelerate = rasterizer->AccessAccelerateDMA();
+ const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
+ if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
+ ASSERT(regs.remap_const.component_size_minus_one == 3);
+ accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
+ std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
+ memory_manager.WriteBlockUnsafe(regs.offset_out,
+ reinterpret_cast<u8*>(tmp_buffer.data()),
+ regs.line_length_in * sizeof(u32));
} else {
- CopyPitchToBlockLinear();
+ auto convert_linear_2_blocklinear_addr = [](u64 address) {
+ return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
+ ((address & 0x180) >> 1) | ((address & 0x20) << 3);
+ };
+ auto src_kind = memory_manager.GetPageKind(regs.offset_in);
+ auto dst_kind = memory_manager.GetPageKind(regs.offset_out);
+ const bool is_src_pitch = IsPitchKind(static_cast<PTEKind>(src_kind));
+ const bool is_dst_pitch = IsPitchKind(static_cast<PTEKind>(dst_kind));
+ if (!is_src_pitch && is_dst_pitch) {
+ std::vector<u8> tmp_buffer(regs.line_length_in);
+ std::vector<u8> dst_buffer(regs.line_length_in);
+ memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+ regs.line_length_in);
+ for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
+ dst_buffer[offset] =
+ tmp_buffer[convert_linear_2_blocklinear_addr(regs.offset_in + offset) -
+ regs.offset_in];
+ }
+ memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
+ } else if (is_src_pitch && !is_dst_pitch) {
+ std::vector<u8> tmp_buffer(regs.line_length_in);
+ std::vector<u8> dst_buffer(regs.line_length_in);
+ memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+ regs.line_length_in);
+ for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
+ dst_buffer[convert_linear_2_blocklinear_addr(regs.offset_out + offset) -
+ regs.offset_out] = tmp_buffer[offset];
+ }
+ memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
+ } else {
+ if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
+ std::vector<u8> tmp_buffer(regs.line_length_in);
+ memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+ regs.line_length_in);
+ memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
+ regs.line_length_in);
+ }
+ }
}
}
- ReleaseSemaphore();
-}
-void MaxwellDMA::CopyPitchToPitch() {
- // When `multi_line_enable` bit is enabled we copy a 2D image of dimensions
- // (line_length_in, line_count).
- // Otherwise the copy is performed as if we were copying a 1D buffer of length line_length_in.
- const bool remap_enabled = regs.launch_dma.remap_enable != 0;
- if (regs.launch_dma.multi_line_enable) {
- UNIMPLEMENTED_IF(remap_enabled);
-
- // Perform a line-by-line copy.
- // We're going to take a subrect of size (line_length_in, line_count) from the source
- // rectangle. There is no need to manually flush/invalidate the regions because CopyBlock
- // does that for us.
- for (u32 line = 0; line < regs.line_count; ++line) {
- const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
- const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
- memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
- }
- return;
- }
- // TODO: allow multisized components.
- auto& accelerate = rasterizer->AccessAccelerateDMA();
- const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
- const bool is_buffer_clear = remap_enabled && is_const_a_dst;
- if (is_buffer_clear) {
- ASSERT(regs.remap_const.component_size_minus_one == 3);
- accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
- std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
- memory_manager.WriteBlockUnsafe(regs.offset_out, reinterpret_cast<u8*>(tmp_buffer.data()),
- regs.line_length_in * sizeof(u32));
- return;
- }
- UNIMPLEMENTED_IF(remap_enabled);
- if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
- std::vector<u8> tmp_buffer(regs.line_length_in);
- memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in);
- memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in);
- }
+ ReleaseSemaphore();
}
void MaxwellDMA::CopyBlockLinearToPitch() {
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index bc48320ce..953e34adc 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -219,8 +219,6 @@ private:
/// registers.
void Launch();
- void CopyPitchToPitch();
-
void CopyBlockLinearToPitch();
void CopyPitchToBlockLinear();
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index cca890792..3977bb0fb 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -75,11 +75,10 @@ void Puller::ProcessSemaphoreTriggerMethod() {
if (op == GpuSemaphoreOperation::WriteLong) {
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
const u32 payload = regs.semaphore_sequence;
- std::function<void()> operation([this, sequence_address, payload] {
+ [this, sequence_address, payload] {
memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks());
memory_manager.Write<u64>(sequence_address, payload);
- });
- rasterizer->SignalFence(std::move(operation));
+ }();
} else {
do {
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 8a8adbb42..f896591bf 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -22,35 +22,29 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.regs.draw.topology.Assign(
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff));
maxwell3d.regs.global_base_instance_index = parameters[5];
- maxwell3d.mme_draw.instance_count = instance_count;
maxwell3d.regs.global_base_vertex_index = parameters[3];
maxwell3d.regs.index_buffer.count = parameters[1];
maxwell3d.regs.index_buffer.first = parameters[4];
if (maxwell3d.ShouldExecute()) {
- maxwell3d.Rasterizer().Draw(true, true);
+ maxwell3d.Rasterizer().Draw(true, instance_count);
}
maxwell3d.regs.index_buffer.count = 0;
- maxwell3d.mme_draw.instance_count = 0;
- maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
- const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
+ const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
maxwell3d.regs.vertex_buffer.first = parameters[3];
maxwell3d.regs.vertex_buffer.count = parameters[1];
maxwell3d.regs.global_base_instance_index = parameters[4];
maxwell3d.regs.draw.topology.Assign(
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
- maxwell3d.mme_draw.instance_count = count;
if (maxwell3d.ShouldExecute()) {
- maxwell3d.Rasterizer().Draw(false, true);
+ maxwell3d.Rasterizer().Draw(false, instance_count);
}
maxwell3d.regs.vertex_buffer.count = 0;
- maxwell3d.mme_draw.instance_count = 0;
- maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
@@ -63,24 +57,21 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.regs.global_base_vertex_index = element_base;
maxwell3d.regs.global_base_instance_index = base_instance;
- maxwell3d.mme_draw.instance_count = instance_count;
- maxwell3d.CallMethodFromMME(0x8e3, 0x640);
- maxwell3d.CallMethodFromMME(0x8e4, element_base);
- maxwell3d.CallMethodFromMME(0x8e5, base_instance);
+ maxwell3d.CallMethod(0x8e3, 0x640, true);
+ maxwell3d.CallMethod(0x8e4, element_base, true);
+ maxwell3d.CallMethod(0x8e5, base_instance, true);
maxwell3d.regs.draw.topology.Assign(
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
if (maxwell3d.ShouldExecute()) {
- maxwell3d.Rasterizer().Draw(true, true);
+ maxwell3d.Rasterizer().Draw(true, instance_count);
}
maxwell3d.regs.vertex_id_base = 0x0;
maxwell3d.regs.index_buffer.count = 0;
maxwell3d.regs.global_base_vertex_index = 0x0;
maxwell3d.regs.global_base_instance_index = 0x0;
- maxwell3d.mme_draw.instance_count = 0;
- maxwell3d.CallMethodFromMME(0x8e3, 0x640);
- maxwell3d.CallMethodFromMME(0x8e4, 0x0);
- maxwell3d.CallMethodFromMME(0x8e5, 0x0);
- maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+ maxwell3d.CallMethod(0x8e3, 0x640, true);
+ maxwell3d.CallMethod(0x8e4, 0x0, true);
+ maxwell3d.CallMethod(0x8e5, 0x0, true);
}
// Multidraw Indirect
@@ -91,11 +82,9 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.regs.index_buffer.count = 0;
maxwell3d.regs.global_base_vertex_index = 0x0;
maxwell3d.regs.global_base_instance_index = 0x0;
- maxwell3d.mme_draw.instance_count = 0;
- maxwell3d.CallMethodFromMME(0x8e3, 0x640);
- maxwell3d.CallMethodFromMME(0x8e4, 0x0);
- maxwell3d.CallMethodFromMME(0x8e5, 0x0);
- maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+ maxwell3d.CallMethod(0x8e3, 0x640, true);
+ maxwell3d.CallMethod(0x8e4, 0x0, true);
+ maxwell3d.CallMethod(0x8e5, 0x0, true);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
});
const u32 start_indirect = parameters[0];
@@ -127,15 +116,13 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.regs.index_buffer.count = num_vertices;
maxwell3d.regs.global_base_vertex_index = base_vertex;
maxwell3d.regs.global_base_instance_index = base_instance;
- maxwell3d.mme_draw.instance_count = instance_count;
- maxwell3d.CallMethodFromMME(0x8e3, 0x640);
- maxwell3d.CallMethodFromMME(0x8e4, base_vertex);
- maxwell3d.CallMethodFromMME(0x8e5, base_instance);
+ maxwell3d.CallMethod(0x8e3, 0x640, true);
+ maxwell3d.CallMethod(0x8e4, base_vertex, true);
+ maxwell3d.CallMethod(0x8e5, base_instance, true);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
if (maxwell3d.ShouldExecute()) {
- maxwell3d.Rasterizer().Draw(true, true);
+ maxwell3d.Rasterizer().Draw(true, instance_count);
}
- maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
}
}
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index f670b1bca..c0d32c112 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -335,7 +335,7 @@ void MacroInterpreterImpl::SetMethodAddress(u32 address) {
}
void MacroInterpreterImpl::Send(u32 value) {
- maxwell3d.CallMethodFromMME(method_address.address, value);
+ maxwell3d.CallMethod(method_address.address, value, true);
// Increment the method address by the method increment.
method_address.address.Assign(method_address.address.Value() +
method_address.increment.Value());
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index a302a9603..25c1ce798 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -346,7 +346,7 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
}
void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
- maxwell3d->CallMethodFromMME(method_address.address, value);
+ maxwell3d->CallMethod(method_address.address, value, true);
}
void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index cca401c74..384350dbd 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -41,7 +41,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
big_entries.resize(big_page_table_size / 32, 0);
big_page_table_cpu.resize(big_page_table_size);
big_page_continous.resize(big_page_table_size / continous_bits, 0);
+ std::array<PTEKind, 32> kind_valus;
+ kind_valus.fill(PTEKind::INVALID);
+ big_kinds.resize(big_page_table_size / 32, kind_valus);
entries.resize(page_table_size / 32, 0);
+ kinds.resize(big_page_table_size / 32, kind_valus);
}
MemoryManager::~MemoryManager() = default;
@@ -78,6 +82,41 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
}
}
+PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const {  // Returns the PTEKind recorded for gpu_addr.
+ auto entry = GetEntry<true>(gpu_addr);  // entry state from the is_big_page=true lookup
+ if (entry == EntryType::Mapped || entry == EntryType::Reserved) [[likely]] {
+ return GetKind<true>(gpu_addr);  // Mapped/Reserved in that lookup: read from big_kinds
+ } else {
+ return GetKind<false>(gpu_addr);  // any other state: read from kinds instead
+ }
+}
+
+template <bool is_big_page>  // true: read big_kinds; false: read kinds
+PTEKind MemoryManager::GetKind(size_t position) const {  // Returns the kind stored for `position`.
+ if constexpr (is_big_page) {
+ position = position >> big_page_bits;  // discard the low big_page_bits bits to get the entry index
+ const size_t sub_index = position % 32;  // slot within one 32-element array
+ return big_kinds[position / 32][sub_index];  // position / 32 selects which array
+ } else {
+ position = position >> page_bits;  // same scheme, shifting by page_bits
+ const size_t sub_index = position % 32;  // slot within one 32-element array
+ return kinds[position / 32][sub_index];  // position / 32 selects which array
+ }
+}
+
+template <bool is_big_page>  // true: write big_kinds; false: write kinds
+void MemoryManager::SetKind(size_t position, PTEKind kind) {  // Stores `kind` for `position`.
+ if constexpr (is_big_page) {
+ position = position >> big_page_bits;  // discard the low big_page_bits bits to get the entry index
+ const size_t sub_index = position % 32;  // slot within one 32-element array
+ big_kinds[position / 32][sub_index] = kind;  // position / 32 selects which array
+ } else {
+ position = position >> page_bits;  // same scheme, shifting by page_bits
+ const size_t sub_index = position % 32;  // slot within one 32-element array
+ kinds[position / 32][sub_index] = kind;  // position / 32 selects which array
+ }
+}
+
inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const {
const u64 entry_mask = big_page_continous[big_page_index / continous_bits];
const size_t sub_index = big_page_index % continous_bits;
@@ -92,9 +131,9 @@ inline void MemoryManager::SetBigPageContinous(size_t big_page_index, bool value
}
template <MemoryManager::EntryType entry_type>
-GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
- size_t size) {
- u64 remaining_size{size};
+GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
+ PTEKind kind) {
+ [[maybe_unused]] u64 remaining_size{size};
if constexpr (entry_type == EntryType::Mapped) {
page_table.ReserveRange(gpu_addr, size);
}
@@ -102,6 +141,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
const GPUVAddr current_gpu_addr = gpu_addr + offset;
[[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
SetEntry<false>(current_gpu_addr, entry_type);
+ SetKind<false>(current_gpu_addr, kind);
if (current_entry_type != entry_type) {
rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
}
@@ -118,12 +158,13 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
template <MemoryManager::EntryType entry_type>
GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
- size_t size) {
- u64 remaining_size{size};
+ size_t size, PTEKind kind) {
+ [[maybe_unused]] u64 remaining_size{size};
for (u64 offset{}; offset < size; offset += big_page_size) {
const GPUVAddr current_gpu_addr = gpu_addr + offset;
[[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
SetEntry<true>(current_gpu_addr, entry_type);
+ SetKind<true>(current_gpu_addr, kind);
if (current_entry_type != entry_type) {
rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
}
@@ -159,19 +200,19 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
rasterizer = rasterizer_;
}
-GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
+GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind,
bool is_big_pages) {
if (is_big_pages) [[likely]] {
- return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
+ return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
}
- return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
+ return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
}
GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
if (is_big_pages) [[likely]] {
- return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
+ return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
}
- return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
+ return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
}
void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
@@ -188,8 +229,8 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
rasterizer->UnmapMemory(*cpu_addr, map_size);
}
- BigPageTableOp<EntryType::Free>(gpu_addr, 0, size);
- PageTableOp<EntryType::Free>(gpu_addr, 0, size);
+ BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
+ PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
}
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index f992e29f3..ab4bc9ec6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -11,6 +11,7 @@
#include "common/common_types.h"
#include "common/multi_level_page_table.h"
#include "common/virtual_buffer.h"
+#include "video_core/pte_kind.h"
namespace VideoCore {
class RasterizerInterface;
@@ -98,7 +99,8 @@ public:
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
std::size_t size) const;
- GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, bool is_big_pages = true);
+ GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
+ PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
void Unmap(GPUVAddr gpu_addr, std::size_t size);
@@ -114,6 +116,8 @@ public:
return gpu_addr < address_space_size;
}
+ PTEKind GetPageKind(GPUVAddr gpu_addr) const;
+
private:
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
@@ -166,10 +170,12 @@ private:
std::vector<u64> big_entries;
template <EntryType entry_type>
- GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+ GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
+ PTEKind kind);
template <EntryType entry_type>
- GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+ GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
+ PTEKind kind);
template <bool is_big_page>
inline EntryType GetEntry(size_t position) const;
@@ -177,6 +183,15 @@ private:
template <bool is_big_page>
inline void SetEntry(size_t position, EntryType entry);
+ std::vector<std::array<PTEKind, 32>> kinds;
+ std::vector<std::array<PTEKind, 32>> big_kinds;
+
+ template <bool is_big_page>
+ inline PTEKind GetKind(size_t position) const;
+
+ template <bool is_big_page>
+ inline void SetKind(size_t position, PTEKind kind);
+
Common::MultiLevelPageTable<u32> page_table;
Common::VirtualBuffer<u32> big_page_table_cpu;
diff --git a/src/video_core/pte_kind.h b/src/video_core/pte_kind.h
new file mode 100644
index 000000000..591d7214b
--- /dev/null
+++ b/src/video_core/pte_kind.h
@@ -0,0 +1,264 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+// https://github.com/NVIDIA/open-gpu-doc/blob/master/manuals/volta/gv100/dev_mmu.ref.txt
+enum class PTEKind : u8 {  // Named kind codes with a u8 underlying type; see the reference linked above.
+ INVALID = 0xff,  // listed first but holds the largest u8 value; enumerators below are not in numeric order
+ PITCH = 0x00,  // one of the two kinds IsPitchKind() accepts
+ Z16 = 0x01,
+ Z16_2C = 0x02,
+ Z16_MS2_2C = 0x03,
+ Z16_MS4_2C = 0x04,
+ Z16_MS8_2C = 0x05,
+ Z16_MS16_2C = 0x06,
+ Z16_2Z = 0x07,
+ Z16_MS2_2Z = 0x08,
+ Z16_MS4_2Z = 0x09,
+ Z16_MS8_2Z = 0x0a,
+ Z16_MS16_2Z = 0x0b,
+ Z16_2CZ = 0x36,
+ Z16_MS2_2CZ = 0x37,
+ Z16_MS4_2CZ = 0x38,
+ Z16_MS8_2CZ = 0x39,
+ Z16_MS16_2CZ = 0x5f,
+ Z16_4CZ = 0x0c,
+ Z16_MS2_4CZ = 0x0d,
+ Z16_MS4_4CZ = 0x0e,
+ Z16_MS8_4CZ = 0x0f,
+ Z16_MS16_4CZ = 0x10,
+ S8Z24 = 0x11,
+ S8Z24_1Z = 0x12,
+ S8Z24_MS2_1Z = 0x13,
+ S8Z24_MS4_1Z = 0x14,
+ S8Z24_MS8_1Z = 0x15,
+ S8Z24_MS16_1Z = 0x16,
+ S8Z24_2CZ = 0x17,
+ S8Z24_MS2_2CZ = 0x18,
+ S8Z24_MS4_2CZ = 0x19,
+ S8Z24_MS8_2CZ = 0x1a,
+ S8Z24_MS16_2CZ = 0x1b,
+ S8Z24_2CS = 0x1c,
+ S8Z24_MS2_2CS = 0x1d,
+ S8Z24_MS4_2CS = 0x1e,
+ S8Z24_MS8_2CS = 0x1f,
+ S8Z24_MS16_2CS = 0x20,
+ S8Z24_4CSZV = 0x21,
+ S8Z24_MS2_4CSZV = 0x22,
+ S8Z24_MS4_4CSZV = 0x23,
+ S8Z24_MS8_4CSZV = 0x24,
+ S8Z24_MS16_4CSZV = 0x25,
+ V8Z24_MS4_VC12 = 0x26,
+ V8Z24_MS4_VC4 = 0x27,
+ V8Z24_MS8_VC8 = 0x28,
+ V8Z24_MS8_VC24 = 0x29,
+ V8Z24_MS4_VC12_1ZV = 0x2e,
+ V8Z24_MS4_VC4_1ZV = 0x2f,
+ V8Z24_MS8_VC8_1ZV = 0x30,
+ V8Z24_MS8_VC24_1ZV = 0x31,
+ V8Z24_MS4_VC12_2CS = 0x32,
+ V8Z24_MS4_VC4_2CS = 0x33,
+ V8Z24_MS8_VC8_2CS = 0x34,
+ V8Z24_MS8_VC24_2CS = 0x35,
+ V8Z24_MS4_VC12_2CZV = 0x3a,
+ V8Z24_MS4_VC4_2CZV = 0x3b,
+ V8Z24_MS8_VC8_2CZV = 0x3c,
+ V8Z24_MS8_VC24_2CZV = 0x3d,
+ V8Z24_MS4_VC12_2ZV = 0x3e,
+ V8Z24_MS4_VC4_2ZV = 0x3f,
+ V8Z24_MS8_VC8_2ZV = 0x40,
+ V8Z24_MS8_VC24_2ZV = 0x41,
+ V8Z24_MS4_VC12_4CSZV = 0x42,
+ V8Z24_MS4_VC4_4CSZV = 0x43,
+ V8Z24_MS8_VC8_4CSZV = 0x44,
+ V8Z24_MS8_VC24_4CSZV = 0x45,
+ Z24S8 = 0x46,
+ Z24S8_1Z = 0x47,
+ Z24S8_MS2_1Z = 0x48,
+ Z24S8_MS4_1Z = 0x49,
+ Z24S8_MS8_1Z = 0x4a,
+ Z24S8_MS16_1Z = 0x4b,
+ Z24S8_2CS = 0x4c,
+ Z24S8_MS2_2CS = 0x4d,
+ Z24S8_MS4_2CS = 0x4e,
+ Z24S8_MS8_2CS = 0x4f,
+ Z24S8_MS16_2CS = 0x50,
+ Z24S8_2CZ = 0x51,
+ Z24S8_MS2_2CZ = 0x52,
+ Z24S8_MS4_2CZ = 0x53,
+ Z24S8_MS8_2CZ = 0x54,
+ Z24S8_MS16_2CZ = 0x55,
+ Z24S8_4CSZV = 0x56,
+ Z24S8_MS2_4CSZV = 0x57,
+ Z24S8_MS4_4CSZV = 0x58,
+ Z24S8_MS8_4CSZV = 0x59,
+ Z24S8_MS16_4CSZV = 0x5a,
+ Z24V8_MS4_VC12 = 0x5b,
+ Z24V8_MS4_VC4 = 0x5c,
+ Z24V8_MS8_VC8 = 0x5d,
+ Z24V8_MS8_VC24 = 0x5e,
+ YUV_B8C1_2Y = 0x60,
+ YUV_B8C2_2Y = 0x61,
+ YUV_B10C1_2Y = 0x62,
+ YUV_B10C2_2Y = 0x6b,
+ YUV_B12C1_2Y = 0x6c,
+ YUV_B12C2_2Y = 0x6d,
+ Z24V8_MS4_VC12_1ZV = 0x63,
+ Z24V8_MS4_VC4_1ZV = 0x64,
+ Z24V8_MS8_VC8_1ZV = 0x65,
+ Z24V8_MS8_VC24_1ZV = 0x66,
+ Z24V8_MS4_VC12_2CS = 0x67,
+ Z24V8_MS4_VC4_2CS = 0x68,
+ Z24V8_MS8_VC8_2CS = 0x69,
+ Z24V8_MS8_VC24_2CS = 0x6a,
+ Z24V8_MS4_VC12_2CZV = 0x6f,
+ Z24V8_MS4_VC4_2CZV = 0x70,
+ Z24V8_MS8_VC8_2CZV = 0x71,
+ Z24V8_MS8_VC24_2CZV = 0x72,
+ Z24V8_MS4_VC12_2ZV = 0x73,
+ Z24V8_MS4_VC4_2ZV = 0x74,
+ Z24V8_MS8_VC8_2ZV = 0x75,
+ Z24V8_MS8_VC24_2ZV = 0x76,
+ Z24V8_MS4_VC12_4CSZV = 0x77,
+ Z24V8_MS4_VC4_4CSZV = 0x78,
+ Z24V8_MS8_VC8_4CSZV = 0x79,
+ Z24V8_MS8_VC24_4CSZV = 0x7a,
+ ZF32 = 0x7b,
+ ZF32_1Z = 0x7c,
+ ZF32_MS2_1Z = 0x7d,
+ ZF32_MS4_1Z = 0x7e,
+ ZF32_MS8_1Z = 0x7f,
+ ZF32_MS16_1Z = 0x80,
+ ZF32_2CS = 0x81,
+ ZF32_MS2_2CS = 0x82,
+ ZF32_MS4_2CS = 0x83,
+ ZF32_MS8_2CS = 0x84,
+ ZF32_MS16_2CS = 0x85,
+ ZF32_2CZ = 0x86,
+ ZF32_MS2_2CZ = 0x87,
+ ZF32_MS4_2CZ = 0x88,
+ ZF32_MS8_2CZ = 0x89,
+ ZF32_MS16_2CZ = 0x8a,
+ X8Z24_X16V8S8_MS4_VC12 = 0x8b,
+ X8Z24_X16V8S8_MS4_VC4 = 0x8c,
+ X8Z24_X16V8S8_MS8_VC8 = 0x8d,
+ X8Z24_X16V8S8_MS8_VC24 = 0x8e,
+ X8Z24_X16V8S8_MS4_VC12_1CS = 0x8f,
+ X8Z24_X16V8S8_MS4_VC4_1CS = 0x90,
+ X8Z24_X16V8S8_MS8_VC8_1CS = 0x91,
+ X8Z24_X16V8S8_MS8_VC24_1CS = 0x92,
+ X8Z24_X16V8S8_MS4_VC12_1ZV = 0x97,
+ X8Z24_X16V8S8_MS4_VC4_1ZV = 0x98,
+ X8Z24_X16V8S8_MS8_VC8_1ZV = 0x99,
+ X8Z24_X16V8S8_MS8_VC24_1ZV = 0x9a,
+ X8Z24_X16V8S8_MS4_VC12_1CZV = 0x9b,
+ X8Z24_X16V8S8_MS4_VC4_1CZV = 0x9c,
+ X8Z24_X16V8S8_MS8_VC8_1CZV = 0x9d,
+ X8Z24_X16V8S8_MS8_VC24_1CZV = 0x9e,
+ X8Z24_X16V8S8_MS4_VC12_2CS = 0x9f,
+ X8Z24_X16V8S8_MS4_VC4_2CS = 0xa0,
+ X8Z24_X16V8S8_MS8_VC8_2CS = 0xa1,
+ X8Z24_X16V8S8_MS8_VC24_2CS = 0xa2,
+ X8Z24_X16V8S8_MS4_VC12_2CSZV = 0xa3,
+ X8Z24_X16V8S8_MS4_VC4_2CSZV = 0xa4,
+ X8Z24_X16V8S8_MS8_VC8_2CSZV = 0xa5,
+ X8Z24_X16V8S8_MS8_VC24_2CSZV = 0xa6,
+ ZF32_X16V8S8_MS4_VC12 = 0xa7,
+ ZF32_X16V8S8_MS4_VC4 = 0xa8,
+ ZF32_X16V8S8_MS8_VC8 = 0xa9,
+ ZF32_X16V8S8_MS8_VC24 = 0xaa,
+ ZF32_X16V8S8_MS4_VC12_1CS = 0xab,
+ ZF32_X16V8S8_MS4_VC4_1CS = 0xac,
+ ZF32_X16V8S8_MS8_VC8_1CS = 0xad,
+ ZF32_X16V8S8_MS8_VC24_1CS = 0xae,
+ ZF32_X16V8S8_MS4_VC12_1ZV = 0xb3,
+ ZF32_X16V8S8_MS4_VC4_1ZV = 0xb4,
+ ZF32_X16V8S8_MS8_VC8_1ZV = 0xb5,
+ ZF32_X16V8S8_MS8_VC24_1ZV = 0xb6,
+ ZF32_X16V8S8_MS4_VC12_1CZV = 0xb7,
+ ZF32_X16V8S8_MS4_VC4_1CZV = 0xb8,
+ ZF32_X16V8S8_MS8_VC8_1CZV = 0xb9,
+ ZF32_X16V8S8_MS8_VC24_1CZV = 0xba,
+ ZF32_X16V8S8_MS4_VC12_2CS = 0xbb,
+ ZF32_X16V8S8_MS4_VC4_2CS = 0xbc,
+ ZF32_X16V8S8_MS8_VC8_2CS = 0xbd,
+ ZF32_X16V8S8_MS8_VC24_2CS = 0xbe,
+ ZF32_X16V8S8_MS4_VC12_2CSZV = 0xbf,
+ ZF32_X16V8S8_MS4_VC4_2CSZV = 0xc0,
+ ZF32_X16V8S8_MS8_VC8_2CSZV = 0xc1,
+ ZF32_X16V8S8_MS8_VC24_2CSZV = 0xc2,
+ ZF32_X24S8 = 0xc3,
+ ZF32_X24S8_1CS = 0xc4,
+ ZF32_X24S8_MS2_1CS = 0xc5,
+ ZF32_X24S8_MS4_1CS = 0xc6,
+ ZF32_X24S8_MS8_1CS = 0xc7,
+ ZF32_X24S8_MS16_1CS = 0xc8,
+ ZF32_X24S8_2CSZV = 0xce,
+ ZF32_X24S8_MS2_2CSZV = 0xcf,
+ ZF32_X24S8_MS4_2CSZV = 0xd0,
+ ZF32_X24S8_MS8_2CSZV = 0xd1,
+ ZF32_X24S8_MS16_2CSZV = 0xd2,
+ ZF32_X24S8_2CS = 0xd3,
+ ZF32_X24S8_MS2_2CS = 0xd4,
+ ZF32_X24S8_MS4_2CS = 0xd5,
+ ZF32_X24S8_MS8_2CS = 0xd6,
+ ZF32_X24S8_MS16_2CS = 0xd7,
+ S8 = 0x2a,
+ S8_2S = 0x2b,
+ GENERIC_16BX2 = 0xfe,
+ C32_2C = 0xd8,
+ C32_2CBR = 0xd9,
+ C32_2CBA = 0xda,
+ C32_2CRA = 0xdb,
+ C32_2BRA = 0xdc,
+ C32_MS2_2C = 0xdd,
+ C32_MS2_2CBR = 0xde,
+ C32_MS2_4CBRA = 0xcc,
+ C32_MS4_2C = 0xdf,
+ C32_MS4_2CBR = 0xe0,
+ C32_MS4_2CBA = 0xe1,
+ C32_MS4_2CRA = 0xe2,
+ C32_MS4_2BRA = 0xe3,
+ C32_MS4_4CBRA = 0x2c,
+ C32_MS8_MS16_2C = 0xe4,
+ C32_MS8_MS16_2CRA = 0xe5,
+ C64_2C = 0xe6,
+ C64_2CBR = 0xe7,
+ C64_2CBA = 0xe8,
+ C64_2CRA = 0xe9,
+ C64_2BRA = 0xea,
+ C64_MS2_2C = 0xeb,
+ C64_MS2_2CBR = 0xec,
+ C64_MS2_4CBRA = 0xcd,
+ C64_MS4_2C = 0xed,
+ C64_MS4_2CBR = 0xee,
+ C64_MS4_2CBA = 0xef,
+ C64_MS4_2CRA = 0xf0,
+ C64_MS4_2BRA = 0xf1,
+ C64_MS4_4CBRA = 0x2d,
+ C64_MS8_MS16_2C = 0xf2,
+ C64_MS8_MS16_2CRA = 0xf3,
+ C128_2C = 0xf4,
+ C128_2CR = 0xf5,
+ C128_MS2_2C = 0xf6,
+ C128_MS2_2CR = 0xf7,
+ C128_MS4_2C = 0xf8,
+ C128_MS4_2CR = 0xf9,
+ C128_MS8_MS16_2C = 0xfa,
+ C128_MS8_MS16_2CR = 0xfb,
+ X8C24 = 0xfc,
+ PITCH_NO_SWIZZLE = 0xfd,  // the other kind IsPitchKind() accepts
+ SMSKED_MESSAGE = 0xca,
+ SMHOST_MESSAGE = 0xcb,
+};
+
+constexpr bool IsPitchKind(PTEKind kind) {  // Compile-time-usable predicate over PTEKind.
+ return kind == PTEKind::PITCH || kind == PTEKind::PITCH_NO_SWIZZLE;  // true only for these two enumerators
+}
+
+} // namespace Tegra
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index d2d40884c..1cbfef090 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -40,7 +40,7 @@ public:
virtual ~RasterizerInterface() = default;
/// Dispatches a draw invocation
- virtual void Draw(bool is_indexed, bool is_instanced) = 0;
+ virtual void Draw(bool is_indexed, u32 instance_count) = 0;
/// Clear the current framebuffer
virtual void Clear() = 0;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e5c09a969..1590b21de 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -205,7 +205,7 @@ void RasterizerOpenGL::Clear() {
++num_queued_commands;
}
-void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
+void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
@@ -222,14 +222,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
+ BindInlineIndexBuffer();
+
SyncState();
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d->regs.draw.topology);
BeginTransformFeedback(pipeline, primitive_mode);
const GLuint base_instance = static_cast<GLuint>(maxwell3d->regs.global_base_instance_index);
- const GLsizei num_instances =
- static_cast<GLsizei>(is_instanced ? maxwell3d->mme_draw.instance_count : 1);
+ const GLsizei num_instances = static_cast<GLsizei>(instance_count);
if (is_indexed) {
const GLint base_vertex = static_cast<GLint>(maxwell3d->regs.global_base_vertex_index);
const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d->regs.index_buffer.count);
@@ -1129,6 +1130,16 @@ void RasterizerOpenGL::ReleaseChannel(s32 channel_id) {
query_cache.EraseChannel(channel_id);
}
+void RasterizerOpenGL::BindInlineIndexBuffer() {  // Uploads maxwell3d->inline_index_draw_indexes and binds the result as the index buffer.
+ if (maxwell3d->inline_index_draw_indexes.empty()) {  // no inline index data: leave the current binding untouched
+ return;
+ }
+ const auto data_count = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size());  // NOTE(review): size() is passed on as a buffer size; presumably the container holds bytes - confirm
+ auto buffer = Buffer(buffer_cache_runtime, *this, 0, data_count);  // function-local Buffer built for this call
+ buffer.ImmediateUpload(0, maxwell3d->inline_index_draw_indexes);  // copy the inline data in at offset 0
+ buffer_cache_runtime.BindIndexBuffer(buffer, 0, data_count);  // NOTE(review): `buffer` is destroyed on return - confirm the binding stays valid for the following draw
+}
+
AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 45131b785..793e0d608 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -68,7 +68,7 @@ public:
StateTracker& state_tracker_);
~RasterizerOpenGL() override;
- void Draw(bool is_indexed, bool is_instanced) override;
+ void Draw(bool is_indexed, u32 instance_count) override;
void Clear() override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
@@ -199,6 +199,8 @@ private:
/// End a transform feedback
void EndTransformFeedback();
+ void BindInlineIndexBuffer();
+
Tegra::GPU& gpu;
const Device& device;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 609f0a772..e94cfdb1a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -63,6 +63,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
Shader::RuntimeInfo info;
if (previous_program) {
info.previous_stage_stores = previous_program->info.stores;
+ info.previous_stage_legacy_stores_mapping = previous_program->info.legacy_stores_mapping;
} else {
// Mark all stores as available for vertex shaders
info.previous_stage_stores.mask.set();
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 20f1d6584..13d5a1f67 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -134,6 +134,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
Shader::RuntimeInfo info;
if (previous_program) {
info.previous_stage_stores = previous_program->info.stores;
+ info.previous_stage_legacy_stores_mapping = previous_program->info.legacy_stores_mapping;
if (previous_program->is_geometry_passthrough) {
info.previous_stage_stores.mask |= previous_program->info.passthrough.mask;
}
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 7cb02631c..4b15c0f85 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -59,10 +59,11 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) {
return query_pool == *pool;
});
- ASSERT(it != std::end(pools));
- const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
- usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
+ if (it != std::end(pools)) {
+ const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
+ usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
+ }
}
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 47dfb45a1..6ab68892c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -127,11 +127,10 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3
return scissor;
}
-DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
- bool is_indexed) {
+DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_indexed) {
DrawParams params{
.base_instance = regs.global_base_instance_index,
- .num_instances = is_instanced ? num_instances : 1,
+ .num_instances = num_instances,
.base_vertex = is_indexed ? regs.global_base_vertex_index : regs.vertex_buffer.first,
.num_vertices = is_indexed ? regs.index_buffer.count : regs.vertex_buffer.count,
.first_index = is_indexed ? regs.index_buffer.first : 0,
@@ -177,7 +176,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
RasterizerVulkan::~RasterizerVulkan() = default;
-void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
+void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
MICROPROFILE_SCOPE(Vulkan_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
@@ -194,13 +193,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
+ BindInlineIndexBuffer();
+
BeginTransformFeedback();
UpdateDynamicStates();
const auto& regs{maxwell3d->regs};
- const u32 num_instances{maxwell3d->mme_draw.instance_count};
- const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
+ const u32 num_instances{instance_count};
+ const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_indexed)};
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
@@ -304,14 +305,19 @@ void RasterizerVulkan::Clear() {
}
}
- scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
- const VkClearAttachment attachment{
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .colorAttachment = color_attachment,
- .clearValue = clear_value,
- };
- cmdbuf.ClearAttachments(attachment, clear_rect);
- });
+ if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B &&
+ regs.clear_surface.A) {
+ scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
+ const VkClearAttachment attachment{
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .colorAttachment = color_attachment,
+ .clearValue = clear_value,
+ };
+ cmdbuf.ClearAttachments(attachment, clear_rect);
+ });
+ } else {
+ UNIMPLEMENTED_MSG("Unimplemented Clear only the specified channel");
+ }
}
if (!use_depth && !use_stencil) {
@@ -1009,4 +1015,17 @@ void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
query_cache.EraseChannel(channel_id);
}
+void RasterizerVulkan::BindInlineIndexBuffer() {  // Copies maxwell3d->inline_index_draw_indexes into a staging buffer and binds it as the index buffer.
+ if (maxwell3d->inline_index_draw_indexes.empty()) {  // no inline index data: leave the current binding untouched
+ return;
+ }
+ const auto data_count = static_cast<u32>(maxwell3d->inline_index_draw_indexes.size());  // element count, used below as a byte count
+ auto buffer = buffer_cache_runtime.UploadStagingBuffer(data_count);  // staging allocation requested with data_count as its size
+ std::memcpy(buffer.mapped_span.data(), maxwell3d->inline_index_draw_indexes.data(), data_count);  // copies data_count bytes; NOTE(review): correct only if elements are 1 byte wide - confirm
+ buffer_cache_runtime.BindIndexBuffer(
+ maxwell3d->regs.draw.topology, maxwell3d->regs.index_buffer.format,
+ maxwell3d->regs.index_buffer.first, maxwell3d->regs.index_buffer.count, buffer.buffer,
+ static_cast<u32>(buffer.offset), data_count);  // topology/format/first/count come from the current registers; buffer, offset and size from the staging allocation
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 4cde3c983..e2fdc7611 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -64,7 +64,7 @@ public:
StateTracker& state_tracker_, Scheduler& scheduler_);
~RasterizerVulkan() override;
- void Draw(bool is_indexed, bool is_instanced) override;
+ void Draw(bool is_indexed, u32 instance_count) override;
void Clear() override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
@@ -141,6 +141,8 @@ private:
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
+ void BindInlineIndexBuffer();
+
Tegra::GPU& gpu;
ScreenInfo& screen_info;
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index ad935d386..08aa8ca33 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -150,6 +150,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
return PixelFormat::D24_UNORM_S8_UINT;
case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
return PixelFormat::D32_FLOAT_S8_UINT;
+ case Hash(TextureFormat::R32_B24G8, FLOAT, UINT, UNORM, UNORM, LINEAR):
+ return PixelFormat::D32_FLOAT_S8_UINT;
case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
return PixelFormat::BC1_RGBA_UNORM;
case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB):
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 0e0fd410f..8ef75fe73 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -442,7 +442,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
template <class P>
void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
std::vector<ImageId> images;
- ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
+ ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
if (!image.IsSafeDownload()) {
return;
}
@@ -1502,9 +1502,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
image.flags &= ~ImageFlagBits::BadOverlap;
lru_cache.Free(image.lru_index);
const auto& clear_page_table =
- [this, image_id](u64 page,
- std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>&
- selected_page_table) {
+ [image_id](u64 page,
+ std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>&
+ selected_page_table) {
const auto page_it = selected_page_table.find(page);
if (page_it == selected_page_table.end()) {
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << YUZU_PAGEBITS);
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 15b9d4182..69a32819a 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -1661,8 +1661,8 @@ void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height,
for (u32 z = 0; z < depth; ++z) {
const u32 depth_offset = z * height * width * 4;
for (u32 y_index = 0; y_index < rows; ++y_index) {
- auto decompress_stride = [data, width, height, depth, block_width, block_height, output,
- rows, cols, z, depth_offset, y_index] {
+ auto decompress_stride = [data, width, height, block_width, block_height, output, rows,
+ cols, z, depth_offset, y_index] {
const u32 y = y_index * block_height;
for (u32 x_index = 0; x_index < cols; ++x_index) {
const u32 block_index = (z * rows * cols) + (y_index * cols) + x_index;
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 52d067a2d..fd1a4b987 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -21,7 +21,7 @@ constexpr u32 pdep(u32 value) {
u32 m = mask;
for (u32 bit = 1; m; bit += bit) {
if (value & bit)
- result |= m & -m;
+ result |= m & (~m + 1);
m &= m - 1;
}
return result;