summaryrefslogtreecommitdiffstats
path: root/src/video_core/engines/maxwell_3d.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/engines/maxwell_3d.cpp464
1 files changed, 382 insertions, 82 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 39968d403..b318aedb8 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
+ InitDirtySettings();
InitializeRegisterDefaults();
}
@@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.stencil_back_func_mask = 0xFFFFFFFF;
regs.stencil_back_mask = 0xFFFFFFFF;
+ regs.depth_test_func = Regs::ComparisonOp::Always;
+ regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise;
+ regs.cull.cull_face = Regs::Cull::CullFace::Back;
+
// TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
// register carrying a default value. Assume it's OpenGL's default (1).
regs.point_size = 1.0f;
@@ -84,23 +89,180 @@ void Maxwell3D::InitializeRegisterDefaults() {
// Commercial games seem to assume this value is enabled and nouveau sets this value manually.
regs.rt_separate_frag_data = 1;
+
+ // Some games (like Super Mario Odyssey) assume that SRGB is enabled.
+ regs.framebuffer_srgb = 1;
+ mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
+ mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
+ mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
+ mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
}
-void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
+#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
+
+void Maxwell3D::InitDirtySettings() {
+ const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
+ const auto start_itr = dirty_pointers.begin() + start;
+ const auto end_itr = start_itr + range;
+ std::fill(start_itr, end_itr, position);
+ };
+ dirty.regs.fill(true);
+
+ // Init Render Targets
+ constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
+ constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
+ constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
+ u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
+ for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
+ set_block(rt_reg, registers_per_rt, rt_dirty_reg);
+ rt_dirty_reg++;
+ }
+ constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
+ dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
+ dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
+ dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
+ constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
+ constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
+ set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
+
+ // Init Vertex Arrays
+ constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
+ constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
+ constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
+ u32 va_reg = DIRTY_REGS_POS(vertex_array);
+ u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
+ for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
+ vertex_reg += vertex_array_size) {
+ set_block(vertex_reg, 3, va_reg);
+ // The divisor concerns vertex array instances
+ dirty_pointers[vertex_reg + 3] = vi_reg;
+ va_reg++;
+ vi_reg++;
+ }
+ constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
+ constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
+ constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
+ va_reg = DIRTY_REGS_POS(vertex_array);
+ for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
+ vertex_reg += vertex_limit_size) {
+ set_block(vertex_reg, vertex_limit_size, va_reg);
+ va_reg++;
+ }
+ constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
+ constexpr u32 vertex_instance_size =
+ sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
+ constexpr u32 vertex_instance_end =
+ vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
+ vi_reg = DIRTY_REGS_POS(vertex_instance);
+ for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
+ vertex_reg += vertex_instance_size) {
+ set_block(vertex_reg, vertex_instance_size, vi_reg);
+ vi_reg++;
+ }
+ set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
+ DIRTY_REGS_POS(vertex_attrib_format));
+
+ // Init Shaders
+ constexpr u32 shader_registers_count =
+ sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
+ set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
+ DIRTY_REGS_POS(shaders));
+
+ // State
+
+ // Viewport
+ constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
+ constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
+ constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
+ set_block(viewport_start, viewport_size, viewport_dirty_reg);
+ constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
+ constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
+ set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
+
+ // Viewport transformation
+ constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
+ constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
+ set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
+
+ // Cullmode
+ constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
+ constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
+ set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
+
+ // Screen y control
+ dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
+
+ // Primitive Restart
+ constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
+ constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
+ set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
+
+ // Depth Test
+ constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
+ dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
+
+ // Stencil Test
+ constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
+
+ // Color Mask
+ constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
+ dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
+ set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
+ color_mask_dirty_reg);
+ // Blend State
+ constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
+ set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
+ blend_state_dirty_reg);
+ dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
+ set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
+ set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
+ blend_state_dirty_reg);
+
+ // Scissor State
+ constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
+ set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
+ scissor_test_dirty_reg);
+
+ // Polygon Offset
+ constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
+ dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
+ dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
+}
+
+void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) {
// Reset the current macro.
executing_macro = 0;
// Lookup the macro offset
- const u32 entry{(method - MacroRegistersStart) >> 1};
- const auto& search{macro_offsets.find(entry)};
- if (search == macro_offsets.end()) {
- LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
- UNREACHABLE();
- return;
- }
+ const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size();
// Execute the current macro.
- macro_interpreter.Execute(search->second, std::move(parameters));
+ macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters);
+ if (mme_draw.current_mode != MMEDrawMode::Undefined) {
+ FlushMMEInlineDraw();
+ }
}
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
@@ -108,6 +270,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
const u32 method = method_call.method;
+ if (method == cb_data_state.current) {
+ regs.reg_array[method] = method_call.argument;
+ ProcessCBData(method_call.argument);
+ return;
+ } else if (cb_data_state.current != null_cb_data) {
+ FinishCBData();
+ }
+
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
if (executing_macro != 0) {
@@ -129,7 +299,8 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
// Call the macro when there are no more parameters in the command buffer
if (method_call.IsLastCall()) {
- CallMacroMethod(executing_macro, std::move(macro_params));
+ CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
+ macro_params.clear();
}
return;
}
@@ -143,49 +314,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
if (regs.reg_array[method] != method_call.argument) {
regs.reg_array[method] = method_call.argument;
- // Color buffers
- constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
- constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
- if (method >= first_rt_reg &&
- method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
- const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
- dirty_flags.color_buffer.set(rt_index);
- }
-
- // Zeta buffer
- constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
- if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
- method == MAXWELL3D_REG_INDEX(zeta_width) ||
- method == MAXWELL3D_REG_INDEX(zeta_height) ||
- (method >= MAXWELL3D_REG_INDEX(zeta) &&
- method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
- dirty_flags.zeta_buffer = true;
- }
-
- // Shader
- constexpr u32 shader_registers_count =
- sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
- if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
- method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
- dirty_flags.shaders = true;
- }
-
- // Vertex format
- if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
- method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
- dirty_flags.vertex_attrib_format = true;
- }
-
- // Vertex buffer
- if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
- method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
- dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
- } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
- method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
- dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
- } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
- method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
- dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
+ const std::size_t dirty_reg = dirty_pointers[method];
+ if (dirty_reg) {
+ dirty.regs[dirty_reg] = true;
+ if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
+ dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
+ dirty.vertex_array_buffers = true;
+ } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
+ dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
+ dirty.vertex_instances = true;
+ } else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
+ dirty_reg < DIRTY_REGS_POS(render_settings)) {
+ dirty.render_settings = true;
+ }
}
}
@@ -198,6 +339,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessMacroBind(method_call.argument);
break;
}
+ case MAXWELL3D_REG_INDEX(firmware[4]): {
+ ProcessFirmwareCall4();
+ break;
+ }
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
@@ -214,7 +359,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
- ProcessCBData(method_call.argument);
+ StartCBData(method);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
@@ -249,6 +394,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessQueryGet();
break;
}
+ case MAXWELL3D_REG_INDEX(condition.mode): {
+ ProcessQueryCondition();
+ break;
+ }
case MAXWELL3D_REG_INDEX(sync_info): {
ProcessSyncPoint();
break;
@@ -261,7 +410,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
- dirty_flags.OnMemoryWrite();
+ dirty.OnMemoryWrite();
}
break;
}
@@ -274,6 +423,97 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
}
}
+void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
+ if (mme_draw.current_mode == MMEDrawMode::Undefined) {
+ if (mme_draw.gl_begin_consume) {
+ mme_draw.current_mode = expected_mode;
+ mme_draw.current_count = count;
+ mme_draw.instance_count = 1;
+ mme_draw.gl_begin_consume = false;
+ mme_draw.gl_end_count = 0;
+ }
+ return;
+ } else {
+ if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count &&
+ mme_draw.instance_mode && mme_draw.gl_begin_consume) {
+ mme_draw.instance_count++;
+ mme_draw.gl_begin_consume = false;
+ return;
+ } else {
+ FlushMMEInlineDraw();
+ }
+ }
+ // Tail call in case it needs to retry.
+ StepInstance(expected_mode, count);
+}
+
+void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) {
+ const u32 method = method_call.method;
+ if (mme_inline[method]) {
+ regs.reg_array[method] = method_call.argument;
+ if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) ||
+ method == MAXWELL3D_REG_INDEX(index_array.count)) {
+ const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count)
+ ? MMEDrawMode::Array
+ : MMEDrawMode::Indexed;
+ StepInstance(expected_mode, method_call.argument);
+ } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) {
+ mme_draw.instance_mode =
+ (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0);
+ mme_draw.gl_begin_consume = true;
+ } else {
+ mme_draw.gl_end_count++;
+ }
+ } else {
+ if (mme_draw.current_mode != MMEDrawMode::Undefined) {
+ FlushMMEInlineDraw();
+ }
+ CallMethod(method_call);
+ }
+}
+
+void Maxwell3D::FlushMMEInlineDraw() {
+ LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+ regs.vertex_buffer.count);
+ ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
+ ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
+
+ auto debug_context = system.GetGPUDebugContext();
+
+ if (debug_context) {
+ debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
+ }
+
+ // Both instance configuration registers can not be set at the same time.
+ ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
+ "Illegal combination of instancing parameters");
+
+ const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
+ if (ShouldExecute()) {
+ rasterizer.DrawMultiBatch(is_indexed);
+ }
+
+ if (debug_context) {
+ debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+ }
+
+ // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
+ // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
+ // it's possible that it is incorrect and that there is some other register used to specify the
+ // drawing mode.
+ if (is_indexed) {
+ regs.index_array.count = 0;
+ } else {
+ regs.vertex_buffer.count = 0;
+ }
+ mme_draw.current_mode = MMEDrawMode::Undefined;
+ mme_draw.current_count = 0;
+ mme_draw.instance_count = 0;
+ mme_draw.instance_mode = false;
+ mme_draw.gl_begin_consume = false;
+ mme_draw.gl_end_count = 0;
+}
+
void Maxwell3D::ProcessMacroUpload(u32 data) {
ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
"upload_address exceeded macro_memory size!");
@@ -281,7 +521,15 @@ void Maxwell3D::ProcessMacroUpload(u32 data) {
}
void Maxwell3D::ProcessMacroBind(u32 data) {
- macro_offsets[regs.macros.entry] = data;
+ macro_positions[regs.macros.entry++] = data;
+}
+
+void Maxwell3D::ProcessFirmwareCall4() {
+ LOG_WARNING(HW_GPU, "(STUBBED) called");
+
+ // Firmware call 4 is a blob that changes some registers depending on its parameters.
+ // These registers don't affect emulation and so are stubbed by setting 0xd00 to 1.
+ regs.reg_array[0xd00] = 1;
}
void Maxwell3D::ProcessQueryGet() {
@@ -302,6 +550,7 @@ void Maxwell3D::ProcessQueryGet() {
result = regs.query.query_sequence;
break;
default:
+ result = 1;
UNIMPLEMENTED_MSG("Unimplemented query select type {}",
static_cast<u32>(regs.query.query_get.select.Value()));
}
@@ -333,7 +582,6 @@ void Maxwell3D::ProcessQueryGet() {
query_result.timestamp = system.CoreTiming().GetTicks();
memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
}
- dirty_flags.OnMemoryWrite();
break;
}
default:
@@ -342,16 +590,56 @@ void Maxwell3D::ProcessQueryGet() {
}
}
+void Maxwell3D::ProcessQueryCondition() {
+ const GPUVAddr condition_address{regs.condition.Address()};
+ switch (regs.condition.mode) {
+ case Regs::ConditionMode::Always: {
+ execute_on = true;
+ break;
+ }
+ case Regs::ConditionMode::Never: {
+ execute_on = false;
+ break;
+ }
+ case Regs::ConditionMode::ResNonZero: {
+ Regs::QueryCompare cmp;
+ memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+ execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
+ break;
+ }
+ case Regs::ConditionMode::Equal: {
+ Regs::QueryCompare cmp;
+ memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+ execute_on =
+ cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
+ break;
+ }
+ case Regs::ConditionMode::NotEqual: {
+ Regs::QueryCompare cmp;
+ memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+ execute_on =
+ cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
+ break;
+ }
+ default: {
+ UNIMPLEMENTED_MSG("Uninplemented Condition Mode!");
+ execute_on = true;
+ break;
+ }
+ }
+}
+
void Maxwell3D::ProcessSyncPoint() {
const u32 sync_point = regs.sync_info.sync_point.Value();
const u32 increment = regs.sync_info.increment.Value();
- const u32 cache_flush = regs.sync_info.unknown.Value();
- LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment,
- cache_flush);
+ [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
+ if (increment) {
+ system.GPU().IncrementSyncPoint(sync_point);
+ }
}
void Maxwell3D::DrawArrays() {
- LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+ LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
@@ -374,7 +662,9 @@ void Maxwell3D::DrawArrays() {
}
const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
- rasterizer.AccelerateDrawBatch(is_indexed);
+ if (ShouldExecute()) {
+ rasterizer.DrawBatch(is_indexed);
+ }
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
@@ -396,34 +686,48 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
auto& bind_data = regs.cb_bind[static_cast<std::size_t>(stage)];
- auto& buffer = shader.const_buffers[bind_data.index];
-
ASSERT(bind_data.index < Regs::MaxConstBuffers);
+ auto& buffer = shader.const_buffers[bind_data.index];
buffer.enabled = bind_data.valid.Value() != 0;
- buffer.index = bind_data.index;
buffer.address = regs.const_buffer.BufferAddress();
buffer.size = regs.const_buffer.cb_size;
}
void Maxwell3D::ProcessCBData(u32 value) {
+ const u32 id = cb_data_state.id;
+ cb_data_state.buffer[id][cb_data_state.counter] = value;
+ // Increment the current buffer position.
+ regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
+ cb_data_state.counter++;
+}
+
+void Maxwell3D::StartCBData(u32 method) {
+ constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
+ cb_data_state.start_pos = regs.const_buffer.cb_pos;
+ cb_data_state.id = method - first_cb_data;
+ cb_data_state.current = method;
+ cb_data_state.counter = 0;
+ ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
+}
+
+void Maxwell3D::FinishCBData() {
// Write the input value to the current const buffer at the current position.
const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
ASSERT(buffer_address != 0);
// Don't allow writing past the end of the buffer.
- ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
-
- const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+ ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
- u8* ptr{memory_manager.GetPointer(address)};
- rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
- memory_manager.Write<u32>(address, value);
+ const GPUVAddr address{buffer_address + cb_data_state.start_pos};
+ const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
- dirty_flags.OnMemoryWrite();
+ const u32 id = cb_data_state.id;
+ memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
+ dirty.OnMemoryWrite();
- // Increment the current buffer position.
- regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
+ cb_data_state.id = null_cb_data;
+ cb_data_state.current = null_cb_data;
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
@@ -432,14 +736,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
- ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
- tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
- "TIC versions other than BlockLinear or Pitch are unimplemented");
-
- const auto r_type = tic_entry.r_type.Value();
- const auto g_type = tic_entry.g_type.Value();
- const auto b_type = tic_entry.b_type.Value();
- const auto a_type = tic_entry.a_type.Value();
+ [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()};
+ [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()};
+ [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()};
+ [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()};
// TODO(Subv): Different data types for separate components are not supported
DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);