summaryrefslogtreecommitdiffstats
path: root/src/video_core/engines
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/engines/draw_manager.cpp31
-rw-r--r--src/video_core/engines/draw_manager.h25
-rw-r--r--src/video_core/engines/engine_interface.h24
-rw-r--r--src/video_core/engines/fermi_2d.cpp10
-rw-r--r--src/video_core/engines/fermi_2d.h2
-rw-r--r--src/video_core/engines/kepler_compute.cpp14
-rw-r--r--src/video_core/engines/kepler_compute.h2
-rw-r--r--src/video_core/engines/kepler_memory.cpp11
-rw-r--r--src/video_core/engines/kepler_memory.h2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp173
-rw-r--r--src/video_core/engines/maxwell_3d.h75
-rw-r--r--src/video_core/engines/maxwell_dma.cpp12
-rw-r--r--src/video_core/engines/maxwell_dma.h2
13 files changed, 366 insertions, 17 deletions
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index 3a78421f6..2437121ce 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -91,6 +91,23 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
ProcessDraw(true, num_instances);
}
+void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) { // Stores the topology, then runs ProcessDrawIndirect().
+ draw_state.topology = topology; // Index buffer state is left untouched by this entry point.
+
+ ProcessDrawIndirect();
+}
+
+/// Records the state for an indexed indirect draw and dispatches it.
+/// The engine's index buffer registers are copied into the draw state, with `first` and
+/// `count` replaced by the values passed in.
+void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first,
+                                      u32 index_count) {
+    draw_state.topology = topology;
+
+    // Start from the current index buffer registers, then override the draw range.
+    auto& target_buffer = draw_state.index_buffer;
+    target_buffer = maxwell3d->regs.index_buffer;
+    target_buffer.first = index_first;
+    target_buffer.count = index_count;
+
+    ProcessDrawIndirect();
+}
+
void DrawManager::SetInlineIndexBuffer(u32 index) {
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff));
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8));
@@ -198,4 +215,18 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
maxwell3d->rasterizer->Draw(draw_indexed, instance_count);
}
}
+
+/// Logs the pending indirect draw, calls UpdateTopology() and forwards the draw to the
+/// rasterizer, but only when the engine reports that it should execute.
+void DrawManager::ProcessDrawIndirect() {
+    LOG_TRACE(
+        HW_GPU,
+        "called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}",
+        draw_state.topology, indirect_state.is_indexed, indirect_state.include_count,
+        indirect_state.buffer_size, indirect_state.max_draw_counts);
+
+    UpdateTopology();
+
+    // Nothing is submitted when execution is currently disabled.
+    if (!maxwell3d->ShouldExecute()) {
+        return;
+    }
+    maxwell3d->rasterizer->DrawIndirect();
+}
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h
index 0e6930a9c..58d1b2d59 100644
--- a/src/video_core/engines/draw_manager.h
+++ b/src/video_core/engines/draw_manager.h
@@ -32,6 +32,16 @@ public:
std::vector<u8> inline_index_draw_indexes;
};
+ struct IndirectParams { // State of the pending indirect draw; ProcessDrawIndirect() logs several of these fields.
+ bool is_indexed; ///< Logged as "is_indexed"; NOTE(review): presumably selects indexed vs. array draws - confirm
+ bool include_count; ///< Logged as "includes_count"; NOTE(review): presumably a draw count lives at count_start_address - confirm
+ GPUVAddr count_start_address; ///< GPU virtual address; writers are outside this diff
+ GPUVAddr indirect_start_address; ///< GPU virtual address; writers are outside this diff
+ size_t buffer_size; ///< Logged as "buffer_size"; unit not visible here - TODO confirm
+ size_t max_draw_counts; ///< Logged as "max_draw_count"
+ size_t stride; ///< NOTE(review): presumably the distance between indirect commands - confirm
+ };
+
explicit DrawManager(Maxwell3D* maxwell_3d);
void ProcessMethodCall(u32 method, u32 argument);
@@ -46,10 +56,22 @@ public:
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
u32 base_instance, u32 num_instances);
+ void DrawArrayIndirect(PrimitiveTopology topology);
+
+ void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count);
+
const State& GetDrawState() const {
return draw_state;
}
+ IndirectParams& GetIndirectParams() { // Mutable access, so callers can fill in indirect_state.
+ return indirect_state;
+ }
+
+ const IndirectParams& GetIndirectParams() const { // Read-only view of the same indirect_state.
+ return indirect_state;
+ }
+
private:
void SetInlineIndexBuffer(u32 index);
@@ -63,7 +85,10 @@ private:
void ProcessDraw(bool draw_indexed, u32 instance_count);
+ void ProcessDrawIndirect();
+
Maxwell3D* maxwell3d{};
State draw_state{};
+ IndirectParams indirect_state{};
};
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h
index 26cde8584..392322358 100644
--- a/src/video_core/engines/engine_interface.h
+++ b/src/video_core/engines/engine_interface.h
@@ -3,6 +3,10 @@
#pragma once
+#include <bitset>
+#include <limits>
+#include <vector>
+
#include "common/common_types.h"
namespace Tegra::Engines {
@@ -17,6 +21,26 @@ public:
/// Write multiple values to the register identified by method.
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) = 0;
+
+    /// Drains the queued (method, value) writes through ConsumeSinkImpl().
+    /// Does nothing when the queue is empty, so the virtual call is skipped.
+    void ConsumeSink() {
+        if (!method_sink.empty()) {
+            ConsumeSinkImpl();
+        }
+    }
+
+    /// Per-method flags; every engine in this change resets the mask and sets the bits it needs.
+    /// NOTE(review): the bitset holds max(u16) == 65535 bits, so index 0xFFFF is out of range -
+    /// confirm that no method index can reach it.
+    std::bitset<std::numeric_limits<u16>::max()> execution_mask{};
+    /// Queued (method, value) writes, drained by ConsumeSink().
+    std::vector<std::pair<u32, u32>> method_sink{};
+    bool current_dirty{};
+    /// Value-initialized like its neighbours so it is never read while indeterminate.
+    GPUVAddr current_dma_segment{};
+
+protected:
+    /// Default drain: replays each queued write through CallMethod() with the last-call flag
+    /// set, then empties the queue. Engines may override this.
+    virtual void ConsumeSinkImpl() {
+        for (const auto& queued_write : method_sink) {
+            CallMethod(queued_write.first, queued_write.second, true);
+        }
+        method_sink.clear();
+    }
};
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index c6478ae85..e655e7254 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -25,6 +25,9 @@ Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
// Nvidia's OpenGL driver seems to assume these values
regs.src.depth = 1;
regs.dst.depth = 1;
+
+ execution_mask.reset();
+ execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true;
}
Fermi2D::~Fermi2D() = default;
@@ -49,6 +52,13 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32
}
}
+/// Drains the method sink by writing each queued value directly into the register array
+/// (CallMethod() is not involved), then empties the queue.
+void Fermi2D::ConsumeSinkImpl() {
+    for (const auto& [reg_index, reg_value] : method_sink) {
+        regs.reg_array[reg_index] = reg_value;
+    }
+    method_sink.clear();
+}
+
void Fermi2D::Blit() {
MICROPROFILE_SCOPE(GPU_BlitEngine);
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 100b21bac..523fbdec2 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -309,6 +309,8 @@ private:
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
void Blit();
+
+ void ConsumeSinkImpl() override;
};
#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index e5c622155..601095f03 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -14,7 +14,12 @@
namespace Tegra::Engines {
KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
- : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
+ : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {
+ execution_mask.reset(); // Start from an empty mask; only the three registers below get flagged.
+ execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true;
+ execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true;
+ execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true;
+}
KeplerCompute::~KeplerCompute() = default;
@@ -23,6 +28,13 @@ void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
upload_state.BindRasterizer(rasterizer);
}
+/// Drains the method sink by writing each queued value directly into the register array
+/// (CallMethod() is not involved), then empties the queue.
+void KeplerCompute::ConsumeSinkImpl() {
+    for (const auto& [reg_index, reg_value] : method_sink) {
+        regs.reg_array[reg_index] = reg_value;
+    }
+    method_sink.clear();
+}
+
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid KeplerCompute register, increase the size of the Regs structure");
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index e154e3f06..2092e685f 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -204,6 +204,8 @@ public:
private:
void ProcessLaunch();
+ void ConsumeSinkImpl() override;
+
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 08045d1cf..c026801a3 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -18,6 +18,17 @@ KeplerMemory::~KeplerMemory() = default;
void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
upload_state.BindRasterizer(rasterizer_);
+
+ execution_mask.reset(); // NOTE(review): the other engines in this change build the mask in their constructors; here it is rebuilt on every bind - confirm intended.
+ execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true;
+ execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true;
+}
+
+/// Drains the method sink by writing each queued value directly into the register array
+/// (CallMethod() is not involved), then empties the queue.
+void KeplerMemory::ConsumeSinkImpl() {
+    for (const auto& [reg_index, reg_value] : method_sink) {
+        regs.reg_array[reg_index] = reg_value;
+    }
+    method_sink.clear();
}
void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 5fe7489f0..fb1eecbba 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -73,6 +73,8 @@ public:
} regs{};
private:
+ void ConsumeSinkImpl() override;
+
Core::System& system;
Upload::State upload_state;
};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 9b182b653..fbfd1ddd2 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -4,6 +4,8 @@
#include <cstring>
#include <optional>
#include "common/assert.h"
+#include "common/scope_exit.h"
+#include "common/settings.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/dirty_flags.h"
@@ -28,6 +30,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
regs.upload} {
dirty.flags.flip();
InitializeRegisterDefaults();
+ execution_mask.reset();
+ for (size_t i = 0; i < execution_mask.size(); i++) {
+ execution_mask[i] = IsMethodExecutable(static_cast<u32>(i));
+ }
}
Maxwell3D::~Maxwell3D() = default;
@@ -121,6 +127,71 @@ void Maxwell3D::InitializeRegisterDefaults() {
shadow_state = regs;
}
+bool Maxwell3D::IsMethodExecutable(u32 method) { // Classifies a method index; the constructor uses this to fill execution_mask.
+ if (method >= MacroRegistersStart) { // Every method at or above MacroRegistersStart counts as executable.
+ return true;
+ }
+ switch (method) { // Explicit list of registers below MacroRegistersStart that count as executable.
+ case MAXWELL3D_REG_INDEX(draw.end):
+ case MAXWELL3D_REG_INDEX(draw.begin):
+ case MAXWELL3D_REG_INDEX(vertex_buffer.first):
+ case MAXWELL3D_REG_INDEX(vertex_buffer.count):
+ case MAXWELL3D_REG_INDEX(index_buffer.first):
+ case MAXWELL3D_REG_INDEX(index_buffer.count):
+ case MAXWELL3D_REG_INDEX(draw_inline_index):
+ case MAXWELL3D_REG_INDEX(index_buffer32_subsequent):
+ case MAXWELL3D_REG_INDEX(index_buffer16_subsequent):
+ case MAXWELL3D_REG_INDEX(index_buffer8_subsequent):
+ case MAXWELL3D_REG_INDEX(index_buffer32_first):
+ case MAXWELL3D_REG_INDEX(index_buffer16_first):
+ case MAXWELL3D_REG_INDEX(index_buffer8_first):
+ case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
+ case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
+ case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
+ case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent):
+ case MAXWELL3D_REG_INDEX(wait_for_idle):
+ case MAXWELL3D_REG_INDEX(shadow_ram_control):
+ case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
+ case MAXWELL3D_REG_INDEX(load_mme.instruction):
+ case MAXWELL3D_REG_INDEX(load_mme.start_address):
+ case MAXWELL3D_REG_INDEX(falcon[4]):
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer): // The 16 consecutive const_buffer.buffer words are listed one by one.
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 1:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 2:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 3:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 4:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 5:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 6:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 7:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 8:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 9:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 10:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 11:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 12:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 13:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 14:
+ case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15:
+ case MAXWELL3D_REG_INDEX(bind_groups[0].raw_config):
+ case MAXWELL3D_REG_INDEX(bind_groups[1].raw_config):
+ case MAXWELL3D_REG_INDEX(bind_groups[2].raw_config):
+ case MAXWELL3D_REG_INDEX(bind_groups[3].raw_config):
+ case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config):
+ case MAXWELL3D_REG_INDEX(topology_override):
+ case MAXWELL3D_REG_INDEX(clear_surface):
+ case MAXWELL3D_REG_INDEX(report_semaphore.query):
+ case MAXWELL3D_REG_INDEX(render_enable.mode):
+ case MAXWELL3D_REG_INDEX(clear_report_value):
+ case MAXWELL3D_REG_INDEX(sync_info):
+ case MAXWELL3D_REG_INDEX(launch_dma):
+ case MAXWELL3D_REG_INDEX(inline_data):
+ case MAXWELL3D_REG_INDEX(fragment_barrier):
+ case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
+ return true;
+ default: // Any other register is reported as not executable.
+ return false;
+ }
+}
+
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
@@ -130,14 +201,72 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
}
macro_params.insert(macro_params.end(), base_start, base_start + amount);
+ for (size_t i = 0; i < amount; i++) {
+ macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
+ }
+ macro_segments.emplace_back(current_dma_segment, amount);
+ current_macro_dirty |= current_dirty;
+ current_dirty = false;
// Call the macro when there are no more parameters in the command buffer
if (is_last_call) {
+ ConsumeSink();
CallMacroMethod(executing_macro, macro_params);
macro_params.clear();
+ macro_addresses.clear();
+ macro_segments.clear();
+ current_macro_dirty = false;
}
}
+/// Re-reads the macro parameters through memory_manager.ReadBlock(), one recorded segment at
+/// a time. Segments whose address is 0 are not read, but still advance the parameter cursor.
+void Maxwell3D::RefreshParametersImpl() {
+    size_t param_offset = 0;
+    for (const auto& [segment_address, word_count] : macro_segments) {
+        if (segment_address != 0) {
+            memory_manager.ReadBlock(segment_address, &macro_params[param_offset],
+                                     sizeof(u32) * word_count);
+        }
+        param_offset += word_count;
+    }
+}
+
+/// Returns the largest vertex count derived from the enabled vertex streams.
+/// For each enabled stream the byte span between its start address and its limit address
+/// (inclusive) is divided by the larger of the attribute size and the stream stride.
+/// Streams whose attribute is flagged constant contribute a single vertex.
+/// @return The maximum over all enabled streams, or 0 when none contributes.
+u32 Maxwell3D::GetMaxCurrentVertices() {
+    u32 num_vertices = 0;
+    for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
+        const auto& array = regs.vertex_streams[index];
+        if (array.enable == 0) {
+            continue;
+        }
+        const auto& attribute = regs.vertex_attrib_format[index];
+        if (attribute.constant) {
+            num_vertices = std::max(num_vertices, 1U);
+            continue;
+        }
+        const auto element_size = std::max(attribute.SizeInBytes(), array.stride.Value());
+        if (element_size == 0) {
+            // Both values come from guest-written registers; a zero size and a zero stride
+            // would make the division below undefined, so such a stream is skipped.
+            continue;
+        }
+        const auto& limit = regs.vertex_stream_limits[index];
+        const GPUVAddr gpu_addr_begin = array.Address();
+        const GPUVAddr gpu_addr_end = limit.Address() + 1;
+        const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+        num_vertices = std::max(num_vertices, address_size / element_size);
+    }
+    return num_vertices;
+}
+
+/// Estimates the size of the current index buffer.
+/// The result is the smaller of (a) the memory layout size reported by the memory manager
+/// from the start address, capped by a per-format maximum and divided by the index byte
+/// size, and (b) the distance between the index buffer's start and end addresses.
+size_t Maxwell3D::EstimateIndexBufferSize() {
+    const GPUVAddr start_address = regs.index_buffer.StartAddress();
+    const GPUVAddr end_address = regs.index_buffer.EndAddress();
+    constexpr std::array<size_t, 4> max_sizes = {
+        std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
+        std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
+    const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
+    // The table has four entries but is indexed with the byte size, so a 4-byte format would
+    // read one element past the end. Clamp to the last slot; smaller sizes are unaffected.
+    // NOTE(review): the table looks as if it was meant to be indexed by log2(byte_size) -
+    // confirm before tightening the caps for 1- and 2-byte formats.
+    const size_t table_slot = std::min<size_t>(byte_size, max_sizes.size() - 1);
+    return std::min<size_t>(
+        memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[table_slot]) /
+            byte_size,
+        static_cast<size_t>(end_address - start_address));
+}
+
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
// Keep track of the register value in shadow_state when requested.
const auto control = shadow_state.shadow_ram_control;
@@ -152,6 +281,29 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
return argument;
}
+/// Drains the method sink according to the shadow RAM control mode:
+///  - Track / TrackWithFilter: store the value in shadow_state, then process it;
+///  - Replay: process the value held in shadow_state instead of the queued one;
+///  - any other mode: process the queued value as is.
+/// The sink is cleared when the function exits.
+void Maxwell3D::ConsumeSinkImpl() {
+    SCOPE_EXIT({ method_sink.clear(); });
+    // The mode is read once up front; writes below go into shadow_state but do not change
+    // how the remaining entries are handled.
+    const auto shadow_mode = shadow_state.shadow_ram_control;
+    const bool tracks_writes = shadow_mode == Regs::ShadowRamControl::Track ||
+                               shadow_mode == Regs::ShadowRamControl::TrackWithFilter;
+    const bool replays_shadow = !tracks_writes && shadow_mode == Regs::ShadowRamControl::Replay;
+
+    for (const auto& [method, value] : method_sink) {
+        if (tracks_writes) {
+            shadow_state.reg_array[method] = value;
+        }
+        const u32 effective = replays_shadow ? shadow_state.reg_array[method] : value;
+        ProcessDirtyRegisters(method, effective);
+    }
+}
+
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
if (regs.reg_array[method] == argument) {
return;
@@ -263,7 +415,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
const u32 argument = ProcessShadowRam(method, method_argument);
ProcessDirtyRegisters(method, argument);
-
ProcessMethodCall(method, argument, method_argument, is_last_call);
}
@@ -294,9 +445,11 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15:
ProcessCBMultiData(base_start, amount);
break;
- case MAXWELL3D_REG_INDEX(inline_data):
+ case MAXWELL3D_REG_INDEX(inline_data): {
+ ASSERT(methods_pending == amount);
upload_state.ProcessData(base_start, amount);
return;
+ }
default:
for (u32 i = 0; i < amount; i++) {
CallMethod(method, base_start[i], methods_pending - i <= 1);
@@ -389,7 +542,11 @@ void Maxwell3D::ProcessQueryCondition() {
case Regs::RenderEnable::Override::NeverRender:
execute_on = false;
break;
- case Regs::RenderEnable::Override::UseRenderEnable:
+ case Regs::RenderEnable::Override::UseRenderEnable: {
+ if (rasterizer->AccelerateConditionalRendering()) {
+ execute_on = true;
+ return;
+ }
switch (regs.render_enable.mode) {
case Regs::RenderEnable::Mode::True: {
execute_on = true;
@@ -427,6 +584,7 @@ void Maxwell3D::ProcessQueryCondition() {
}
break;
}
+ }
}
void Maxwell3D::ProcessCounterReset() {
@@ -463,7 +621,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
}
void Maxwell3D::ProcessCBBind(size_t stage_index) {
- // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
+ // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader
+ // stage.
const auto& bind_data = regs.bind_groups[stage_index];
auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.shader_slot];
buffer.enabled = bind_data.valid.Value() != 0;
@@ -524,4 +683,10 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
return regs.reg_array[method];
}
+/// Registers `name` in replace_table for the (bank, offset) pair.
+/// An entry that already exists for the same pair is left unchanged.
+void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset,
+                                               HLEReplacementAttributeType name) {
+    // Pack the pair into one key: bank in the upper 32 bits, offset in the lower 32.
+    const u64 bank_bits = static_cast<u64>(bank) << 32;
+    const u64 packed_key = bank_bits | offset;
+    replace_table.try_emplace(packed_key, name);
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 22b904319..0b2fd2928 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -272,6 +272,7 @@ public:
};
union {
+ u32 raw;
BitField<0, 1, Mode> mode;
BitField<4, 8, u32> pad;
};
@@ -1217,10 +1218,12 @@ public:
struct Window {
union {
+ u32 raw_x;
BitField<0, 16, u32> x_min;
BitField<16, 16, u32> x_max;
};
union {
+ u32 raw_y;
BitField<0, 16, u32> y_min;
BitField<16, 16, u32> y_max;
};
@@ -2708,7 +2711,7 @@ public:
u32 post_z_pixel_imask; ///< 0x0F1C
INSERT_PADDING_BYTES_NOINIT(0x20);
ConstantColorRendering const_color_rendering; ///< 0x0F40
- s32 stencil_back_ref; ///< 0x0F54
+ u32 stencil_back_ref; ///< 0x0F54
u32 stencil_back_mask; ///< 0x0F58
u32 stencil_back_func_mask; ///< 0x0F5C
INSERT_PADDING_BYTES_NOINIT(0x14);
@@ -2832,9 +2835,9 @@ public:
Blend blend; ///< 0x133C
u32 stencil_enable; ///< 0x1380
StencilOp stencil_front_op; ///< 0x1384
- s32 stencil_front_ref; ///< 0x1394
- s32 stencil_front_func_mask; ///< 0x1398
- s32 stencil_front_mask; ///< 0x139C
+ u32 stencil_front_ref; ///< 0x1394
+ u32 stencil_front_func_mask; ///< 0x1398
+ u32 stencil_front_mask; ///< 0x139C
INSERT_PADDING_BYTES_NOINIT(0x4);
u32 draw_auto_start_byte_count; ///< 0x13A4
PsSaturate frag_color_clamp; ///< 0x13A8
@@ -3020,6 +3023,24 @@ public:
/// Store temporary hw register values, used by some calls to restore state after a operation
Regs shadow_state;
+ // None Engine
+ enum class EngineHint : u32 { // Value type of the engine_state member declared right after this enum.
+ None = 0x0, ///< Initial value of engine_state.
+ OnHLEMacro = 0x1, ///< NOTE(review): presumably set while an HLE macro is running - confirm against the macro code
+ };
+
+ EngineHint engine_state{EngineHint::None};
+
+ enum class HLEReplacementAttributeType : u32 { // Mapped value type of replace_table; set via SetHLEReplacementAttributeType().
+ BaseVertex = 0x0,
+ BaseInstance = 0x1,
+ DrawID = 0x2,
+ };
+
+ void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name);
+
+ std::unordered_map<u64, HLEReplacementAttributeType> replace_table;
+
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
@@ -3067,6 +3088,35 @@ public:
std::unique_ptr<DrawManager> draw_manager;
friend class DrawManager;
+ GPUVAddr GetMacroAddress(size_t index) const { // Unchecked lookup: index must be below macro_addresses.size().
+ return macro_addresses[index];
+ }
+
+    /// Re-reads the macro parameters via RefreshParametersImpl(), but only when the current
+    /// macro has been flagged dirty.
+    void RefreshParameters() {
+        if (current_macro_dirty) {
+            RefreshParametersImpl();
+        }
+    }
+
+ bool AnyParametersDirty() const { // Reports the flag that RefreshParameters() checks before re-reading.
+ return current_macro_dirty;
+ }
+
+ u32 GetMaxCurrentVertices();
+
+ size_t EstimateIndexBufferSize();
+
+ /// Handles a write to the CLEAR_BUFFERS register.
+ void ProcessClearBuffers(u32 layer_count);
+
+ /// Handles a write to the CB_BIND register.
+ void ProcessCBBind(size_t stage_index);
+
+ /// Handles a write to the CB_DATA[i] register.
+ void ProcessCBData(u32 value);
+ void ProcessCBMultiData(const u32* start_base, u32 amount);
+
private:
void InitializeRegisterDefaults();
@@ -3076,6 +3126,8 @@ private:
void ProcessDirtyRegisters(u32 method, u32 argument);
+ void ConsumeSinkImpl() override;
+
void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);
/// Retrieves information about a specific TIC entry from the TIC buffer.
@@ -3116,16 +3168,13 @@ private:
/// Handles writes to syncing register.
void ProcessSyncPoint();
- /// Handles a write to the CB_DATA[i] register.
- void ProcessCBData(u32 value);
- void ProcessCBMultiData(const u32* start_base, u32 amount);
-
- /// Handles a write to the CB_BIND register.
- void ProcessCBBind(size_t stage_index);
-
/// Returns a query's value or an empty object if the value will be deferred through a cache.
std::optional<u64> GetQueryResult();
+ void RefreshParametersImpl();
+
+ bool IsMethodExecutable(u32 method);
+
Core::System& system;
MemoryManager& memory_manager;
@@ -3145,6 +3194,10 @@ private:
Upload::State upload_state;
bool execute_on{true};
+
+ std::vector<std::pair<GPUVAddr, size_t>> macro_segments; ///< One (current_dma_segment, amount) pair per ProcessMacro() call
+ std::vector<GPUVAddr> macro_addresses; ///< One address per macro parameter word, filled by ProcessMacro()
+ bool current_macro_dirty{}; ///< OR of current_dirty over the ProcessMacro() calls of the current macro
};
#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index f73d7bf0f..01f70ea9e 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -21,7 +21,10 @@ namespace Tegra::Engines {
using namespace Texture;
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
- : system{system_}, memory_manager{memory_manager_} {}
+ : system{system_}, memory_manager{memory_manager_} {
+ execution_mask.reset(); // Start from an empty mask; launch_dma is the only register flagged.
+ execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true; // Byte offset / sizeof(u32) gives the register index.
+}
MaxwellDMA::~MaxwellDMA() = default;
@@ -29,6 +32,13 @@ void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
rasterizer = rasterizer_;
}
+/// Drains the method sink by writing each queued value directly into the register array
+/// (CallMethod() is not involved), then empties the queue.
+void MaxwellDMA::ConsumeSinkImpl() {
+    for (const auto& [reg_index, reg_value] : method_sink) {
+        regs.reg_array[reg_index] = reg_value;
+    }
+    method_sink.clear();
+}
+
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index c88191a61..0e594fa74 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -231,6 +231,8 @@ private:
void ReleaseSemaphore();
+ void ConsumeSinkImpl() override;
+
Core::System& system;
MemoryManager& memory_manager;