summaryrefslogtreecommitdiffstats
path: root/src/video_core/engines/maxwell_dma.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/engines/maxwell_dma.cpp')
-rw-r--r--src/video_core/engines/maxwell_dma.cpp50
1 files changed, 45 insertions, 5 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 6630005b0..01d7df405 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -17,16 +17,16 @@ namespace Tegra::Engines {
MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
: system{system}, memory_manager{memory_manager} {}
-void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
- ASSERT_MSG(method_call.method < Regs::NUM_REGS,
+void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
+ ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid MaxwellDMA register, increase the size of the Regs structure");
- regs.reg_array[method_call.method] = method_call.argument;
+ regs.reg_array[method] = method_argument;
#define MAXWELLDMA_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
- switch (method_call.method) {
+ switch (method) {
case MAXWELLDMA_REG_INDEX(exec): {
HandleCopy();
break;
@@ -39,7 +39,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) {
for (std::size_t i = 0; i < amount; i++) {
- CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)});
+ CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
}
}
@@ -90,7 +90,47 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.exec.enable_2d == 1);
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
+
ASSERT(regs.src_params.BlockDepth() == 0);
+ // Optimized path for micro copies.
+ if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) {
+ const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
+ const std::size_t src_size = Texture::GetGOBSize();
+ const std::size_t dst_size = regs.dst_pitch * regs.y_count;
+ u32 pos_x = regs.src_params.pos_x;
+ u32 pos_y = regs.src_params.pos_y;
+ const u64 offset =
+ Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y,
+ regs.src_params.BlockDepth(), bytes_per_pixel);
+ const u32 x_in_gob = 64 / bytes_per_pixel;
+ pos_x = pos_x % x_in_gob;
+ pos_y = pos_y % 8;
+
+ if (read_buffer.size() < src_size) {
+ read_buffer.resize(src_size);
+ }
+
+ if (write_buffer.size() < dst_size) {
+ write_buffer.resize(dst_size);
+ }
+
+ if (Settings::IsGPULevelExtreme()) {
+ memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
+ } else {
+ memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size);
+ memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
+ }
+
+ Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
+ regs.src_params.size_x, bytes_per_pixel, read_buffer.data(),
+ write_buffer.data(), regs.src_params.BlockHeight(), pos_x,
+ pos_y);
+
+ memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
+
+ return;
+ }
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
const std::size_t src_size = Texture::CalculateSize(