diff options
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 8 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 26 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 127 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_dma.h | 2 |
4 files changed, 91 insertions, 72 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index fdf470913..b1a22b76c 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -74,15 +74,15 @@ void Maxwell3D::InitializeRegisterDefaults() { regs.stencil_front_op.zfail = Regs::StencilOp::Op::Keep_D3D; regs.stencil_front_op.zpass = Regs::StencilOp::Op::Keep_D3D; regs.stencil_front_op.func = Regs::ComparisonOp::Always_GL; - regs.stencil_front_func.func_mask = 0xFFFFFFFF; - regs.stencil_front_func.mask = 0xFFFFFFFF; + regs.stencil_front_func_mask = 0xFFFFFFFF; + regs.stencil_front_mask = 0xFFFFFFFF; regs.stencil_two_side_enable = 1; regs.stencil_back_op.fail = Regs::StencilOp::Op::Keep_D3D; regs.stencil_back_op.zfail = Regs::StencilOp::Op::Keep_D3D; regs.stencil_back_op.zpass = Regs::StencilOp::Op::Keep_D3D; regs.stencil_back_op.func = Regs::ComparisonOp::Always_GL; - regs.stencil_back_func.func_mask = 0xFFFFFFFF; - regs.stencil_back_func.mask = 0xFFFFFFFF; + regs.stencil_back_func_mask = 0xFFFFFFFF; + regs.stencil_back_mask = 0xFFFFFFFF; regs.depth_test_func = Regs::ComparisonOp::Always_GL; regs.gl_front_face = Regs::FrontFace::CounterClockWise; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index efe1073b0..75e3b868d 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -390,7 +390,7 @@ public: FractionalEven = 2, }; - enum class OutputPrimitves : u32 { + enum class OutputPrimitives : u32 { Points = 0, Lines = 1, Triangles_CW = 2, @@ -401,7 +401,7 @@ public: union { BitField<0, 2, DomainType> domain_type; BitField<4, 2, Spacing> spacing; - BitField<8, 2, OutputPrimitves> output_primitives; + BitField<8, 2, OutputPrimitives> output_primitives; }; } params; @@ -1795,12 +1795,6 @@ public: ComparisonOp func; }; - struct StencilFunc { - s32 ref; - u32 func_mask; - u32 mask; - }; - struct PsSaturate { // Opposite of DepthMode enum class Depth : u32 { @@ -2737,7 +2731,9 @@ public: u32 post_z_pixel_imask; ///< 0x0F1C INSERT_PADDING_BYTES_NOINIT(0x20); ConstantColorRendering const_color_rendering; ///< 0x0F40 - StencilFunc stencil_back_func; ///< 0x0F54 + s32 stencil_back_ref; ///< 0x0F54 + u32 stencil_back_mask; ///< 0x0F58 + u32 stencil_back_func_mask; ///< 0x0F5C INSERT_PADDING_BYTES_NOINIT(0x24); VertexStreamSubstitute vertex_stream_substitute; ///< 0x0F84 u32 line_mode_clip_generated_edge_do_not_draw; ///< 0x0F8C @@ -2855,7 +2851,9 @@ public: Blend blend; ///< 0x133C u32 stencil_enable; ///< 0x1380 StencilOp stencil_front_op; ///< 0x1384 - StencilFunc stencil_front_func; ///< 0x1394 + s32 stencil_front_ref; ///< 0x1394 + s32 stencil_front_func_mask; ///< 0x1398 + s32 stencil_front_mask; ///< 0x139C INSERT_PADDING_BYTES_NOINIT(0x4); u32 draw_auto_start_byte_count; ///< 0x13A4 PsSaturate frag_color_clamp; ///< 0x13A8 @@ -3311,7 +3309,9 @@ ASSERT_REG_POSITION(vpc_perf, 0x0F14); ASSERT_REG_POSITION(pm_local_trigger, 0x0F18); ASSERT_REG_POSITION(post_z_pixel_imask, 0x0F1C); ASSERT_REG_POSITION(const_color_rendering, 0x0F40); -ASSERT_REG_POSITION(stencil_back_func, 0x0F54); +ASSERT_REG_POSITION(stencil_back_ref, 0x0F54); +ASSERT_REG_POSITION(stencil_back_mask, 0x0F58); +ASSERT_REG_POSITION(stencil_back_func_mask, 0x0F5C); ASSERT_REG_POSITION(vertex_stream_substitute, 0x0F84); ASSERT_REG_POSITION(line_mode_clip_generated_edge_do_not_draw, 0x0F8C); ASSERT_REG_POSITION(color_mask_common, 0x0F90); @@ -3416,7 +3416,9 @@ ASSERT_REG_POSITION(invalidate_texture_data_cache_lines, 0x1338); ASSERT_REG_POSITION(blend, 0x133C); ASSERT_REG_POSITION(stencil_enable, 0x1380); ASSERT_REG_POSITION(stencil_front_op, 0x1384); -ASSERT_REG_POSITION(stencil_front_func, 0x1394); +ASSERT_REG_POSITION(stencil_front_ref, 0x1394); +ASSERT_REG_POSITION(stencil_front_func_mask, 0x1398); +ASSERT_REG_POSITION(stencil_front_mask, 0x139C); ASSERT_REG_POSITION(draw_auto_start_byte_count, 0x13A4); ASSERT_REG_POSITION(frag_color_clamp, 0x13A8); ASSERT_REG_POSITION(window_origin, 0x13AC); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 3909d36c1..4eb7a100d 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -56,66 +56,85 @@ void MaxwellDMA::Launch() { ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); - const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; - const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; - - if (!is_src_pitch && !is_dst_pitch) { - // If both the source and the destination are in block layout, assert. - UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented"); - return; - } + if (launch.multi_line_enable) { + const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; + const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; + + if (!is_src_pitch && !is_dst_pitch) { + // If both the source and the destination are in block layout, assert. + UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented"); + return; + } - if (is_src_pitch && is_dst_pitch) { - CopyPitchToPitch(); + if (is_src_pitch && is_dst_pitch) { + for (u32 line = 0; line < regs.line_count; ++line) { + const GPUVAddr source_line = + regs.offset_in + static_cast<size_t>(line) * regs.pitch_in; + const GPUVAddr dest_line = + regs.offset_out + static_cast<size_t>(line) * regs.pitch_out; + memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in); + } + } else { + if (!is_src_pitch && is_dst_pitch) { + CopyBlockLinearToPitch(); + } else { + CopyPitchToBlockLinear(); + } + } } else { - ASSERT(launch.multi_line_enable == 1); - - if (!is_src_pitch && is_dst_pitch) { - CopyBlockLinearToPitch(); + // TODO: allow multisized components. + auto& accelerate = rasterizer->AccessAccelerateDMA(); + const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; + if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { + ASSERT(regs.remap_const.component_size_minus_one == 3); + accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); + std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); + memory_manager.WriteBlockUnsafe(regs.offset_out, + reinterpret_cast<u8*>(tmp_buffer.data()), + regs.line_length_in * sizeof(u32)); } else { - CopyPitchToBlockLinear(); + auto convert_linear_2_blocklinear_addr = [](u64 address) { + return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | + ((address & 0x180) >> 1) | ((address & 0x20) << 3); + }; + auto src_kind = memory_manager.GetPageKind(regs.offset_in); + auto dst_kind = memory_manager.GetPageKind(regs.offset_out); + const bool is_src_pitch = IsPitchKind(static_cast<PTEKind>(src_kind)); + const bool is_dst_pitch = IsPitchKind(static_cast<PTEKind>(dst_kind)); + if (!is_src_pitch && is_dst_pitch) { + std::vector<u8> tmp_buffer(regs.line_length_in); + std::vector<u8> dst_buffer(regs.line_length_in); + memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), + regs.line_length_in); + for (u32 offset = 0; offset < regs.line_length_in; ++offset) { + dst_buffer[offset] = + tmp_buffer[convert_linear_2_blocklinear_addr(regs.offset_in + offset) - + regs.offset_in]; + } + memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in); + } else if (is_src_pitch && !is_dst_pitch) { + std::vector<u8> tmp_buffer(regs.line_length_in); + std::vector<u8> dst_buffer(regs.line_length_in); + memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), + regs.line_length_in); + for (u32 offset = 0; offset < regs.line_length_in; ++offset) { + dst_buffer[convert_linear_2_blocklinear_addr(regs.offset_out + offset) - + regs.offset_out] = tmp_buffer[offset]; + } + memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in); + } else { + if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { + std::vector<u8> tmp_buffer(regs.line_length_in); + memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), + regs.line_length_in); + memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), + regs.line_length_in); + } + } } } - ReleaseSemaphore(); -} -void MaxwellDMA::CopyPitchToPitch() { - // When `multi_line_enable` bit is enabled we copy a 2D image of dimensions - // (line_length_in, line_count). - // Otherwise the copy is performed as if we were copying a 1D buffer of length line_length_in. - const bool remap_enabled = regs.launch_dma.remap_enable != 0; - if (regs.launch_dma.multi_line_enable) { - UNIMPLEMENTED_IF(remap_enabled); - - // Perform a line-by-line copy. - // We're going to take a subrect of size (line_length_in, line_count) from the source - // rectangle. There is no need to manually flush/invalidate the regions because CopyBlock - // does that for us. - for (u32 line = 0; line < regs.line_count; ++line) { - const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in; - const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out; - memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in); - } - return; - } - // TODO: allow multisized components. - auto& accelerate = rasterizer->AccessAccelerateDMA(); - const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; - const bool is_buffer_clear = remap_enabled && is_const_a_dst; - if (is_buffer_clear) { - ASSERT(regs.remap_const.component_size_minus_one == 3); - accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); - std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); - memory_manager.WriteBlockUnsafe(regs.offset_out, reinterpret_cast<u8*>(tmp_buffer.data()), - regs.line_length_in * sizeof(u32)); - return; - } - UNIMPLEMENTED_IF(remap_enabled); - if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { - std::vector<u8> tmp_buffer(regs.line_length_in); - memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); - memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in); - } + ReleaseSemaphore(); } void MaxwellDMA::CopyBlockLinearToPitch() { diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index bc48320ce..953e34adc 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -219,8 +219,6 @@ private: /// registers. void Launch(); - void CopyPitchToPitch(); - void CopyBlockLinearToPitch(); void CopyPitchToBlockLinear(); |