diff options
-rw-r--r-- | src/citra_qt/debugger/graphics_framebuffer.cpp | 62 | ||||
-rw-r--r-- | src/citra_qt/debugger/graphics_framebuffer.h | 4 | ||||
-rw-r--r-- | src/common/color.h | 27 | ||||
-rw-r--r-- | src/video_core/pica.h | 43 | ||||
-rw-r--r-- | src/video_core/rasterizer.cpp | 142 |
5 files changed, 258 insertions, 20 deletions
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp index 6bbe7572c..39eefbf75 100644 --- a/src/citra_qt/debugger/graphics_framebuffer.cpp +++ b/src/citra_qt/debugger/graphics_framebuffer.cpp @@ -55,7 +55,9 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug framebuffer_format_control->addItem(tr("RGBA4")); framebuffer_format_control->addItem(tr("D16")); framebuffer_format_control->addItem(tr("D24")); - framebuffer_format_control->addItem(tr("D24S8")); + framebuffer_format_control->addItem(tr("D24X8")); + framebuffer_format_control->addItem(tr("X24S8")); + framebuffer_format_control->addItem(tr("(unknown)")); // TODO: This QLabel should shrink the image to the available space rather than just expanding... framebuffer_picture_label = new QLabel; @@ -184,8 +186,32 @@ void GraphicsFramebufferWidget::OnUpdate() framebuffer_address = framebuffer.GetColorBufferPhysicalAddress(); framebuffer_width = framebuffer.GetWidth(); framebuffer_height = framebuffer.GetHeight(); - // TODO: It's unknown how this format is actually specified - framebuffer_format = Format::RGBA8; + + switch (framebuffer.color_format) { + case Pica::Regs::ColorFormat::RGBA8: + framebuffer_format = Format::RGBA8; + break; + + case Pica::Regs::ColorFormat::RGB8: + framebuffer_format = Format::RGB8; + break; + + case Pica::Regs::ColorFormat::RGB5A1: + framebuffer_format = Format::RGB5A1; + break; + + case Pica::Regs::ColorFormat::RGB565: + framebuffer_format = Format::RGB565; + break; + + case Pica::Regs::ColorFormat::RGBA4: + framebuffer_format = Format::RGBA4; + break; + + default: + framebuffer_format = Format::Unknown; + break; + } break; } @@ -197,7 +223,24 @@ void GraphicsFramebufferWidget::OnUpdate() framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress(); framebuffer_width = framebuffer.GetWidth(); framebuffer_height = framebuffer.GetHeight(); - framebuffer_format = Format::D16; + + switch (framebuffer.depth_format) { + case Pica::Regs::DepthFormat::D16: + framebuffer_format = Format::D16; + break; + + case Pica::Regs::DepthFormat::D24: + framebuffer_format = Format::D24; + break; + + case Pica::Regs::DepthFormat::D24S8: + framebuffer_format = Format::D24X8; + break; + + default: + framebuffer_format = Format::Unknown; + break; + } break; } @@ -258,7 +301,7 @@ void GraphicsFramebufferWidget::OnUpdate() color.b() = (data >> 16) & 0xFF; break; } - case Format::D24S8: + case Format::D24X8: { Math::Vec2<u32> data = Color::DecodeD24S8(pixel); color.r() = data.x & 0xFF; @@ -266,6 +309,12 @@ void GraphicsFramebufferWidget::OnUpdate() color.b() = (data.x >> 16) & 0xFF; break; } + case Format::X24S8: + { + Math::Vec2<u32> data = Color::DecodeD24S8(pixel); + color.r() = color.g() = color.b() = data.y; + break; + } default: qDebug() << "Unknown fb color format " << static_cast<int>(framebuffer_format); break; @@ -286,7 +335,8 @@ void GraphicsFramebufferWidget::OnUpdate() u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format format) { switch (format) { case Format::RGBA8: - case Format::D24S8: + case Format::D24X8: + case Format::X24S8: return 4; case Format::RGB8: case Format::D24: diff --git a/src/citra_qt/debugger/graphics_framebuffer.h b/src/citra_qt/debugger/graphics_framebuffer.h index 4cb396ffe..e9eae679f 100644 --- a/src/citra_qt/debugger/graphics_framebuffer.h +++ b/src/citra_qt/debugger/graphics_framebuffer.h @@ -35,7 +35,9 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock { RGBA4 = 4, D16 = 5, D24 = 6, - D24S8 = 7 + D24X8 = 7, + X24S8 = 8, + Unknown = 9 }; static u32 BytesPerPixel(Format format); diff --git a/src/common/color.h b/src/common/color.h index 422fdc8af..9dafdca0c 100644 --- a/src/common/color.h +++ b/src/common/color.h @@ -208,7 +208,32 @@ inline void EncodeD24(u32 value, u8* bytes) { * @param bytes Pointer where to store the encoded value */ inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) { - *reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth; + bytes[0] = depth & 0xFF; + bytes[1] = (depth >> 8) & 0xFF; + bytes[2] = (depth >> 16) & 0xFF; + bytes[3] = stencil; +} + +/** + * Encode a 24 bit depth value as D24X8 format (32 bits per pixel with 8 bits unused) + * @param depth 24 bit source depth value to encode + * @param bytes Pointer where to store the encoded value + * @note unused bits will not be modified + */ +inline void EncodeD24X8(u32 depth, u8* bytes) { + bytes[0] = depth & 0xFF; + bytes[1] = (depth >> 8) & 0xFF; + bytes[2] = (depth >> 16) & 0xFF; +} + +/** + * Encode an 8 bit stencil value as X24S8 format (32 bits per pixel with 24 bits unused) + * @param stencil 8 bit source stencil value to encode + * @param bytes Pointer where to store the encoded value + * @note unused bits will not be modified + */ +inline void EncodeX24S8(u8 stencil, u8* bytes) { + bytes[3] = stencil; } } // namespace diff --git a/src/video_core/pica.h b/src/video_core/pica.h index feb20214a..46a7b21dc 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -420,6 +420,11 @@ struct Regs { GreaterThanOrEqual = 7, }; + enum class StencilAction : u32 { + Keep = 0, + Xor = 5, + }; + struct { union { // If false, logic blending is used @@ -454,15 +459,35 @@ struct Regs { BitField< 8, 8, u32> ref; } alpha_test; - union { - BitField< 0, 1, u32> stencil_test_enable; - BitField< 4, 3, CompareFunc> stencil_test_func; - BitField< 8, 8, u32> stencil_replacement_value; - BitField<16, 8, u32> stencil_reference_value; - BitField<24, 8, u32> stencil_mask; - } stencil_test; + struct { + union { + // If true, enable stencil testing + BitField< 0, 1, u32> enable; - INSERT_PADDING_WORDS(0x1); + // Comparison operation for stencil testing + BitField< 4, 3, CompareFunc> func; + + // Value to calculate the new stencil value from + BitField< 8, 8, u32> replacement_value; + + // Value to compare against for stencil testing + BitField<16, 8, u32> reference_value; + + // Mask to apply on stencil test inputs + BitField<24, 8, u32> mask; + }; + + union { + // Action to perform when the stencil test fails + BitField< 0, 3, StencilAction> action_stencil_fail; + + // Action to perform when stencil testing passed but depth testing fails + BitField< 4, 3, StencilAction> action_depth_fail; + + // Action to perform when both stencil and depth testing pass + BitField< 8, 3, StencilAction> action_depth_pass; + }; + } stencil_test; union { BitField< 0, 1, u32> depth_test_enable; @@ -512,7 +537,7 @@ struct Regs { struct { INSERT_PADDING_WORDS(0x6); - DepthFormat depth_format; + DepthFormat depth_format; // TODO: Should be a BitField! BitField<16, 3, ColorFormat> color_format; INSERT_PADDING_WORDS(0x4); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 70b115744..c381c2bd9 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -126,6 +126,30 @@ static u32 GetDepth(int x, int y) { } } +static u8 GetStencil(int x, int y) { + const auto& framebuffer = g_state.regs.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; + + u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; + u8* src_pixel = depth_buffer + src_offset; + + switch (framebuffer.depth_format) { + case Regs::DepthFormat::D24S8: + return Color::DecodeD24S8(src_pixel).y; + + default: + LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); + return 0; + } +} + static void SetDepth(int x, int y, u32 value) { const auto& framebuffer = g_state.regs.framebuffer; const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); @@ -144,13 +168,46 @@ static void SetDepth(int x, int y, u32 value) { case Regs::DepthFormat::D16: Color::EncodeD16(value, dst_pixel); break; + case Regs::DepthFormat::D24: Color::EncodeD24(value, dst_pixel); break; + case Regs::DepthFormat::D24S8: - // TODO(Subv): Implement the stencil buffer - Color::EncodeD24S8(value, 0, dst_pixel); + Color::EncodeD24X8(value, dst_pixel); break; + + default: + LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); + UNIMPLEMENTED(); + break; + } +} + +static void SetStencil(int x, int y, u8 value) { + const auto& framebuffer = g_state.regs.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; + + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; + u8* dst_pixel = depth_buffer + dst_offset; + + switch (framebuffer.depth_format) { + case Pica::Regs::DepthFormat::D16: + case Pica::Regs::DepthFormat::D24: + // Nothing to do + break; + + case Pica::Regs::DepthFormat::D24S8: + Color::EncodeX24S8(value, dst_pixel); + break; + default: LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); UNIMPLEMENTED(); @@ -158,6 +215,22 @@ static void SetDepth(int x, int y, u32 value) { } } +// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not! +static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) { + switch (action) { + case Regs::StencilAction::Keep: + return dest; + + case Regs::StencilAction::Xor: + return dest ^ ref; + + default: + LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); + UNIMPLEMENTED(); + return 0; + } +} + // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values struct Fix12P4 { Fix12P4() {} @@ -276,6 +349,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, auto textures = regs.GetTextures(); auto tev_stages = regs.GetTevStages(); + bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; + const auto stencil_test = g_state.regs.output_merger.stencil_test; + // Enter rasterization loop, starting at the center of the topleft bounding box corner. // TODO: Not sure if looping through x first might be faster for (u16 y = min_y + 8; y < max_y; y += 0x10) { @@ -647,6 +723,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, } const auto& output_merger = regs.output_merger; + // TODO: Does alpha testing happen before or after stencil? if (output_merger.alpha_test.enable) { bool pass = false; @@ -688,6 +765,54 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, continue; } + u8 old_stencil = 0; + if (stencil_action_enable) { + old_stencil = GetStencil(x >> 4, y >> 4); + u8 dest = old_stencil & stencil_test.mask; + u8 ref = stencil_test.reference_value & stencil_test.mask; + + bool pass = false; + switch (stencil_test.func) { + case Regs::CompareFunc::Never: + pass = false; + break; + + case Regs::CompareFunc::Always: + pass = true; + break; + + case Regs::CompareFunc::Equal: + pass = (ref == dest); + break; + + case Regs::CompareFunc::NotEqual: + pass = (ref != dest); + break; + + case Regs::CompareFunc::LessThan: + pass = (ref < dest); + break; + + case Regs::CompareFunc::LessThanOrEqual: + pass = (ref <= dest); + break; + + case Regs::CompareFunc::GreaterThan: + pass = (ref > dest); + break; + + case Regs::CompareFunc::GreaterThanOrEqual: + pass = (ref >= dest); + break; + } + + if (!pass) { + u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value); + SetStencil(x >> 4, y >> 4, new_stencil); + continue; + } + } + // TODO: Does depth indeed only get written even if depth testing is enabled? if (output_merger.depth_test_enable) { unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); @@ -732,11 +857,22 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, break; } - if (!pass) + if (!pass) { + if (stencil_action_enable) { + u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value); + SetStencil(x >> 4, y >> 4, new_stencil); + } continue; + } if (output_merger.depth_write_enable) SetDepth(x >> 4, y >> 4, z); + + if (stencil_action_enable) { + // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway? + u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value); + SetStencil(x >> 4, y >> 4, new_stencil); + } } auto dest = GetPixel(x >> 4, y >> 4); |