summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorTony Wasserka <neobrainx@gmail.com>2015-07-13 23:05:04 +0200
committerTony Wasserka <neobrainx@gmail.com>2015-07-13 23:05:04 +0200
commitbf80ae6a1ad328814df332c1136fe14e836053c6 (patch)
treedd61e70261a8f3488344f0ce90df6c5bb67e6c81 /src
parentMerge pull request #702 from neobrain/citrace (diff)
parentPica: Implement stencil testing. (diff)
downloadyuzu-bf80ae6a1ad328814df332c1136fe14e836053c6.tar
yuzu-bf80ae6a1ad328814df332c1136fe14e836053c6.tar.gz
yuzu-bf80ae6a1ad328814df332c1136fe14e836053c6.tar.bz2
yuzu-bf80ae6a1ad328814df332c1136fe14e836053c6.tar.lz
yuzu-bf80ae6a1ad328814df332c1136fe14e836053c6.tar.xz
yuzu-bf80ae6a1ad328814df332c1136fe14e836053c6.tar.zst
yuzu-bf80ae6a1ad328814df332c1136fe14e836053c6.zip
Diffstat (limited to '')
-rw-r--r--src/citra_qt/debugger/graphics_framebuffer.cpp62
-rw-r--r--src/citra_qt/debugger/graphics_framebuffer.h4
-rw-r--r--src/common/color.h27
-rw-r--r--src/video_core/pica.h43
-rw-r--r--src/video_core/rasterizer.cpp142
5 files changed, 258 insertions, 20 deletions
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index 6bbe7572c..39eefbf75 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -55,7 +55,9 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug
framebuffer_format_control->addItem(tr("RGBA4"));
framebuffer_format_control->addItem(tr("D16"));
framebuffer_format_control->addItem(tr("D24"));
- framebuffer_format_control->addItem(tr("D24S8"));
+ framebuffer_format_control->addItem(tr("D24X8"));
+ framebuffer_format_control->addItem(tr("X24S8"));
+ framebuffer_format_control->addItem(tr("(unknown)"));
// TODO: This QLabel should shrink the image to the available space rather than just expanding...
framebuffer_picture_label = new QLabel;
@@ -184,8 +186,32 @@ void GraphicsFramebufferWidget::OnUpdate()
framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();
framebuffer_width = framebuffer.GetWidth();
framebuffer_height = framebuffer.GetHeight();
- // TODO: It's unknown how this format is actually specified
- framebuffer_format = Format::RGBA8;
+
+ switch (framebuffer.color_format) {
+ case Pica::Regs::ColorFormat::RGBA8:
+ framebuffer_format = Format::RGBA8;
+ break;
+
+ case Pica::Regs::ColorFormat::RGB8:
+ framebuffer_format = Format::RGB8;
+ break;
+
+ case Pica::Regs::ColorFormat::RGB5A1:
+ framebuffer_format = Format::RGB5A1;
+ break;
+
+ case Pica::Regs::ColorFormat::RGB565:
+ framebuffer_format = Format::RGB565;
+ break;
+
+ case Pica::Regs::ColorFormat::RGBA4:
+ framebuffer_format = Format::RGBA4;
+ break;
+
+ default:
+ framebuffer_format = Format::Unknown;
+ break;
+ }
break;
}
@@ -197,7 +223,24 @@ void GraphicsFramebufferWidget::OnUpdate()
framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress();
framebuffer_width = framebuffer.GetWidth();
framebuffer_height = framebuffer.GetHeight();
- framebuffer_format = Format::D16;
+
+ switch (framebuffer.depth_format) {
+ case Pica::Regs::DepthFormat::D16:
+ framebuffer_format = Format::D16;
+ break;
+
+ case Pica::Regs::DepthFormat::D24:
+ framebuffer_format = Format::D24;
+ break;
+
+ case Pica::Regs::DepthFormat::D24S8:
+ framebuffer_format = Format::D24X8;
+ break;
+
+ default:
+ framebuffer_format = Format::Unknown;
+ break;
+ }
break;
}
@@ -258,7 +301,7 @@ void GraphicsFramebufferWidget::OnUpdate()
color.b() = (data >> 16) & 0xFF;
break;
}
- case Format::D24S8:
+ case Format::D24X8:
{
Math::Vec2<u32> data = Color::DecodeD24S8(pixel);
color.r() = data.x & 0xFF;
@@ -266,6 +309,12 @@ void GraphicsFramebufferWidget::OnUpdate()
color.b() = (data.x >> 16) & 0xFF;
break;
}
+ case Format::X24S8:
+ {
+ Math::Vec2<u32> data = Color::DecodeD24S8(pixel);
+ color.r() = color.g() = color.b() = data.y;
+ break;
+ }
default:
qDebug() << "Unknown fb color format " << static_cast<int>(framebuffer_format);
break;
@@ -286,7 +335,8 @@ void GraphicsFramebufferWidget::OnUpdate()
u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format format) {
switch (format) {
case Format::RGBA8:
- case Format::D24S8:
+ case Format::D24X8:
+ case Format::X24S8:
return 4;
case Format::RGB8:
case Format::D24:
diff --git a/src/citra_qt/debugger/graphics_framebuffer.h b/src/citra_qt/debugger/graphics_framebuffer.h
index 4cb396ffe..e9eae679f 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.h
+++ b/src/citra_qt/debugger/graphics_framebuffer.h
@@ -35,7 +35,9 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock {
RGBA4 = 4,
D16 = 5,
D24 = 6,
- D24S8 = 7
+ D24X8 = 7,
+ X24S8 = 8,
+ Unknown = 9
};
static u32 BytesPerPixel(Format format);
diff --git a/src/common/color.h b/src/common/color.h
index 422fdc8af..9dafdca0c 100644
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -208,7 +208,32 @@ inline void EncodeD24(u32 value, u8* bytes) {
* @param bytes Pointer where to store the encoded value
*/
inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
- *reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth;
+ bytes[0] = depth & 0xFF;
+ bytes[1] = (depth >> 8) & 0xFF;
+ bytes[2] = (depth >> 16) & 0xFF;
+ bytes[3] = stencil;
+}
+
+/**
+ * Encode a 24 bit depth value as D24X8 format (32 bits per pixel with 8 bits unused)
+ * @param depth 24 bit source depth value to encode
+ * @param bytes Pointer where to store the encoded value
+ * @note unused bits will not be modified
+ */
+inline void EncodeD24X8(u32 depth, u8* bytes) {
+ bytes[0] = depth & 0xFF;
+ bytes[1] = (depth >> 8) & 0xFF;
+ bytes[2] = (depth >> 16) & 0xFF;
+}
+
+/**
+ * Encode an 8 bit stencil value as X24S8 format (32 bits per pixel with 24 bits unused)
+ * @param stencil 8 bit source stencil value to encode
+ * @param bytes Pointer where to store the encoded value
+ * @note unused bits will not be modified
+ */
+inline void EncodeX24S8(u8 stencil, u8* bytes) {
+ bytes[3] = stencil;
}
} // namespace
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index feb20214a..46a7b21dc 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -420,6 +420,11 @@ struct Regs {
GreaterThanOrEqual = 7,
};
+ enum class StencilAction : u32 {
+ Keep = 0,
+ Xor = 5,
+ };
+
struct {
union {
// If false, logic blending is used
@@ -454,15 +459,35 @@ struct Regs {
BitField< 8, 8, u32> ref;
} alpha_test;
- union {
- BitField< 0, 1, u32> stencil_test_enable;
- BitField< 4, 3, CompareFunc> stencil_test_func;
- BitField< 8, 8, u32> stencil_replacement_value;
- BitField<16, 8, u32> stencil_reference_value;
- BitField<24, 8, u32> stencil_mask;
- } stencil_test;
+ struct {
+ union {
+ // If true, enable stencil testing
+ BitField< 0, 1, u32> enable;
- INSERT_PADDING_WORDS(0x1);
+ // Comparison operation for stencil testing
+ BitField< 4, 3, CompareFunc> func;
+
+ // Value to calculate the new stencil value from
+ BitField< 8, 8, u32> replacement_value;
+
+ // Value to compare against for stencil testing
+ BitField<16, 8, u32> reference_value;
+
+ // Mask to apply on stencil test inputs
+ BitField<24, 8, u32> mask;
+ };
+
+ union {
+ // Action to perform when the stencil test fails
+ BitField< 0, 3, StencilAction> action_stencil_fail;
+
+ // Action to perform when stencil testing passed but depth testing fails
+ BitField< 4, 3, StencilAction> action_depth_fail;
+
+ // Action to perform when both stencil and depth testing pass
+ BitField< 8, 3, StencilAction> action_depth_pass;
+ };
+ } stencil_test;
union {
BitField< 0, 1, u32> depth_test_enable;
@@ -512,7 +537,7 @@ struct Regs {
struct {
INSERT_PADDING_WORDS(0x6);
- DepthFormat depth_format;
+ DepthFormat depth_format; // TODO: Should be a BitField!
BitField<16, 3, ColorFormat> color_format;
INSERT_PADDING_WORDS(0x4);
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 70b115744..c381c2bd9 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -126,6 +126,30 @@ static u32 GetDepth(int x, int y) {
}
}
+static u8 GetStencil(int x, int y) {
+ const auto& framebuffer = g_state.regs.framebuffer;
+ const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
+ u8* depth_buffer = Memory::GetPhysicalPointer(addr);
+
+ y = framebuffer.height - y;
+
+ const u32 coarse_y = y & ~7;
+ u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format);
+ u32 stride = framebuffer.width * bytes_per_pixel;
+
+ u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+ u8* src_pixel = depth_buffer + src_offset;
+
+ switch (framebuffer.depth_format) {
+ case Regs::DepthFormat::D24S8:
+ return Color::DecodeD24S8(src_pixel).y;
+
+ default:
+ LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format);
+ return 0;
+ }
+}
+
static void SetDepth(int x, int y, u32 value) {
const auto& framebuffer = g_state.regs.framebuffer;
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
@@ -144,13 +168,46 @@ static void SetDepth(int x, int y, u32 value) {
case Regs::DepthFormat::D16:
Color::EncodeD16(value, dst_pixel);
break;
+
case Regs::DepthFormat::D24:
Color::EncodeD24(value, dst_pixel);
break;
+
case Regs::DepthFormat::D24S8:
- // TODO(Subv): Implement the stencil buffer
- Color::EncodeD24S8(value, 0, dst_pixel);
+ Color::EncodeD24X8(value, dst_pixel);
break;
+
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
+ UNIMPLEMENTED();
+ break;
+ }
+}
+
+static void SetStencil(int x, int y, u8 value) {
+ const auto& framebuffer = g_state.regs.framebuffer;
+ const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
+ u8* depth_buffer = Memory::GetPhysicalPointer(addr);
+
+ y = framebuffer.height - y;
+
+ const u32 coarse_y = y & ~7;
+ u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format);
+ u32 stride = framebuffer.width * bytes_per_pixel;
+
+ u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+ u8* dst_pixel = depth_buffer + dst_offset;
+
+ switch (framebuffer.depth_format) {
+ case Pica::Regs::DepthFormat::D16:
+ case Pica::Regs::DepthFormat::D24:
+ // Nothing to do
+ break;
+
+ case Pica::Regs::DepthFormat::D24S8:
+ Color::EncodeX24S8(value, dst_pixel);
+ break;
+
default:
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
UNIMPLEMENTED();
@@ -158,6 +215,22 @@ static void SetDepth(int x, int y, u32 value) {
}
}
+// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not!
+static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) {
+ switch (action) {
+ case Regs::StencilAction::Keep:
+ return dest;
+
+ case Regs::StencilAction::Xor:
+ return dest ^ ref;
+
+ default:
+ LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action);
+ UNIMPLEMENTED();
+ return 0;
+ }
+}
+
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
struct Fix12P4 {
Fix12P4() {}
@@ -276,6 +349,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
auto textures = regs.GetTextures();
auto tev_stages = regs.GetTevStages();
+ bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8;
+ const auto stencil_test = g_state.regs.output_merger.stencil_test;
+
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
// TODO: Not sure if looping through x first might be faster
for (u16 y = min_y + 8; y < max_y; y += 0x10) {
@@ -647,6 +723,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
}
const auto& output_merger = regs.output_merger;
+ // TODO: Does alpha testing happen before or after stencil?
if (output_merger.alpha_test.enable) {
bool pass = false;
@@ -688,6 +765,54 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
continue;
}
+ u8 old_stencil = 0;
+ if (stencil_action_enable) {
+ old_stencil = GetStencil(x >> 4, y >> 4);
+ u8 dest = old_stencil & stencil_test.mask;
+ u8 ref = stencil_test.reference_value & stencil_test.mask;
+
+ bool pass = false;
+ switch (stencil_test.func) {
+ case Regs::CompareFunc::Never:
+ pass = false;
+ break;
+
+ case Regs::CompareFunc::Always:
+ pass = true;
+ break;
+
+ case Regs::CompareFunc::Equal:
+ pass = (ref == dest);
+ break;
+
+ case Regs::CompareFunc::NotEqual:
+ pass = (ref != dest);
+ break;
+
+ case Regs::CompareFunc::LessThan:
+ pass = (ref < dest);
+ break;
+
+ case Regs::CompareFunc::LessThanOrEqual:
+ pass = (ref <= dest);
+ break;
+
+ case Regs::CompareFunc::GreaterThan:
+ pass = (ref > dest);
+ break;
+
+ case Regs::CompareFunc::GreaterThanOrEqual:
+ pass = (ref >= dest);
+ break;
+ }
+
+ if (!pass) {
+ u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value);
+ SetStencil(x >> 4, y >> 4, new_stencil);
+ continue;
+ }
+ }
+
// TODO: Does depth indeed only get written even if depth testing is enabled?
if (output_merger.depth_test_enable) {
unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
@@ -732,11 +857,22 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
break;
}
- if (!pass)
+ if (!pass) {
+ if (stencil_action_enable) {
+ u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value);
+ SetStencil(x >> 4, y >> 4, new_stencil);
+ }
continue;
+ }
if (output_merger.depth_write_enable)
SetDepth(x >> 4, y >> 4, z);
+
+ if (stencil_action_enable) {
+ // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway?
+ u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value);
+ SetStencil(x >> 4, y >> 4, new_stencil);
+ }
}
auto dest = GetPixel(x >> 4, y >> 4);