From b7a48c422aa7293525909ac7b32575bce8575bde Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Sun, 21 Dec 2014 02:49:45 +0100
Subject: Pica/CommandProcessor: Add support for integer uniforms.

---
 src/video_core/command_processor.cpp | 13 +++++++++++++
 src/video_core/pica.h                | 10 +++++++++-
 src/video_core/vertex_shader.cpp     |  7 +++++++
 src/video_core/vertex_shader.h       |  1 +
 4 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'src/video_core')
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 9602779f4..9e1975ddb 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -173,6 +173,19 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
 
             break;
 
+        case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1):
+        case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[1], 0x2b2):
+        case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3):
+        case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4):
+        {
+            int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1));
+            auto values = registers.vs_int_uniforms[index];
+            VertexShader::GetIntUniform(index) = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
+            LOG_ERROR(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x",
+                      index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value());
+            break;
+        }
+
         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1):
         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2):
         case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3):
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 38bac748c..f518cc98b 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -495,8 +495,14 @@ struct Regs {
     INSERT_PADDING_WORDS(0x51);
 
     BitField<0, 16, u32> vs_bool_uniforms;
+    union {
+        BitField< 0, 8, u32> x;
+        BitField< 8, 8, u32> y;
+        BitField<16, 8, u32> z;
+        BitField<24, 8, u32> w;
+    } vs_int_uniforms[4];
 
-    INSERT_PADDING_WORDS(0x9);
+    INSERT_PADDING_WORDS(0x5);
 
     // Offset to shader program entry point (in words)
     BitField<0, 16, u32> vs_main_offset;
@@ -625,6 +631,7 @@ struct Regs {
         ADD_FIELD(trigger_draw_indexed);
         ADD_FIELD(triangle_topology);
         ADD_FIELD(vs_bool_uniforms);
+        ADD_FIELD(vs_int_uniforms);
         ADD_FIELD(vs_main_offset);
         ADD_FIELD(vs_input_register_map);
         ADD_FIELD(vs_uniform_setup);
@@ -696,6 +703,7 @@ ASSERT_REG_POSITION(trigger_draw, 0x22e);
 ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
 ASSERT_REG_POSITION(triangle_topology, 0x25e);
 ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0);
+ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1);
 ASSERT_REG_POSITION(vs_main_offset, 0x2ba);
 ASSERT_REG_POSITION(vs_input_register_map, 0x2bb);
 ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0);
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index bed5081a0..090ffd420 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -30,6 +30,8 @@ static struct {
     Math::Vec4<float24> f[96];
 
     std::array<bool,16> b;
+
+    std::array<Math::Vec4<u8>,4> i;
 } shader_uniforms;
 
 // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
@@ -57,6 +59,11 @@ bool& GetBoolUniform(u32 index)
     return shader_uniforms.b[index];
 }
 
+Math::Vec4<u8>& GetIntUniform(u32 index)
+{
+    return shader_uniforms.i[index];
+}
+
 const std::array<u32, 1024>& GetShaderBinary()
 {
     return shader_memory;
diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h
index af3fb2a2f..3a68a3409 100644
--- a/src/video_core/vertex_shader.h
+++ b/src/video_core/vertex_shader.h
@@ -73,6 +73,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes);
 
 Math::Vec4<float24>& GetFloatUniform(u32 index);
 bool& GetBoolUniform(u32 index);
+Math::Vec4<u8>& GetIntUniform(u32 index);
 
 const std::array<u32, 1024>& GetShaderBinary();
 const std::array<u32, 1024>& GetSwizzlePatterns();
-- 
cgit v1.2.3


From 632655e292cc317f8a985747dda8883d3f785431 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Sun, 21 Dec 2014 02:51:48 +0100
Subject: Pica: Fix A4, IA4 and IA8 texture formats.

Both IA4 and IA8 had their component order mixed up. Additionally, IA4 used the wrong number of nibbles per texel. A4 skipped every second texel.
---
 src/video_core/debug_utils/debug_utils.cpp | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 5921185a6..9c0fbc453 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -389,13 +389,11 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
     {
         const u8* source_ptr = source + offset * 2;
 
-        // TODO: component order not verified
-
         if (disable_alpha) {
             // Show intensity as red, alpha as green
-            return { source_ptr[0], source_ptr[1], 0, 255 };
+            return { source_ptr[1], source_ptr[0], 0, 255 };
         } else {
-            return { source_ptr[0], source_ptr[0], source_ptr[0], source_ptr[1]};
+            return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]};
         }
     }
 
@@ -418,12 +416,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
 
     case Regs::TextureFormat::IA4:
     {
-        const u8* source_ptr = source + offset / 2;
-
-        // TODO: component order not verified
+        const u8* source_ptr = source + offset;
 
-        u8 i = (*source_ptr) & 0xF;
-        u8 a = ((*source_ptr) & 0xF0) >> 4;
+        u8 i = ((*source_ptr) & 0xF0) >> 4;
+        u8 a = (*source_ptr) & 0xF;
         a |= a << 4;
         i |= i << 4;
 
@@ -439,15 +435,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
     {
         const u8* source_ptr = source + offset / 2;
 
-        // TODO: component order not verified
-
         u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4);
         a |= a << 4;
 
         if (disable_alpha) {
-            return { *source_ptr, *source_ptr, *source_ptr, 255 };
+            return { a, a, a, 255 };
         } else {
-            return { 0, 0, 0, *source_ptr };
+            return { 0, 0, 0, a };
         }
     }
 
-- 
cgit v1.2.3


From 36291bc3f6e051f561b24408f7d3642235a749c8 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Sun, 21 Dec 2014 02:55:51 +0100
Subject: Pica: Add output merger definitions.

---
 src/video_core/pica.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

(limited to 'src/video_core')

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index f518cc98b..4afda7b4b 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -289,7 +289,7 @@ struct Regs {
     TevStageConfig tev_stage4;
     INSERT_PADDING_WORDS(0x3);
     TevStageConfig tev_stage5;
-    INSERT_PADDING_WORDS(0x13);
+    INSERT_PADDING_WORDS(0x3);
 
     const std::array<Regs::TevStageConfig,6> GetTevStages() const {
         return { tev_stage0, tev_stage1,
@@ -297,6 +297,59 @@ struct Regs {
                  tev_stage4, tev_stage5 };
     };
 
+    struct {
+        enum DepthFunc : u32 {
+            Always      = 1,
+            GreaterThan = 6,
+        };
+
+        union {
+            // If false, logic blending is used
+            BitField<8, 1, u32> alphablend_enable;
+        };
+
+        union {
+            enum BlendEquation : u32 {
+                Add = 0,
+            };
+
+            enum BlendFactor : u32 {
+                Zero = 0,
+                One = 1,
+
+                SourceAlpha = 6,
+                OneMinusSourceAlpha = 7,
+            };
+
+            BitField< 0, 8, BlendEquation> blend_equation_rgb;
+            BitField< 8, 8, BlendEquation> blend_equation_a;
+
+            BitField<16, 4, BlendFactor> factor_source_rgb;
+            BitField<20, 4, BlendFactor> factor_dest_rgb;
+
+            BitField<24, 4, BlendFactor> factor_source_a;
+            BitField<28, 4, BlendFactor> factor_dest_a;
+        } alpha_blending;
+
+        union {
+            enum Op {
+                Set = 4,
+            };
+
+            BitField<0, 4, Op> op;
+        } logic_op;
+
+        INSERT_PADDING_WORDS(0x4);
+
+        union {
+            BitField< 0, 1, u32> depth_test_enable;
+            BitField< 4, 3, DepthFunc> depth_test_func;
+            BitField<12, 1, u32> depth_write_enable;
+        };
+
+        INSERT_PADDING_WORDS(0x8);
+    } output_merger;
+
     struct {
         enum ColorFormat : u32 {
             RGBA8    = 0,
@@ -623,6 +676,7 @@ struct Regs {
         ADD_FIELD(tev_stage3);
         ADD_FIELD(tev_stage4);
         ADD_FIELD(tev_stage5);
+        ADD_FIELD(output_merger);
         ADD_FIELD(framebuffer);
         ADD_FIELD(vertex_attributes);
         ADD_FIELD(index_array);
@@ -695,6 +749,7 @@ ASSERT_REG_POSITION(tev_stage2, 0xd0);
 ASSERT_REG_POSITION(tev_stage3, 0xd8);
 ASSERT_REG_POSITION(tev_stage4, 0xf0);
 ASSERT_REG_POSITION(tev_stage5, 0xf8);
+ASSERT_REG_POSITION(output_merger, 0x100);
 ASSERT_REG_POSITION(framebuffer, 0x110);
 ASSERT_REG_POSITION(vertex_attributes, 0x200);
 ASSERT_REG_POSITION(index_array, 0x227);
-- 
cgit v1.2.3


From 77bb58afeb39344b7481f6f003a9beb6c7b87199 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Sun, 21 Dec 2014 02:57:56 +0100
Subject: Pica/Rasterizer: Further enhance Tev support.

---
 src/video_core/rasterizer.cpp | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index a80148872..04ff68615 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -279,12 +279,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     }
                 };
 
-                auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
+                static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
                     switch (factor)
                     {
                     case ColorModifier::SourceColor:
                         return values.rgb();
 
+                    case ColorModifier::OneMinusSourceColor:
+                        return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
+
                     case ColorModifier::SourceAlpha:
                         return { values.a(), values.a(), values.a() };
 
@@ -295,7 +298,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     }
                 };
 
-                auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
+                static auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
                     switch (factor) {
                     case AlphaModifier::SourceAlpha:
                         return value;
@@ -310,7 +313,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     }
                 };
 
-                auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
+                static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
                     switch (op) {
                     case Operation::Replace:
                         return input[0];
@@ -330,6 +333,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     case Operation::Lerp:
                         return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
 
+                    case Operation::Subtract:
+                    {
+                        auto result = input[0].Cast<int>() - input[1].Cast<int>();
+                        result.r() = std::max(0, result.r());
+                        result.g() = std::max(0, result.g());
+                        result.b() = std::max(0, result.b());
+                        return result.Cast<u8>();
+                    }
+
                     default:
                         LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
                         _dbg_assert_(HW_GPU, 0);
@@ -337,7 +349,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     }
                 };
 
-                auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
+                static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
                     switch (op) {
                     case Operation::Replace:
                         return input[0];
@@ -351,6 +363,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     case Operation::Lerp:
                         return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
 
+                    case Operation::Subtract:
+                        return std::max(0, (int)input[0] - (int)input[1]);
+
                     default:
                         LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
                         _dbg_assert_(HW_GPU, 0);
-- 
cgit v1.2.3


From e229ff8c836fa213f1bdd31cabe924457f5e7e0c Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Sun, 21 Dec 2014 02:59:08 +0100
Subject: Pica/Rasterizer: Implement depth testing.

---
 src/video_core/pica.h         |  1 +
 src/video_core/rasterizer.cpp | 39 +++++++++++++++++++++++++++++++++------
 2 files changed, 34 insertions(+), 6 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 4afda7b4b..810a926c9 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -300,6 +300,7 @@ struct Regs {
     struct {
         enum DepthFunc : u32 {
             Always      = 1,
+            LessThan    = 4,
             GreaterThan = 6,
         };
 
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 04ff68615..8dff2db27 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -396,12 +396,39 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                 combiner_output = Math::MakeVec(color_output, alpha_output);
             }
 
-            // TODO: Not sure if the multiplication by 65535 has already been taken care
-            // of when transforming to screen coordinates or not.
-            u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
-                           (float)v1.screenpos[2].ToFloat32() * w1 +
-                           (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
-            SetDepth(x >> 4, y >> 4, z);
+            // TODO: Does depth indeed only get written if depth testing is enabled?
+            if (registers.output_merger.depth_test_enable) {
+                u16 z = (u16)(-((float)v0.screenpos[2].ToFloat32() * w0 +
+                            (float)v1.screenpos[2].ToFloat32() * w1 +
+                            (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
+                u16 ref_z = GetDepth(x >> 4, y >> 4);
+
+                bool pass = false;
+
+                switch (registers.output_merger.depth_test_func) {
+                case registers.output_merger.Always:
+                    pass = true;
+                    break;
+
+                case registers.output_merger.LessThan:
+                    pass = z < ref_z;
+                    break;
+
+                case registers.output_merger.GreaterThan:
+                    pass = z > ref_z;
+                    break;
+
+                default:
+                    LOG_ERROR(HW_GPU, "Unknown depth test function %x", registers.output_merger.depth_test_func.Value());
+                    break;
+                }
+
+                if (!pass)
+                    continue;
+
+                if (registers.output_merger.depth_write_enable)
+                    SetDepth(x >> 4, y >> 4, z);
+            }
 
             DrawPixel(x >> 4, y >> 4, combiner_output);
         }
-- 
cgit v1.2.3


From a7ae0330b1e4d5aa7fab3bb07bb2cf58f8572dc5 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Sun, 21 Dec 2014 03:00:25 +0100
Subject: Pica/Rasterizer: Implement alpha blending.

---
 src/video_core/rasterizer.cpp | 84 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

(limited to 'src/video_core')

diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 8dff2db27..5f7971fe2 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -25,6 +25,18 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
     *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value;
 }
 
+static const Math::Vec4<u8> GetPixel(int x, int y) { // Reads the color-buffer word at (x, y) and unpacks it into a Vec4<u8>.
+    u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));
+    // The buffer is indexed as 32-bit words with GetWidth() words per row; x and y are not range-checked here.
+    u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth());
+    Math::Vec4<u8> ret;
+    ret.a() = value >> 24;          // bits 31..24
+    ret.r() = (value >> 16) & 0xFF; // bits 23..16
+    ret.g() = (value >> 8) & 0xFF;  // bits 15..8
+    ret.b() = value & 0xFF;         // bits 7..0
+    return ret;
+ }
+
 static u32 GetDepth(int x, int y) {
     u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
 
@@ -430,6 +442,78 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     SetDepth(x >> 4, y >> 4, z);
             }
 
+            auto dest = GetPixel(x >> 4, y >> 4);
+
+            if (registers.output_merger.alphablend_enable) {
+                auto params = registers.output_merger.alpha_blending;
+
+                auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> {
+                    switch(factor) {
+                    case params.Zero:
+                        return Math::Vec3<u8>(0, 0, 0);
+
+                    case params.One:
+                        return Math::Vec3<u8>(255, 255, 255);
+
+                    case params.SourceAlpha:
+                        return Math::MakeVec(combiner_output.a(), combiner_output.a(), combiner_output.a());
+
+                    case params.OneMinusSourceAlpha:
+                        return Math::Vec3<u8>(255-combiner_output.a(), 255-combiner_output.a(), 255-combiner_output.a());
+
+                    default:
+                        LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
+                        exit(0);
+                        break;
+                    }
+                };
+
+                auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 {
+                    switch(factor) {
+                    case params.Zero:
+                        return 0;
+
+                    case params.One:
+                        return 255;
+
+                    case params.SourceAlpha:
+                        return combiner_output.a();
+
+                    case params.OneMinusSourceAlpha:
+                        return 255 - combiner_output.a();
+
+                    default:
+                        LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor);
+                        exit(0);
+                        break;
+                    }
+                };
+
+                auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb),
+                                               LookupFactorA(params.factor_source_a));
+                auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb),
+                                               LookupFactorA(params.factor_dest_a));
+
+                switch (params.blend_equation_rgb) {
+                case params.Add: {
+                    auto result = (combiner_output * srcfactor + dest * dstfactor) / 255; // factors are scaled by 255, hence the division
+                    result.r() = std::min(255, result.r());
+                    result.g() = std::min(255, result.g());
+                    result.b() = std::min(255, result.b());
+                    result.a() = std::min(255, result.a()); // clamp alpha like RGB before narrowing to u8
+                    combiner_output = result.Cast<u8>();
+                    break;
+                }
+
+                default:
+                    LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value());
+                    exit(0);
+                }
+            } else {
+                LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op.op.Value()); // pass the field value, not the whole union, to the %x format
+                exit(0); // no logic-op path is implemented in this branch; the process terminates here
+            }
+
             DrawPixel(x >> 4, y >> 4, combiner_output);
         }
     }
-- 
cgit v1.2.3


From 3da52ead9badab44257fce6e606873f6abc7dc6f Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Sun, 28 Dec 2014 23:33:59 +0100
Subject: Pica/DebugUtils: Fix a bug in RGBA4 texture decoding.

---
 src/video_core/debug_utils/debug_utils.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 9c0fbc453..83d585d16 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -375,9 +375,9 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
     {
         const u8* source_ptr = source + offset * 2;
         u8 r = source_ptr[1] >> 4;
-        u8 g = source_ptr[1] & 0xFF;
+        u8 g = source_ptr[1] & 0xF;
         u8 b = source_ptr[0] >> 4;
-        u8 a = source_ptr[0] & 0xFF;
+        u8 a = source_ptr[0] & 0xF;
         r = (r << 4) | r;
         g = (g << 4) | g;
         b = (b << 4) | b;
-- 
cgit v1.2.3


From 3b78af904e5a4f959ab206a207bd26441886c9a8 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Sun, 21 Dec 2014 02:56:32 +0100
Subject: Pica/Rasterizer: Textures seem to be laid out flipped vertically.

Not sure if this is a correct fix. Probably should instead change the decoding logic itself.
---
 src/video_core/rasterizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/video_core')

diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 5f7971fe2..08b649fb6 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -214,7 +214,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     }
                 };
                 s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width);
-                t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height);
+                t = registers.texture0.height - 1 - GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height);
 
                 u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
                 auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
-- 
cgit v1.2.3


From 0f494240228e24e21c88bf9f3178aaa68db4fb45 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Sat, 13 Dec 2014 21:39:42 +0100
Subject: Pica/Rasterizer: Implement backface culling.

---
 src/video_core/pica.h         | 16 +++++++++++++++-
 src/video_core/rasterizer.cpp | 30 +++++++++++++++++++++---------
 2 files changed, 36 insertions(+), 10 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 810a926c9..f5771ed84 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -50,7 +50,19 @@ struct Regs {
 
     u32 trigger_irq;
 
-    INSERT_PADDING_WORDS(0x30);
+    INSERT_PADDING_WORDS(0x2f);
+
+    enum class CullMode : u32 {
+        // Select which polygons are considered to be "frontfacing".
+        KeepAll              = 0,
+        KeepClockWise        = 1,
+        KeepCounterClockWise = 2,
+        // TODO: What does the third value imply?
+    };
+
+    union {
+        BitField<0, 2, CullMode> cull_mode;
+    };
 
     BitField<0, 24, u32> viewport_size_x;
 
@@ -659,6 +671,7 @@ struct Regs {
             } while(false)
 
         ADD_FIELD(trigger_irq);
+        ADD_FIELD(cull_mode);
         ADD_FIELD(viewport_size_x);
         ADD_FIELD(viewport_size_y);
         ADD_FIELD(viewport_depth_range);
@@ -730,6 +743,7 @@ private:
 #define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position")
 
 ASSERT_REG_POSITION(trigger_irq, 0x10);
+ASSERT_REG_POSITION(cull_mode, 0x40);
 ASSERT_REG_POSITION(viewport_size_x, 0x41);
 ASSERT_REG_POSITION(viewport_size_y, 0x43);
 ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 08b649fb6..9148745dc 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -82,10 +82,31 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
     auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) {
                                              return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
                                          };
+    static auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
+                              const Math::Vec2<Fix12P4>& vtx2,
+                              const Math::Vec2<Fix12P4>& vtx3) {
+        const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
+        const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
+        // TODO: There is a very small chance this will overflow for sizeof(int) == 4
+        return Math::Cross(vec1, vec2).z;
+    };
+
     Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos),
                                    ScreenToRasterizerCoordinates(v1.screenpos),
                                    ScreenToRasterizerCoordinates(v2.screenpos) };
 
+    if (registers.cull_mode == Regs::CullMode::KeepClockWise) {
+        // Reverse vertex order and use the CCW code path.
+        std::swap(vtxpos[1], vtxpos[2]); // NOTE(review): swaps positions only; v1/v2 keep their order, so w1/w2 may pair with the wrong vertex's attributes - confirm
+    }
+
+    if (registers.cull_mode != Regs::CullMode::KeepAll) {
+        // Cull away triangles which are wound clockwise.
+        // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
+        if (orient2d(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
+            return;
+    }
+
     // TODO: Proper scissor rect test!
     u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
     u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
@@ -128,15 +149,6 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
         for (u16 x = min_x; x < max_x; x += 0x10) {
 
             // Calculate the barycentric coordinates w0, w1 and w2
-            auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
-                               const Math::Vec2<Fix12P4>& vtx2,
-                               const Math::Vec2<Fix12P4>& vtx3) {
-                const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
-                const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
-                // TODO: There is a very small chance this will overflow for sizeof(int) == 4
-                return Math::Cross(vec1, vec2).z;
-            };
-
             int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
             int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
             int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
-- 
cgit v1.2.3


From b2d461020d12b9abf06857747ed237c0c3a6647a Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Sun, 21 Dec 2014 03:02:15 +0100
Subject: Pica/CommandProcessor: Workaround games not setting the input
 position's w component.

---
 src/video_core/command_processor.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'src/video_core')

diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 9e1975ddb..76acdc177 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -112,6 +112,10 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
                 // Initialize data for the current vertex
                 VertexShader::InputVertex input;
 
+                // Load a debugging token to check whether this gets loaded by the running
+                // application or not.
+                input.attr[0].w = float24::FromRawFloat24(0x00abcdef);
+
                 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
                     for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
                         const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]));
@@ -136,6 +140,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
                     }
                 }
 
+                // HACK: Some games do not initialize the vertex position's w component. This leads
+                //       to critical issues since it messes up perspective division. As a
+                //       workaround, we force the fourth component to 1.0 if we find this to be the
+                //       case.
+                //       To do this, we additionally have to assume that the first input attribute
+                //       is the vertex position, since there's no information about this other than
+                //       the empiric observation that this is usually the case.
+                if (input.attr[0].w == float24::FromRawFloat24(0x00abcdef))
+                    input.attr[0].w = float24::FromFloat32(1.0);
+
                 if (g_debug_context)
                     g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
 
-- 
cgit v1.2.3


From 323a56f89835714f0973cf808b7b59b2589012d8 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Wed, 31 Dec 2014 15:01:50 +0100
Subject: Pica/CommandProcessor: Cleanups.

---
 src/video_core/command_processor.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 76acdc177..0d9f4ba66 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -114,7 +114,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
 
                 // Load a debugging token to check whether this gets loaded by the running
                 // application or not.
-                input.attr[0].w = float24::FromRawFloat24(0x00abcdef);
+                static const float24 debug_token = float24::FromRawFloat24(0x00abcdef);
+                input.attr[0].w = debug_token;
 
                 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
                     for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
@@ -147,7 +148,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
                 //       To do this, we additionally have to assume that the first input attribute
                 //       is the vertex position, since there's no information about this other than
                 //       the empiric observation that this is usually the case.
-                if (input.attr[0].w == float24::FromRawFloat24(0x00abcdef))
+                if (input.attr[0].w == debug_token)
                     input.attr[0].w = float24::FromFloat32(1.0);
 
                 if (g_debug_context)
@@ -195,7 +196,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
             int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1));
             auto values = registers.vs_int_uniforms[index];
             VertexShader::GetIntUniform(index) = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
-            LOG_ERROR(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x",
+            LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x",
                       index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value());
             break;
         }
-- 
cgit v1.2.3


From 40c720084146e8c2c00b58bc42bf0ebd98fa1496 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Wed, 31 Dec 2014 15:02:48 +0100
Subject: Pica/VertexShader: Coding style fixes.

---
 src/video_core/vertex_shader.cpp | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 090ffd420..ff825e2e1 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -39,38 +39,31 @@ static struct {
 static std::array<u32, 1024> shader_memory;
 static std::array<u32, 1024> swizzle_data;
 
-void SubmitShaderMemoryChange(u32 addr, u32 value)
-{
+void SubmitShaderMemoryChange(u32 addr, u32 value) {
     shader_memory[addr] = value;
 }
 
-void SubmitSwizzleDataChange(u32 addr, u32 value)
-{
+void SubmitSwizzleDataChange(u32 addr, u32 value) {
     swizzle_data[addr] = value;
 }
 
-Math::Vec4<float24>& GetFloatUniform(u32 index)
-{
+Math::Vec4<float24>& GetFloatUniform(u32 index) {
     return shader_uniforms.f[index];
 }
 
-bool& GetBoolUniform(u32 index)
-{
+bool& GetBoolUniform(u32 index) {
     return shader_uniforms.b[index];
 }
 
-Math::Vec4<u8>& GetIntUniform(u32 index)
-{
+Math::Vec4<u8>& GetIntUniform(u32 index) {
     return shader_uniforms.i[index];
 }
 
-const std::array<u32, 1024>& GetShaderBinary()
-{
+const std::array<u32, 1024>& GetShaderBinary() {
     return shader_memory;
 }
 
-const std::array<u32, 1024>& GetSwizzlePatterns()
-{
+const std::array<u32, 1024>& GetSwizzlePatterns() {
     return swizzle_data;
 }
 
@@ -444,8 +437,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
     }
 }
 
-OutputVertex RunShader(const InputVertex& input, int num_attributes)
-{
+OutputVertex RunShader(const InputVertex& input, int num_attributes) {
     VertexShaderState state;
 
     const u32* main = &shader_memory[registers.vs_main_offset];
-- 
cgit v1.2.3


From 195d73a385c9dd88150ed9b875e313c186e7d96e Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Wed, 31 Dec 2014 15:04:39 +0100
Subject: Pica/Rasterizer: Clean up long code lines.

---
 src/video_core/rasterizer.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 9148745dc..9822b36a6 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -18,7 +18,8 @@ namespace Pica {
 namespace Rasterizer {
 
 static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
-    u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));
+    const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
+    u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr)));
     u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
 
     // Assuming RGBA8 format until actual framebuffer format handling is implemented
@@ -26,7 +27,8 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
 }
 
 static const Math::Vec4<u8> GetPixel(int x, int y) {
-    u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));
+    const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
+    u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr)));
 
     u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth());
     Math::Vec4<u8> ret;
@@ -38,14 +40,16 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
  }
 
 static u32 GetDepth(int x, int y) {
-    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
+    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
+    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
 
     // Assuming 16-bit depth buffer format until actual format handling is implemented
     return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
 }
 
 static void SetDepth(int x, int y, u16 value) {
-    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
+    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
+    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
 
     // Assuming 16-bit depth buffer format until actual format handling is implemented
     *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
-- 
cgit v1.2.3


From d13bd327ba70a89f8e634afc3c9c22ba3c0f6e38 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Wed, 31 Dec 2014 15:05:33 +0100
Subject: Pica/Rasterizer: Fix a bug related to multitexturing and texture
 wrapping.

---
 src/video_core/rasterizer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 9822b36a6..4dfc21885 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -229,8 +229,8 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                             return 0;
                     }
                 };
-                s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width);
-                t = registers.texture0.height - 1 - GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height);
+                s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
+                t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
 
                 u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
                 auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
-- 
cgit v1.2.3


From 614baa39d1bc1489c25acf3578ae7f99cc1b5ad0 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Wed, 31 Dec 2014 15:08:07 +0100
Subject: VideoCore: Remove some unused functions.

---
 src/video_core/utils.h | 26 --------------------------
 1 file changed, 26 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index 63ebccbde..6fd640425 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -8,32 +8,6 @@
 
 #include "common/common_types.h"
 
-namespace FormatPrecision {
-
-/// Adjust RGBA8 color with RGBA6 precision
-static inline u32 rgba8_with_rgba6(u32 src) {
-    u32 color = src;
-    color &= 0xFCFCFCFC;
-    color |= (color >> 6) & 0x03030303;
-    return color;
-}
-
-/// Adjust RGBA8 color with RGB565 precision
-static inline u32 rgba8_with_rgb565(u32 src) {
-    u32 color = (src & 0xF8FCF8);
-    color |= (color >> 5) & 0x070007;
-    color |= (color >> 6) & 0x000300;
-    color |= 0xFF000000;
-    return color;
-}
-
-/// Adjust Z24 depth value with Z16 precision
-static inline u32 z24_with_z16(u32 src) {
-    return (src & 0xFFFF00) | (src >> 16);
-}
-
-} // namespace
-
 namespace VideoCore {
 
 /// Structure for the TGA texture format (for dumping)
-- 
cgit v1.2.3


From 47543d62cf9e982598f58438ad24769c2b36ec77 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Wed, 31 Dec 2014 15:17:07 +0100
Subject: Pica: Cleanup color conversion.

---
 src/video_core/color.h                     | 32 ++++++++++++++++++++++++++++++
 src/video_core/debug_utils/debug_utils.cpp | 32 +++++++++++++-----------------
 2 files changed, 46 insertions(+), 18 deletions(-)
 create mode 100644 src/video_core/color.h

(limited to 'src/video_core')

diff --git a/src/video_core/color.h b/src/video_core/color.h
new file mode 100644
index 000000000..e86ac1265
--- /dev/null
+++ b/src/video_core/color.h
@@ -0,0 +1,32 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Color {
+
+/// Convert a 1-bit color component to 8 bit (0 -> 0, 1 -> 255)
+static inline u8 Convert1To8(u8 value) {
+    return value * 255;
+}
+
+/// Convert a 4-bit color component to 8 bit by replicating it into both nibbles (0xF -> 0xFF)
+static inline u8 Convert4To8(u8 value) {
+    return (value << 4) | value;
+}
+
+/// Convert a 5-bit color component to 8 bit; the low 3 bits are filled with the top 3 input bits
+static inline u8 Convert5To8(u8 value) {
+    return (value << 3) | (value >> 2);
+}
+
+/// Convert a 6-bit color component to 8 bit; the low 2 bits are filled with the top 2 input bits
+static inline u8 Convert6To8(u8 value) {
+    return (value << 2) | (value >> 4);
+}
+
+
+} // namespace Color
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 83d585d16..a494465b9 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -19,6 +19,7 @@
 #include "common/log.h"
 #include "common/file_util.h"
 
+#include "video_core/color.h"
 #include "video_core/math.h"
 #include "video_core/pica.h"
 
@@ -359,29 +360,26 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
         u8 g = ((source_ptr) >> 6) & 0x1F;
         u8 b = (source_ptr >> 1) & 0x1F;
         u8 a = source_ptr & 1;
-        return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), disable_alpha ? 255 : (a * 255));
+        return Math::MakeVec<u8>(Color::Convert5To8(r), Color::Convert5To8(g),
+                                 Color::Convert5To8(b), disable_alpha ? 255 : Color::Convert1To8(a));
     }
 
     case Regs::TextureFormat::RGB565:
     {
         const u16 source_ptr = *(const u16*)(source + offset * 2);
-        u8 r = (source_ptr >> 11) & 0x1F;
-        u8 g = ((source_ptr) >> 5) & 0x3F;
-        u8 b = (source_ptr) & 0x1F;
-        return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 2) | (g >> 4), (b << 3) | (b >> 2), 255);
+        u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F);
+        u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F);
+        u8 b = Color::Convert5To8((source_ptr) & 0x1F);
+        return Math::MakeVec<u8>(r, g, b, 255);
     }
 
     case Regs::TextureFormat::RGBA4:
     {
         const u8* source_ptr = source + offset * 2;
-        u8 r = source_ptr[1] >> 4;
-        u8 g = source_ptr[1] & 0xF;
-        u8 b = source_ptr[0] >> 4;
-        u8 a = source_ptr[0] & 0xF;
-        r = (r << 4) | r;
-        g = (g << 4) | g;
-        b = (b << 4) | b;
-        a = (a << 4) | a;
+        u8 r = Color::Convert4To8(source_ptr[1] >> 4);
+        u8 g = Color::Convert4To8(source_ptr[1] & 0xF);
+        u8 b = Color::Convert4To8(source_ptr[0] >> 4);
+        u8 a = Color::Convert4To8(source_ptr[0] & 0xF);
         return { r, g, b, disable_alpha ? (u8)255 : a };
     }
 
@@ -418,10 +416,8 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
     {
         const u8* source_ptr = source + offset;
 
-        u8 i = ((*source_ptr) & 0xF0) >> 4;
-        u8 a = (*source_ptr) & 0xF;
-        a |= a << 4;
-        i |= i << 4;
+        u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
+        u8 a = Color::Convert4To8((*source_ptr) & 0xF);
 
         if (disable_alpha) {
             // Show intensity as red, alpha as green
@@ -436,7 +432,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
         const u8* source_ptr = source + offset / 2;
 
         u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4);
-        a |= a << 4;
+        a = Color::Convert4To8(a);
 
         if (disable_alpha) {
             return { a, a, a, 255 };
-- 
cgit v1.2.3


From 9675d19b47865c2dac5e662f5a265589bd03a283 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Wed, 31 Dec 2014 15:19:40 +0100
Subject: Pica/Rasterizer: Make orient2d a free function and rename it to
 SignedArea.

---
 src/video_core/rasterizer.cpp | 69 ++++++++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 31 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 4dfc21885..9850e517a 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -55,30 +55,45 @@ static void SetDepth(int x, int y, u16 value) {
     *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
 }
 
-void ProcessTriangle(const VertexShader::OutputVertex& v0,
-                     const VertexShader::OutputVertex& v1,
-                     const VertexShader::OutputVertex& v2)
-{
-    // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
-    struct Fix12P4 {
-        Fix12P4() {}
-        Fix12P4(u16 val) : val(val) {}
+// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
+struct Fix12P4 {
+    Fix12P4() {}
+    Fix12P4(u16 val) : val(val) {}
 
-        static u16 FracMask() { return 0xF; }
-        static u16 IntMask() { return (u16)~0xF; }
+    static u16 FracMask() { return 0xF; }
+    static u16 IntMask() { return (u16)~0xF; }
 
-        operator u16() const {
-            return val;
-        }
+    operator u16() const {
+        return val;
+    }
 
-        bool operator < (const Fix12P4& oth) const {
-            return (u16)*this < (u16)oth;
-        }
+    bool operator < (const Fix12P4& oth) const {
+        return (u16)*this < (u16)oth;
+    }
 
-    private:
-        u16 val;
-    };
+private:
+    u16 val;
+};
+
+/**
+ * Calculate the z component of the cross product (vtx2 - vtx1) x (vtx3 - vtx1), which is twice
+ * the signed area of the triangle spanned by the three vertices. The sign denotes an orientation.
+ *
+ * @todo define orientation concretely.
+ */
+static int SignedArea(const Math::Vec2<Fix12P4>& vtx1,
+                      const Math::Vec2<Fix12P4>& vtx2,
+                      const Math::Vec2<Fix12P4>& vtx3) {
+    const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
+    const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
+    // TODO: There is a very small chance this will overflow for sizeof(int) == 4
+    return Math::Cross(vec1, vec2).z;
+}
 
+void ProcessTriangle(const VertexShader::OutputVertex& v0,
+                     const VertexShader::OutputVertex& v1,
+                     const VertexShader::OutputVertex& v2)
+{
     // vertex positions in rasterizer coordinates
     auto FloatToFix = [](float24 flt) {
                           return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f));
@@ -86,14 +101,6 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
     auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) {
                                              return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
                                          };
-    static auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
-                              const Math::Vec2<Fix12P4>& vtx2,
-                              const Math::Vec2<Fix12P4>& vtx3) {
-        const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
-        const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
-        // TODO: There is a very small chance this will overflow for sizeof(int) == 4
-        return Math::Cross(vec1, vec2).z;
-    };
 
     Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos),
                                    ScreenToRasterizerCoordinates(v1.screenpos),
@@ -107,7 +114,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
     if (registers.cull_mode != Regs::CullMode::KeepAll) {
         // Cull away triangles which are wound clockwise.
         // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
-        if (orient2d(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
+        if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
             return;
     }
 
@@ -153,9 +160,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
         for (u16 x = min_x; x < max_x; x += 0x10) {
 
             // Calculate the barycentric coordinates w0, w1 and w2
-            int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
-            int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
-            int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
+            int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
+            int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
+            int w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
             int wsum = w0 + w1 + w2;
 
             // If current pixel is not covered by the current primitive
-- 
cgit v1.2.3


From bc187be0c13f66b1a714d868ab8aa18214550bdc Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Wed, 31 Dec 2014 15:29:45 +0100
Subject: Pica/Rasterizer: Remove some redundant casts.

---
 src/video_core/rasterizer.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/video_core')

diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 9850e517a..025d4e484 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -433,9 +433,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
 
             // TODO: Does depth indeed only get written even if depth testing is enabled?
             if (registers.output_merger.depth_test_enable) {
-                u16 z = (u16)(-((float)v0.screenpos[2].ToFloat32() * w0 +
-                            (float)v1.screenpos[2].ToFloat32() * w1 +
-                            (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
+                u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 +
+                            v1.screenpos[2].ToFloat32() * w1 +
+                            v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
                 u16 ref_z = GetDepth(x >> 4, y >> 4);
 
                 bool pass = false;
-- 
cgit v1.2.3