From 162d641a301d87d5e25ca5d677b7f8f07f29e748 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 12 Aug 2014 20:04:28 +0200 Subject: Pica/Math: Improved the design of the Vec2/Vec3/Vec4 classes and simplified rasterizer code accordingly. - Swizzlers now return const objects so that things like "first_vec4.xyz() = some_vec3" now will fail to compile (ideally we should support some vector holding references to make this actually work). - The methods "InsertBeforeX/Y/Z" and "Append" have been replaced by more versions of MakeVec, which now also supports building new vectors from vectors. - Vector library now follows C++ type promotion rules (hence, the result of Vec2 with another Vec2 is now a Vec2). --- src/video_core/rasterizer.cpp | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'src/video_core/rasterizer.cpp') diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a7c1bab3e..f418518a1 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -78,10 +78,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); - min_x = min_x & Fix12P4::IntMask(); - min_y = min_y & Fix12P4::IntMask(); - max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask(); - max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask(); + min_x &= Fix12P4::IntMask(); + min_y &= Fix12P4::IntMask(); + max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask()); + max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask()); // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias @@ -112,10 +112,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, auto orient2d = [](const Math::Vec2& vtx1, const Math::Vec2& vtx2, const Math::Vec2& vtx3) { - const auto vec1 = (vtx2.Cast() - vtx1.Cast()).Append(0); - const auto vec2 = (vtx3.Cast() - vtx1.Cast()).Append(0); + const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); + const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); // TODO: There is a very small chance this will overflow for sizeof(int) == 4 - return Cross(vec1, vec2).z; + return Math::Cross(vec1, vec2).z; }; int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); @@ -143,15 +143,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, // // The generalization to three vertices is straightforward in baricentric coordinates. auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { - auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w, - attr1 / v1.pos.w, - attr2 / v2.pos.w); - auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w, - float24::FromFloat32(1.f) / v1.pos.w, - float24::FromFloat32(1.f) / v2.pos.w); - auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0), - float24::FromFloat32(w1), - float24::FromFloat32(w2)); + auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w, + attr1 / v1.pos.w, + attr2 / v2.pos.w); + auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w, + float24::FromFloat32(1.f) / v1.pos.w, + float24::FromFloat32(1.f) / v2.pos.w); + auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(w0), + float24::FromFloat32(w1), + float24::FromFloat32(w2)); float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); -- cgit v1.2.3 From 27cab6477e7e72771d0661418d71cce3c2721723 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 15 Aug 2014 16:33:17 +0200 Subject: Pica/Rasterizer: Add initial implementation of texture combiners. --- src/video_core/rasterizer.cpp | 122 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 121 insertions(+), 1 deletion(-) (limited to 'src/video_core/rasterizer.cpp') diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index f418518a1..5a4155c84 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -165,12 +165,132 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) }; + // Texture environment - consists of 6 stages of color and alpha combining. + // + // Color combiners take three input color values from some source (e.g. interpolated + // vertex color, texture color, previous stage, etc), perform some very simple + // operations on each of them (e.g. inversion) and then calculate the output color + // with some basic arithmetic. Alpha combiners can be configured separately but work + // analogously. + Math::Vec4 combiner_output; + for (auto tev_stage : registers.GetTevStages()) { + using Source = Regs::TevStageConfig::Source; + using ColorModifier = Regs::TevStageConfig::ColorModifier; + using AlphaModifier = Regs::TevStageConfig::AlphaModifier; + using Operation = Regs::TevStageConfig::Operation; + + auto GetColorSource = [&](Source source) -> Math::Vec3 { + switch (source) { + case Source::PrimaryColor: + return primary_color.rgb(); + + case Source::Constant: + return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; + + case Source::Previous: + return combiner_output.rgb(); + + default: + ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source); + return {}; + } + }; + + auto GetAlphaSource = [&](Source source) -> u8 { + switch (source) { + case Source::PrimaryColor: + return primary_color.a(); + + case Source::Constant: + return tev_stage.const_a; + + case Source::Previous: + return combiner_output.a(); + + default: + ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source); + return 0; + } + }; + + auto GetColorModifier = [](ColorModifier factor, const Math::Vec3& values) -> Math::Vec3 { + switch (factor) + { + case ColorModifier::SourceColor: + return values; + default: + ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); + return {}; + } + }; + + auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { + switch (factor) { + case AlphaModifier::SourceAlpha: + return value; + default: + ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); + return 0; + } + }; + + auto ColorCombine = [](Operation op, const Math::Vec3 input[3]) -> Math::Vec3 { + switch (op) { + case Operation::Replace: + return input[0]; + + case Operation::Modulate: + return ((input[0] * input[1]) / 255).Cast(); + + default: + ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op); + return {}; + } + }; + + auto AlphaCombine = [](Operation op, const std::array& input) -> u8 { + switch (op) { + case Operation::Replace: + return input[0]; + + case Operation::Modulate: + return input[0] * input[1] / 255; + + default: + ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op); + return 0; + } + }; + + // color combiner + // NOTE: Not sure if the alpha combiner might use the color output of the previous + // stage as input. Hence, we currently don't directly write the result to + // combiner_output.rgb(), but instead store it in a temporary variable until + // alpha combining has been done. + Math::Vec3 color_result[3] = { + GetColorModifier(tev_stage.color_modifier1, GetColorSource(tev_stage.color_source1)), + GetColorModifier(tev_stage.color_modifier2, GetColorSource(tev_stage.color_source2)), + GetColorModifier(tev_stage.color_modifier3, GetColorSource(tev_stage.color_source3)) + }; + auto color_output = ColorCombine(tev_stage.color_op, color_result); + + // alpha combiner + std::array alpha_result = { + GetAlphaModifier(tev_stage.alpha_modifier1, GetAlphaSource(tev_stage.alpha_source1)), + GetAlphaModifier(tev_stage.alpha_modifier2, GetAlphaSource(tev_stage.alpha_source2)), + GetAlphaModifier(tev_stage.alpha_modifier3, GetAlphaSource(tev_stage.alpha_source3)) + }; + auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); + + combiner_output = Math::MakeVec(color_output, alpha_output); + } + u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + (float)v1.screenpos[2].ToFloat32() * w1 + (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536? SetDepth(x >> 4, y >> 4, z); - DrawPixel(x >> 4, y >> 4, primary_color); + DrawPixel(x >> 4, y >> 4, combiner_output); } } } -- cgit v1.2.3 From 9679d231df0bc8fac9e0e596ab78750bb38ef248 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 17 Aug 2014 12:31:19 +0200 Subject: Pica/Rasterizer: Add texturing support. --- src/video_core/rasterizer.cpp | 64 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'src/video_core/rasterizer.cpp') diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 5a4155c84..1bd32e8d0 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -11,6 +11,8 @@ #include "rasterizer.h" #include "vertex_shader.h" +#include "debug_utils/debug_utils.h" + namespace Pica { namespace Rasterizer { @@ -165,6 +167,62 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) }; + Math::Vec4 texture_color{}; + float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); + float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); + if (registers.texturing_enable) { + // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each + // of which is composed of four 2x2 subtiles each of which is composed of four texels. + // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. + // texels are laid out in a 2x2 subtile like this: + // 2 3 + // 0 1 + // + // The full 8x8 tile has the texels arranged like this: + // + // 42 43 46 47 58 59 62 63 + // 40 41 44 45 56 57 60 61 + // 34 35 38 39 50 51 54 55 + // 32 33 36 37 48 49 52 53 + // 10 11 14 15 26 27 30 31 + // 08 09 12 13 24 25 28 29 + // 02 03 06 07 18 19 22 23 + // 00 01 04 05 16 17 20 21 + + // TODO: This is currently hardcoded for RGB8 + u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress()); + + // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. + // To be flexible in case different but similar patterns are used, we keep this + // somewhat inefficient code around for now. + int s = (int)(u * float24::FromFloat32(registers.texture0.width)).ToFloat32(); + int t = (int)(v * float24::FromFloat32(registers.texture0.height)).ToFloat32(); + int texel_index_within_tile = 0; + for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { + int sub_tile_width = 1 << block_size_index; + int sub_tile_height = 1 << block_size_index; + + int sub_tile_index = (s & sub_tile_width) << block_size_index; + sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index); + texel_index_within_tile += sub_tile_index; + } + + const int block_width = 8; + const int block_height = 8; + + int coarse_s = (s / block_width) * block_width; + int coarse_t = (t / block_height) * block_height; + + const int row_stride = registers.texture0.width * 3; + u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3; + texture_color.r() = source_ptr[2]; + texture_color.g() = source_ptr[1]; + texture_color.b() = source_ptr[0]; + texture_color.a() = 0xFF; + + DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data); + } + // Texture environment - consists of 6 stages of color and alpha combining. // // Color combiners take three input color values from some source (e.g. interpolated @@ -184,6 +242,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, case Source::PrimaryColor: return primary_color.rgb(); + case Source::Texture0: + return texture_color.rgb(); + case Source::Constant: return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; @@ -201,6 +262,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, case Source::PrimaryColor: return primary_color.a(); + case Source::Texture0: + return texture_color.a(); + case Source::Constant: return tev_stage.const_a; -- cgit v1.2.3 From a79644c9baaeaa88e79db9837e9ed6e2b74e9889 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 25 Aug 2014 21:45:49 +0200 Subject: Pica/Rasterizer: Clarify a TODO. --- src/video_core/rasterizer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/video_core/rasterizer.cpp') diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 1bd32e8d0..cdfdb6215 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -349,9 +349,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, combiner_output = Math::MakeVec(color_output, alpha_output); } + // TODO: Not sure if the multiplication by 65535 has already been taken care + // of when transforming to screen coordinates or not. u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + (float)v1.screenpos[2].ToFloat32() * w1 + - (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536? + (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); SetDepth(x >> 4, y >> 4, z); DrawPixel(x >> 4, y >> 4, combiner_output); -- cgit v1.2.3