From b2f472a2b1f36073b0070b81c08a666380ad180d Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 9 Jun 2017 13:14:55 -0500 Subject: SwRasterizer: Implement primary fragment color. --- src/video_core/swrasterizer/rasterizer.cpp | 117 ++++++++++++++++++++++++++++- 1 file changed, 113 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 512e81c08..1ab41c2df 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -13,6 +13,7 @@ #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" +#include "common/quaternion.h" #include "common/vector_math.h" #include "core/hw/gpu.h" #include "core/memory.h" @@ -114,6 +115,86 @@ static std::tuple ConvertCubeCoord(float24 u, float24 v return std::make_tuple(x / z * half + half, y / z * half + half, addr); } +std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Quaternion& normquat, const Math::Vec3& view) { + const auto& lighting = g_state.regs.lighting; + + if (lighting.disable) + return {{}, {}}; + + // TODO(Subv): Bump mapping + Math::Vec3 surface_normal = {0.0f, 0.0f, 1.0f}; + + if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) { + LOG_CRITICAL(HW_GPU, "unimplemented bump mapping"); + UNIMPLEMENTED(); + } + + // TODO(Subv): Do we need to normalize the quaternion here? 
+ auto normal = Math::QuaternionRotate(normquat, surface_normal); + + Math::Vec3 light_vector = {}; + Math::Vec3 diffuse_sum = {}; + // TODO(Subv): Calculate specular + Math::Vec3 specular_sum = {}; + + for (unsigned light_index = 0; light_index <= lighting.max_light_index; ++light_index) { + unsigned num = lighting.light_enable.GetNum(light_index); + const auto& light_config = g_state.regs.lighting.light[num]; + + Math::Vec3 position = {float16::FromRaw(light_config.x).ToFloat32(), float16::FromRaw(light_config.y).ToFloat32(), float16::FromRaw(light_config.z).ToFloat32()}; + + if (light_config.config.directional) + light_vector = position; + else + light_vector = position + view; + + light_vector.Normalize(); + + auto dot_product = Math::Dot(light_vector, normal); + + if (light_config.config.two_sided_diffuse) + dot_product = std::abs(dot_product); + else + dot_product = std::max(dot_product, 0.0f); + + float dist_atten = 1.0f; + if (!lighting.IsDistAttenDisabled(num)) { + auto distance = (-view - position).Length(); + float scale = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); + float bias = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); + size_t lut = static_cast(LightingRegs::LightingSampler::DistanceAttenuation) + num; + + float sample_loc = scale * distance + bias; + unsigned index_i = static_cast(MathUtil::Clamp(floor(sample_loc * 256), 0.0f, 1.0f)); + + float index_f = sample_loc - index_i; + + ASSERT_MSG(lut < g_state.lighting.luts.size(), "Out of range lut"); + + float lut_value = g_state.lighting.luts[lut][index_i].ToFloat(); + float lut_diff = g_state.lighting.luts[lut][index_i].DiffToFloat(); + + dist_atten = lut_value + lut_diff * index_f; + } + + auto diffuse = light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); + diffuse_sum += diffuse * dist_atten; + } + + diffuse_sum += lighting.global_ambient.ToVec3f(); + return { + Math::MakeVec(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, 
MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, 255).Cast(), + Math::MakeVec(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, 255).Cast() + }; +} + +static bool AreQuaternionsOpposite(Math::Vec4 qa, Math::Vec4 qb) { + Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() }; + Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() }; + + return (Math::Dot(a, b) < 0.f); +} + MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); /** @@ -207,6 +288,15 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; + // Flip the quaternions if they are opposite to prevent interpolating them over the wrong direction. + auto v1_quat = v1.quat; + auto v2_quat = v2.quat; + + if (AreQuaternionsOpposite(v0.quat, v1.quat)) + v1_quat = v1_quat * float24::FromFloat32(-1.0f); + if (AreQuaternionsOpposite(v0.quat, v2.quat)) + v2_quat = v2_quat * float24::FromFloat32(-1.0f); + auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); auto textures = regs.texturing.GetTextures(); @@ -305,6 +395,21 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve 255), }; + Math::Quaternion normquat{ + { + GetInterpolatedAttribute(v0.quat.x, v1_quat.x, v2_quat.x).ToFloat32(), + GetInterpolatedAttribute(v0.quat.y, v1_quat.y, v2_quat.y).ToFloat32(), + GetInterpolatedAttribute(v0.quat.z, v1_quat.z, v2_quat.z).ToFloat32() + }, + GetInterpolatedAttribute(v0.quat.w, v1_quat.w, v2_quat.w).ToFloat32(), + }; + + Math::Vec3 fragment_position{ + GetInterpolatedAttribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(), + GetInterpolatedAttribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), + 
GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32() + }; + Math::Vec2 uv[3]; uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); uv[0].v() = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); @@ -419,6 +524,11 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve regs.texturing.tev_combiner_buffer_color.a, }; + Math::Vec4 primary_fragment_color; + Math::Vec4 secondary_fragment_color; + + std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors(normquat, fragment_position); + for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { const auto& tev_stage = tev_stages[tev_stage_index]; @@ -427,14 +537,13 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve auto GetSource = [&](Source source) -> Math::Vec4 { switch (source) { case Source::PrimaryColor: + return primary_color; - // HACK: Until we implement fragment lighting, use primary_color case Source::PrimaryFragmentColor: - return primary_color; + return primary_fragment_color; - // HACK: Until we implement fragment lighting, use zero case Source::SecondaryFragmentColor: - return {0, 0, 0, 0}; + return secondary_fragment_color; case Source::Texture0: return texture_color[0]; -- cgit v1.2.3 From be25e78b07140cb745387f757001dd04b3b4cc64 Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 9 Jun 2017 14:25:41 -0500 Subject: SwRasterizer: Calculate specular_0 for fragment lighting. 
--- src/video_core/swrasterizer/rasterizer.cpp | 107 +++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 13 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 1ab41c2df..34b84b0af 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -115,6 +115,20 @@ static std::tuple ConvertCubeCoord(float24 u, float24 v return std::make_tuple(x / z * half + half, y / z * half + half, addr); } + +float LookupLightingLut(size_t lut_index, float index) { + unsigned index_i = static_cast(MathUtil::Clamp(floor(index * 256), 0.0f, 1.0f)); + + float index_f = index - index_i; + + ASSERT_MSG(lut_index < g_state.lighting.luts.size(), "Out of range lut"); + + float lut_value = g_state.lighting.luts[lut_index][index_i].ToFloat(); + float lut_diff = g_state.lighting.luts[lut_index][index_i].DiffToFloat(); + + return lut_value + lut_diff * index_f; +} + std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Quaternion& normquat, const Math::Vec3& view) { const auto& lighting = g_state.regs.lighting; @@ -133,9 +147,9 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu auto normal = Math::QuaternionRotate(normquat, surface_normal); Math::Vec3 light_vector = {}; - Math::Vec3 diffuse_sum = {}; + Math::Vec4 diffuse_sum = {0.f, 0.f, 0.f, 1.f}; // TODO(Subv): Calculate specular - Math::Vec3 specular_sum = {}; + Math::Vec4 specular_sum = {0.f, 0.f, 0.f, 1.f}; for (unsigned light_index = 0; light_index <= lighting.max_light_index; ++light_index) { unsigned num = lighting.light_enable.GetNum(light_index); @@ -150,7 +164,8 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu light_vector.Normalize(); - auto dot_product = Math::Dot(light_vector, normal); + auto LV_N = Math::Dot(light_vector, normal); + auto dot_product = LV_N; if (light_config.config.two_sided_diffuse) dot_product = std::abs(dot_product); @@ 
-165,26 +180,92 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu size_t lut = static_cast(LightingRegs::LightingSampler::DistanceAttenuation) + num; float sample_loc = scale * distance + bias; - unsigned index_i = static_cast(MathUtil::Clamp(floor(sample_loc * 256), 0.0f, 1.0f)); + dist_atten = LookupLightingLut(lut, sample_loc); + } + + float clamp_highlights = 1.0f; + + if (lighting.config0.clamp_highlights) { + if (LV_N <= 0.f) + clamp_highlights = 0.f; + else + clamp_highlights = 1.f; + } + + auto GetLutIndex = [&](unsigned num, LightingRegs::LightingLutInput input, + bool abs) -> float { + + Math::Vec3 norm_view = view.Normalized(); + Math::Vec3 half_angle = (norm_view + light_vector).Normalized(); + float result = 0.0f; + + switch (input) { + case LightingRegs::LightingLutInput::NH: + result = Math::Dot(normal, half_angle); + break; - float index_f = sample_loc - index_i; + case LightingRegs::LightingLutInput::VH: + result = Math::Dot(norm_view, half_angle); + break; - ASSERT_MSG(lut < g_state.lighting.luts.size(), "Out of range lut"); + case LightingRegs::LightingLutInput::NV: + result = Math::Dot(normal, norm_view); + break; - float lut_value = g_state.lighting.luts[lut][index_i].ToFloat(); - float lut_diff = g_state.lighting.luts[lut][index_i].DiffToFloat(); + case LightingRegs::LightingLutInput::LN: + result = Math::Dot(light_vector, normal); + break; - dist_atten = lut_value + lut_diff * index_f; + default: + LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input); + UNIMPLEMENTED(); + result = 0.f; + } + + if (abs) { + if (light_config.config.two_sided_diffuse) + result = std::abs(result); + else + result = std::max(result, 0.0f); + } else { + if (result < 0.f) + result += 2.f; + + result /= 2.f; + } + + return MathUtil::Clamp(result, 0.0f, 1.0f); + }; + + // Specular 0 component + float d0_lut_value = 1.0f; + if (lighting.config1.disable_lut_d0 == 0 && + LightingRegs::IsLightingSamplerSupported( + lighting.config0.config, 
LightingRegs::LightingSampler::Distribution0)) { + + // Lookup specular "distribution 0" LUT value + float index = GetLutIndex(num, lighting.lut_input.d0.Value(), lighting.abs_lut_input.disable_d0 == 0); + + float scale = lighting.lut_scale.GetScale(lighting.lut_scale.d0); + + d0_lut_value = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::Distribution0), index); } + Math::Vec3 specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); + + // TODO(Subv): Specular 1 + Math::Vec3 specular_1 = {}; + auto diffuse = light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); - diffuse_sum += diffuse * dist_atten; + diffuse_sum += Math::MakeVec(diffuse * dist_atten, 0.0f); + + specular_sum += Math::MakeVec((specular_0 + specular_1) * clamp_highlights * dist_atten, 0.f); } - diffuse_sum += lighting.global_ambient.ToVec3f(); + diffuse_sum += Math::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); return { - Math::MakeVec(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, 255).Cast(), - Math::MakeVec(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, 255).Cast() + Math::MakeVec(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, MathUtil::Clamp(diffuse_sum.w, 0.0f, 1.0f) * 255).Cast(), + Math::MakeVec(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255).Cast() }; } -- cgit v1.2.3 From 46b8c8e1da6bc29df2662d63b0e028136fef3636 Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 9 Jun 2017 14:44:02 -0500 Subject: SwRasterizer: Calculate specular_1 for fragment lighting. 
--- src/video_core/swrasterizer/rasterizer.cpp | 62 ++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 34b84b0af..e0c326a4a 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -148,8 +148,8 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu Math::Vec3 light_vector = {}; Math::Vec4 diffuse_sum = {0.f, 0.f, 0.f, 1.f}; - // TODO(Subv): Calculate specular Math::Vec4 specular_sum = {0.f, 0.f, 0.f, 1.f}; + Math::Vec3 refl_value = {}; for (unsigned light_index = 0; light_index <= lighting.max_light_index; ++light_index) { unsigned num = lighting.light_enable.GetNum(light_index); @@ -253,8 +253,64 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu Math::Vec3 specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); - // TODO(Subv): Specular 1 - Math::Vec3 specular_1 = {}; + // If enabled, lookup ReflectRed value, otherwise, 1.0 is used + if (lighting.config1.disable_lut_rr == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::ReflectRed)) { + + float index = GetLutIndex(num, lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0); + + float scale = lighting.lut_scale.GetScale(lighting.lut_scale.rr); + + refl_value.x = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectRed), index); + } else { + refl_value.x = 1.0f; + } + + // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used + if (lighting.config1.disable_lut_rg == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::ReflectGreen)) { + + float index = GetLutIndex(num, lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0); + + float scale = lighting.lut_scale.GetScale(lighting.lut_scale.rg); + + 
refl_value.y = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectGreen), index); + } else { + refl_value.y = refl_value.x; + } + + // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used + if (lighting.config1.disable_lut_rb == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::ReflectBlue)) { + + float index = GetLutIndex(num, lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0); + + float scale = lighting.lut_scale.GetScale(lighting.lut_scale.rb); + + refl_value.z = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectBlue), index); + } else { + refl_value.z = refl_value.x; + } + + float d1_lut_value = 1.0f; + if (lighting.config1.disable_lut_d1 == 0 && + LightingRegs::IsLightingSamplerSupported( + lighting.config0.config, LightingRegs::LightingSampler::Distribution1)) { + + // Lookup specular "distribution 1" LUT value + float index = GetLutIndex(num, lighting.lut_input.d1.Value(), lighting.abs_lut_input.disable_d1 == 0); + + float scale = lighting.lut_scale.GetScale(lighting.lut_scale.d1); + + d1_lut_value = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::Distribution1), index); + } + + Math::Vec3 specular_1 = d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); + + // TODO(Subv): Fresnel auto diffuse = light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); diffuse_sum += Math::MakeVec(diffuse * dist_atten, 0.0f); -- cgit v1.2.3 From 10b0bea06008fea89564dc5ef8895c0274f8ef18 Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 9 Jun 2017 14:55:54 -0500 Subject: SwRasterizer: Calculate fresnel for fragment lighting. 
--- src/video_core/swrasterizer/rasterizer.cpp | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index e0c326a4a..2d1daa24a 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -310,7 +310,31 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu Math::Vec3 specular_1 = d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); - // TODO(Subv): Fresnel + if (lighting.config1.disable_lut_fr == 0 && + LightingRegs::IsLightingSamplerSupported( + lighting.config0.config, LightingRegs::LightingSampler::Fresnel)) { + + // Lookup fresnel LUT value + float index = GetLutIndex(num, lighting.lut_input.fr.Value(), lighting.abs_lut_input.disable_fr == 0); + + float scale = lighting.lut_scale.GetScale(lighting.lut_scale.fr); + + float lut_value = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::Fresnel), index); + + // Enabled for difffuse lighting alpha component + if (lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha || + lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { + diffuse_sum.a() *= lut_value; + } + + // Enabled for the specular lighting alpha component + if (lighting.config0.fresnel_selector == + LightingRegs::LightingFresnelSelector::SecondaryAlpha || + lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { + specular_sum.a() *= lut_value; + } + } + auto diffuse = light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); diffuse_sum += Math::MakeVec(diffuse * dist_atten, 0.0f); -- cgit v1.2.3 From 80b6fc592e3a2f5821975e84b5df35f5dc4ae51a Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 9 Jun 2017 15:24:28 -0500 Subject: SwRasterizer: Fixed the lighting lut lookup function. 
--- src/video_core/swrasterizer/rasterizer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 2d1daa24a..2b85ac86c 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -117,7 +117,9 @@ static std::tuple ConvertCubeCoord(float24 u, float24 v float LookupLightingLut(size_t lut_index, float index) { - unsigned index_i = static_cast(MathUtil::Clamp(floor(index * 256), 0.0f, 1.0f)); + index *= 256; + + unsigned index_i = static_cast(MathUtil::Clamp(floor(index), 0.0f, 255.0f)); float index_f = index - index_i; @@ -126,7 +128,7 @@ float LookupLightingLut(size_t lut_index, float index) { float lut_value = g_state.lighting.luts[lut_index][index_i].ToFloat(); float lut_diff = g_state.lighting.luts[lut_index][index_i].DiffToFloat(); - return lut_value + lut_diff * index_f; + return lut_value + lut_diff * index_f / 256.f; } std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Quaternion& normquat, const Math::Vec3& view) { -- cgit v1.2.3 From f2d4d5c2191275bd91f2f42b880f3edf3bccfd63 Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 9 Jun 2017 17:33:25 -0500 Subject: SwRasterizer: Corrected the light LUT lookups. 
--- src/video_core/swrasterizer/rasterizer.cpp | 76 +++++++++++++++++------------- 1 file changed, 43 insertions(+), 33 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 2b85ac86c..a9098e1f0 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -115,20 +115,14 @@ static std::tuple ConvertCubeCoord(float24 u, float24 v return std::make_tuple(x / z * half + half, y / z * half + half, addr); } - -float LookupLightingLut(size_t lut_index, float index) { - index *= 256; - - unsigned index_i = static_cast(MathUtil::Clamp(floor(index), 0.0f, 255.0f)); - - float index_f = index - index_i; - +float LookupLightingLut(size_t lut_index, u8 index, float delta) { ASSERT_MSG(lut_index < g_state.lighting.luts.size(), "Out of range lut"); + ASSERT_MSG(index < g_state.lighting.luts[0].size(), "Out of range index"); - float lut_value = g_state.lighting.luts[lut_index][index_i].ToFloat(); - float lut_diff = g_state.lighting.luts[lut_index][index_i].DiffToFloat(); + float lut_value = g_state.lighting.luts[lut_index][index].ToFloat(); + float lut_diff = g_state.lighting.luts[lut_index][index].DiffToFloat(); - return lut_value + lut_diff * index_f / 256.f; + return lut_value + lut_diff * delta; } std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Quaternion& normquat, const Math::Vec3& view) { @@ -145,8 +139,8 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu UNIMPLEMENTED(); } - // TODO(Subv): Do we need to normalize the quaternion here? 
- auto normal = Math::QuaternionRotate(normquat, surface_normal); + // Use the normalized the quaternion when performing the rotation + auto normal = Math::QuaternionRotate(normquat.Normalized(), surface_normal); Math::Vec3 light_vector = {}; Math::Vec4 diffuse_sum = {0.f, 0.f, 0.f, 1.f}; @@ -182,7 +176,10 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu size_t lut = static_cast(LightingRegs::LightingSampler::DistanceAttenuation) + num; float sample_loc = scale * distance + bias; - dist_atten = LookupLightingLut(lut, sample_loc); + + u8 lutindex = MathUtil::Clamp(floorf(sample_loc * 256.f), 0.0f, 255.0f); + float delta = sample_loc * 256 - lutindex; + dist_atten = LookupLightingLut(lut, lutindex, delta / 256.f); } float clamp_highlights = 1.0f; @@ -195,7 +192,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu } auto GetLutIndex = [&](unsigned num, LightingRegs::LightingLutInput input, - bool abs) -> float { + bool abs) -> std::tuple { Math::Vec3 norm_view = view.Normalized(); Math::Vec3 half_angle = (norm_view + light_vector).Normalized(); @@ -229,14 +226,15 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu result = std::abs(result); else result = std::max(result, 0.0f); - } else { - if (result < 0.f) - result += 2.f; - result /= 2.f; + u8 lutindex = MathUtil::Clamp(floorf(result * 256.f), 0.0f, 255.0f); + float delta = result * 256 - lutindex; + return { lutindex, delta / 256.f }; + } else { + u8 tmpi = MathUtil::Clamp(floorf(result * 128.f), 0.0f, 127.0f); + float delta = result * 128.f - tmpi; + return { tmpi & 0xFF, delta / 128.f }; } - - return MathUtil::Clamp(result, 0.0f, 1.0f); }; // Specular 0 component @@ -246,11 +244,13 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu lighting.config0.config, LightingRegs::LightingSampler::Distribution0)) { // Lookup specular "distribution 0" LUT value - float index = GetLutIndex(num, lighting.lut_input.d0.Value(), lighting.abs_lut_input.disable_d0 == 0); + 
u8 index; + float delta; + std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.d0.Value(), lighting.abs_lut_input.disable_d0 == 0); float scale = lighting.lut_scale.GetScale(lighting.lut_scale.d0); - d0_lut_value = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::Distribution0), index); + d0_lut_value = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::Distribution0), index, delta); } Math::Vec3 specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); @@ -260,11 +260,13 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu LightingRegs::IsLightingSamplerSupported(lighting.config0.config, LightingRegs::LightingSampler::ReflectRed)) { - float index = GetLutIndex(num, lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0); + u8 index; + float delta; + std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0); float scale = lighting.lut_scale.GetScale(lighting.lut_scale.rr); - refl_value.x = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectRed), index); + refl_value.x = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectRed), index, delta); } else { refl_value.x = 1.0f; } @@ -274,11 +276,13 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu LightingRegs::IsLightingSamplerSupported(lighting.config0.config, LightingRegs::LightingSampler::ReflectGreen)) { - float index = GetLutIndex(num, lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0); + u8 index; + float delta; + std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0); float scale = lighting.lut_scale.GetScale(lighting.lut_scale.rg); - refl_value.y = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectGreen), index); + refl_value.y = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectGreen), index, delta); } else { refl_value.y 
= refl_value.x; } @@ -288,11 +292,13 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu LightingRegs::IsLightingSamplerSupported(lighting.config0.config, LightingRegs::LightingSampler::ReflectBlue)) { - float index = GetLutIndex(num, lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0); + u8 index; + float delta; + std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0); float scale = lighting.lut_scale.GetScale(lighting.lut_scale.rb); - refl_value.z = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectBlue), index); + refl_value.z = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectBlue), index, delta); } else { refl_value.z = refl_value.x; } @@ -303,11 +309,13 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu lighting.config0.config, LightingRegs::LightingSampler::Distribution1)) { // Lookup specular "distribution 1" LUT value - float index = GetLutIndex(num, lighting.lut_input.d1.Value(), lighting.abs_lut_input.disable_d1 == 0); + u8 index; + float delta; + std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.d1.Value(), lighting.abs_lut_input.disable_d1 == 0); float scale = lighting.lut_scale.GetScale(lighting.lut_scale.d1); - d1_lut_value = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::Distribution1), index); + d1_lut_value = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::Distribution1), index, delta); } Math::Vec3 specular_1 = d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); @@ -317,11 +325,13 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu lighting.config0.config, LightingRegs::LightingSampler::Fresnel)) { // Lookup fresnel LUT value - float index = GetLutIndex(num, lighting.lut_input.fr.Value(), lighting.abs_lut_input.disable_fr == 0); + u8 index; + float delta; + std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.fr.Value(), 
lighting.abs_lut_input.disable_fr == 0); float scale = lighting.lut_scale.GetScale(lighting.lut_scale.fr); - float lut_value = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::Fresnel), index); + float lut_value = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::Fresnel), index, delta); // Enabled for difffuse lighting alpha component if (lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha || -- cgit v1.2.3 From 2a75837bc30ba08e2470f4b91078747a08c5213a Mon Sep 17 00:00:00 2001 From: Subv Date: Fri, 9 Jun 2017 18:18:57 -0500 Subject: SwRasterizer: Corrected the light LUT lookups. --- src/video_core/swrasterizer/rasterizer.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index a9098e1f0..2c804b6e7 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -177,9 +177,9 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu float sample_loc = scale * distance + bias; - u8 lutindex = MathUtil::Clamp(floorf(sample_loc * 256.f), 0.0f, 255.0f); + u8 lutindex = MathUtil::Clamp(std::floor(sample_loc * 256.f), 0.0f, 255.0f); float delta = sample_loc * 256 - lutindex; - dist_atten = LookupLightingLut(lut, lutindex, delta / 256.f); + dist_atten = LookupLightingLut(lut, lutindex, delta); } float clamp_highlights = 1.0f; @@ -227,13 +227,14 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu else result = std::max(result, 0.0f); - u8 lutindex = MathUtil::Clamp(floorf(result * 256.f), 0.0f, 255.0f); + u8 lutindex = MathUtil::Clamp(std::floor(result * 256.f), 0.0f, 255.0f); float delta = result * 256 - lutindex; - return { lutindex, delta / 256.f }; + return { lutindex, delta }; } else { - u8 tmpi = MathUtil::Clamp(floorf(result * 128.f), 0.0f, 127.0f); + float flr = std::floor(result 
* 128.f); + s8 tmpi = MathUtil::Clamp(flr, -128.0f, 127.0f); float delta = result * 128.f - tmpi; - return { tmpi & 0xFF, delta / 128.f }; + return { tmpi & 0xFF, delta }; } }; -- cgit v1.2.3 From 73566ff7a990cdfe8d8f023997b57942dc785fc4 Mon Sep 17 00:00:00 2001 From: Subv Date: Sun, 11 Jun 2017 11:55:35 -0500 Subject: SwRasterizer: Flip the vertex quaternions before clipping (if necessary). --- src/video_core/swrasterizer/clipper.cpp | 11 +++++++++++ src/video_core/swrasterizer/rasterizer.cpp | 24 ++++-------------------- 2 files changed, 15 insertions(+), 20 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp index 6fb923756..7537689b7 100644 --- a/src/video_core/swrasterizer/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp @@ -95,6 +95,17 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu static const size_t MAX_VERTICES = 9; static_vector buffer_a = {v0, v1, v2}; static_vector buffer_b; + + auto FlipQuaternionIfOpposite = [](auto& a, const auto& b) { + if (Math::Dot(a, b) < float24::Zero()) + a = -a; + }; + + // Flip the quaternions if they are opposite to prevent interpolating them over the wrong + // direction. 
+ FlipQuaternionIfOpposite(buffer_a[1].quat, buffer_a[0].quat); + FlipQuaternionIfOpposite(buffer_a[2].quat, buffer_a[0].quat); + auto* output_list = &buffer_a; auto* input_list = &buffer_b; diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 2c804b6e7..76f793c86 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -362,13 +362,6 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu }; } -static bool AreQuaternionsOpposite(Math::Vec4 qa, Math::Vec4 qb) { - Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() }; - Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() }; - - return (Math::Dot(a, b) < 0.f); -} - MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); /** @@ -462,15 +455,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; - // Flip the quaternions if they are opposite to prevent interpolating them over the wrong direction. 
- auto v1_quat = v1.quat; - auto v2_quat = v2.quat; - - if (AreQuaternionsOpposite(v0.quat, v1.quat)) - v1_quat = v1_quat * float24::FromFloat32(-1.0f); - if (AreQuaternionsOpposite(v0.quat, v2.quat)) - v2_quat = v2_quat * float24::FromFloat32(-1.0f); - auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); auto textures = regs.texturing.GetTextures(); @@ -571,11 +555,11 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve Math::Quaternion normquat{ { - GetInterpolatedAttribute(v0.quat.x, v1_quat.x, v2_quat.x).ToFloat32(), - GetInterpolatedAttribute(v0.quat.y, v1_quat.y, v2_quat.y).ToFloat32(), - GetInterpolatedAttribute(v0.quat.z, v1_quat.z, v2_quat.z).ToFloat32() + GetInterpolatedAttribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(), + GetInterpolatedAttribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(), + GetInterpolatedAttribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32() }, - GetInterpolatedAttribute(v0.quat.w, v1_quat.w, v2_quat.w).ToFloat32(), + GetInterpolatedAttribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(), }; Math::Vec3 fragment_position{ -- cgit v1.2.3 From 2d69a9b8bf232fdd9e3bbb2a9c624ee9dd6ec637 Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 13 Jun 2017 12:31:28 -0500 Subject: SwRasterizer: Run clang-format --- src/video_core/swrasterizer/rasterizer.cpp | 128 +++++++++++++++++++---------- 1 file changed, 83 insertions(+), 45 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 76f793c86..382b5927b 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -125,11 +125,12 @@ float LookupLightingLut(size_t lut_index, u8 index, float delta) { return lut_value + lut_diff * delta; } -std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Quaternion& normquat, const Math::Vec3& view) { +std::tuple, Math::Vec4> ComputeFragmentsColors( + const Math::Quaternion& 
normquat, const Math::Vec3& view) { const auto& lighting = g_state.regs.lighting; if (lighting.disable) - return {{}, {}}; + return {Math::MakeVec(0, 0, 0, 0), Math::MakeVec(0, 0, 0, 0)}; // TODO(Subv): Bump mapping Math::Vec3 surface_normal = {0.0f, 0.0f, 1.0f}; @@ -151,7 +152,9 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu unsigned num = lighting.light_enable.GetNum(light_index); const auto& light_config = g_state.regs.lighting.light[num]; - Math::Vec3 position = {float16::FromRaw(light_config.x).ToFloat32(), float16::FromRaw(light_config.y).ToFloat32(), float16::FromRaw(light_config.z).ToFloat32()}; + Math::Vec3 position = {float16::FromRaw(light_config.x).ToFloat32(), + float16::FromRaw(light_config.y).ToFloat32(), + float16::FromRaw(light_config.z).ToFloat32()}; if (light_config.config.directional) light_vector = position; @@ -173,11 +176,13 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu auto distance = (-view - position).Length(); float scale = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); float bias = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); - size_t lut = static_cast(LightingRegs::LightingSampler::DistanceAttenuation) + num; + size_t lut = + static_cast(LightingRegs::LightingSampler::DistanceAttenuation) + num; float sample_loc = scale * distance + bias; - u8 lutindex = MathUtil::Clamp(std::floor(sample_loc * 256.f), 0.0f, 255.0f); + u8 lutindex = + static_cast(MathUtil::Clamp(std::floor(sample_loc * 256.f), 0.0f, 255.0f)); float delta = sample_loc * 256 - lutindex; dist_atten = LookupLightingLut(lut, lutindex, delta); } @@ -192,7 +197,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu } auto GetLutIndex = [&](unsigned num, LightingRegs::LightingLutInput input, - bool abs) -> std::tuple { + bool abs) -> std::tuple { Math::Vec3 norm_view = view.Normalized(); Math::Vec3 half_angle = (norm_view + light_vector).Normalized(); @@ -216,7 +221,7 @@ std::tuple, 
Math::Vec4> ComputeFragmentsColors(const Math::Qu break; default: - LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input); + LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast(input)); UNIMPLEMENTED(); result = 0.f; } @@ -227,14 +232,15 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu else result = std::max(result, 0.0f); - u8 lutindex = MathUtil::Clamp(std::floor(result * 256.f), 0.0f, 255.0f); + float flr = std::floor(result * 256.f); + u8 lutindex = static_cast(MathUtil::Clamp(flr, 0.0f, 255.0f)); float delta = result * 256 - lutindex; - return { lutindex, delta }; + return {lutindex, delta}; } else { float flr = std::floor(result * 128.f); - s8 tmpi = MathUtil::Clamp(flr, -128.0f, 127.0f); - float delta = result * 128.f - tmpi; - return { tmpi & 0xFF, delta }; + s8 lutindex = static_cast(MathUtil::Clamp(flr, -128.0f, 127.0f)); + float delta = result * 128.f - lutindex; + return {static_cast(lutindex), delta}; } }; @@ -247,11 +253,15 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu // Lookup specular "distribution 0" LUT value u8 index; float delta; - std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.d0.Value(), lighting.abs_lut_input.disable_d0 == 0); + std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.d0.Value(), + lighting.abs_lut_input.disable_d0 == 0); float scale = lighting.lut_scale.GetScale(lighting.lut_scale.d0); - d0_lut_value = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::Distribution0), index, delta); + d0_lut_value = + scale * + LookupLightingLut(static_cast(LightingRegs::LightingSampler::Distribution0), + index, delta); } Math::Vec3 specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); @@ -263,11 +273,15 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu u8 index; float delta; - std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0); + std::tie(index, delta) = + 
GetLutIndex(num, lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0); float scale = lighting.lut_scale.GetScale(lighting.lut_scale.rr); - refl_value.x = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectRed), index, delta); + refl_value.x = + scale * + LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectRed), + index, delta); } else { refl_value.x = 1.0f; } @@ -279,11 +293,15 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu u8 index; float delta; - std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0); + std::tie(index, delta) = + GetLutIndex(num, lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0); float scale = lighting.lut_scale.GetScale(lighting.lut_scale.rg); - refl_value.y = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectGreen), index, delta); + refl_value.y = + scale * + LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectGreen), + index, delta); } else { refl_value.y = refl_value.x; } @@ -295,11 +313,15 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu u8 index; float delta; - std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0); + std::tie(index, delta) = + GetLutIndex(num, lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0); float scale = lighting.lut_scale.GetScale(lighting.lut_scale.rb); - refl_value.z = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectBlue), index, delta); + refl_value.z = + scale * + LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectBlue), + index, delta); } else { refl_value.z = refl_value.x; } @@ -312,54 +334,72 @@ std::tuple, Math::Vec4> ComputeFragmentsColors(const Math::Qu // Lookup specular "distribution 1" LUT value u8 index; float delta; - std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.d1.Value(), 
lighting.abs_lut_input.disable_d1 == 0); + std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.d1.Value(), + lighting.abs_lut_input.disable_d1 == 0); float scale = lighting.lut_scale.GetScale(lighting.lut_scale.d1); - d1_lut_value = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::Distribution1), index, delta); + d1_lut_value = + scale * + LookupLightingLut(static_cast(LightingRegs::LightingSampler::Distribution1), + index, delta); } - Math::Vec3 specular_1 = d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); + Math::Vec3 specular_1 = + d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); if (lighting.config1.disable_lut_fr == 0 && - LightingRegs::IsLightingSamplerSupported( - lighting.config0.config, LightingRegs::LightingSampler::Fresnel)) { + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::Fresnel)) { // Lookup fresnel LUT value u8 index; float delta; - std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.fr.Value(), lighting.abs_lut_input.disable_fr == 0); + std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.fr.Value(), + lighting.abs_lut_input.disable_fr == 0); float scale = lighting.lut_scale.GetScale(lighting.lut_scale.fr); - float lut_value = scale * LookupLightingLut(static_cast(LightingRegs::LightingSampler::Fresnel), index, delta); + float lut_value = + scale * + LookupLightingLut(static_cast(LightingRegs::LightingSampler::Fresnel), + index, delta); - // Enabled for difffuse lighting alpha component - if (lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha || + // Enabled for diffuse lighting alpha component + if (lighting.config0.fresnel_selector == + LightingRegs::LightingFresnelSelector::PrimaryAlpha || lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { diffuse_sum.a() *= lut_value; } // Enabled for the specular lighting alpha component if 
(lighting.config0.fresnel_selector == - LightingRegs::LightingFresnelSelector::SecondaryAlpha || + LightingRegs::LightingFresnelSelector::SecondaryAlpha || lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { specular_sum.a() *= lut_value; } } - - auto diffuse = light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); + auto diffuse = + light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); diffuse_sum += Math::MakeVec(diffuse * dist_atten, 0.0f); - specular_sum += Math::MakeVec((specular_0 + specular_1) * clamp_highlights * dist_atten, 0.f); + specular_sum += + Math::MakeVec((specular_0 + specular_1) * clamp_highlights * dist_atten, 0.f); } diffuse_sum += Math::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); - return { - Math::MakeVec(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, MathUtil::Clamp(diffuse_sum.w, 0.0f, 1.0f) * 255).Cast(), - Math::MakeVec(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255).Cast() - }; + + return {Math::MakeVec(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.w, 0.0f, 1.0f) * 255) + .Cast(), + Math::MakeVec(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255) + .Cast()}; } MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); @@ -554,19 +594,16 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve }; Math::Quaternion normquat{ - { - 
GetInterpolatedAttribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(), - GetInterpolatedAttribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(), - GetInterpolatedAttribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32() - }, + {GetInterpolatedAttribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(), + GetInterpolatedAttribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(), + GetInterpolatedAttribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()}, GetInterpolatedAttribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(), }; Math::Vec3 fragment_position{ GetInterpolatedAttribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(), GetInterpolatedAttribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), - GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32() - }; + GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32()}; Math::Vec2 uv[3]; uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); @@ -685,7 +722,8 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve Math::Vec4 primary_fragment_color; Math::Vec4 secondary_fragment_color; - std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors(normquat, fragment_position); + std::tie(primary_fragment_color, secondary_fragment_color) = + ComputeFragmentsColors(normquat, fragment_position); for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { -- cgit v1.2.3 From 6250f52e939c714ccb302003502ee78941c8221b Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 13 Jun 2017 12:36:45 -0500 Subject: SwRasterizer: Fixed a few conversion warnings and moved per-light values into the per-light loop. 
--- src/video_core/swrasterizer/rasterizer.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 382b5927b..54af53bbd 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -143,18 +143,18 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( // Use the normalized the quaternion when performing the rotation auto normal = Math::QuaternionRotate(normquat.Normalized(), surface_normal); - Math::Vec3 light_vector = {}; Math::Vec4 diffuse_sum = {0.f, 0.f, 0.f, 1.f}; Math::Vec4 specular_sum = {0.f, 0.f, 0.f, 1.f}; - Math::Vec3 refl_value = {}; for (unsigned light_index = 0; light_index <= lighting.max_light_index; ++light_index) { unsigned num = lighting.light_enable.GetNum(light_index); const auto& light_config = g_state.regs.lighting.light[num]; + Math::Vec3 refl_value = {}; Math::Vec3 position = {float16::FromRaw(light_config.x).ToFloat32(), float16::FromRaw(light_config.y).ToFloat32(), float16::FromRaw(light_config.z).ToFloat32()}; + Math::Vec3 light_vector; if (light_config.config.directional) light_vector = position; @@ -175,11 +175,12 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( if (!lighting.IsDistAttenDisabled(num)) { auto distance = (-view - position).Length(); float scale = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); - float bias = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); + float dist_aten_bias = + Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); size_t lut = static_cast(LightingRegs::LightingSampler::DistanceAttenuation) + num; - float sample_loc = scale * distance + bias; + float sample_loc = scale * distance + dist_aten_bias; u8 lutindex = static_cast(MathUtil::Clamp(std::floor(sample_loc * 256.f), 0.0f, 255.0f)); @@ -238,7 +239,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( return 
{lutindex, delta}; } else { float flr = std::floor(result * 128.f); - s8 lutindex = static_cast(MathUtil::Clamp(flr, -128.0f, 127.0f)); + s8 lutindex = static_cast(MathUtil::Clamp(flr, -128.0f, 127.0f)); float delta = result * 128.f - lutindex; return {static_cast(lutindex), delta}; } -- cgit v1.2.3 From 37ac2b6657002e19d78cbc97841f8d3eee6ac5b8 Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 13 Jun 2017 12:53:50 -0500 Subject: SwRasterizer/Lighting: Fixed a bug where the distance attenuation bias was being set to the dist atten scale. --- src/video_core/swrasterizer/rasterizer.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 54af53bbd..48ed8ccbf 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -175,12 +175,11 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( if (!lighting.IsDistAttenDisabled(num)) { auto distance = (-view - position).Length(); float scale = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); - float dist_aten_bias = - Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); + float bias = Pica::float20::FromRaw(light_config.dist_atten_bias).ToFloat32(); size_t lut = static_cast(LightingRegs::LightingSampler::DistanceAttenuation) + num; - float sample_loc = scale * distance + dist_aten_bias; + float sample_loc = scale * distance + bias; u8 lutindex = static_cast(MathUtil::Clamp(std::floor(sample_loc * 256.f), 0.0f, 255.0f)); -- cgit v1.2.3 From 7bc467e8725c6751eb44ea45ff2203af8692cda1 Mon Sep 17 00:00:00 2001 From: Subv Date: Wed, 28 Jun 2017 12:34:16 -0500 Subject: SwRasterizer/Lighting: Do not use global state in LookupLightingLut. 
--- src/video_core/pica_state.h | 2 +- src/video_core/swrasterizer/rasterizer.cpp | 33 +++++++++++++++++++----------- 2 files changed, 22 insertions(+), 13 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 2d23d34e6..864a2c9e6 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -79,7 +79,7 @@ struct State { std::array color_diff_table; } proctex; - struct { + struct Lighting { union LutEntry { // Used for raw access u32 raw; diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 48ed8ccbf..b69f7b692 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -115,12 +115,15 @@ static std::tuple ConvertCubeCoord(float24 u, float24 v return std::make_tuple(x / z * half + half, y / z * half + half, addr); } -float LookupLightingLut(size_t lut_index, u8 index, float delta) { - ASSERT_MSG(lut_index < g_state.lighting.luts.size(), "Out of range lut"); - ASSERT_MSG(index < g_state.lighting.luts[0].size(), "Out of range index"); +static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut_index, u8 index, + float delta) { + ASSERT_MSG(lut_index < lighting.luts.size(), "Out of range lut"); + ASSERT_MSG(index < lighting.luts[0].size(), "Out of range index"); - float lut_value = g_state.lighting.luts[lut_index][index].ToFloat(); - float lut_diff = g_state.lighting.luts[lut_index][index].DiffToFloat(); + const auto& lut = lighting.luts[lut_index][index]; + + float lut_value = lut.ToFloat(); + float lut_diff = lut.DiffToFloat(); return lut_value + lut_diff * delta; } @@ -184,7 +187,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( u8 lutindex = static_cast(MathUtil::Clamp(std::floor(sample_loc * 256.f), 0.0f, 255.0f)); float delta = sample_loc * 256 - lutindex; - dist_atten = LookupLightingLut(lut, lutindex, delta); + dist_atten = 
LookupLightingLut(g_state.lighting, lut, lutindex, delta); } float clamp_highlights = 1.0f; @@ -260,7 +263,8 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( d0_lut_value = scale * - LookupLightingLut(static_cast(LightingRegs::LightingSampler::Distribution0), + LookupLightingLut(g_state.lighting, + static_cast(LightingRegs::LightingSampler::Distribution0), index, delta); } @@ -280,7 +284,8 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( refl_value.x = scale * - LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectRed), + LookupLightingLut(g_state.lighting, + static_cast(LightingRegs::LightingSampler::ReflectRed), index, delta); } else { refl_value.x = 1.0f; @@ -300,7 +305,8 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( refl_value.y = scale * - LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectGreen), + LookupLightingLut(g_state.lighting, + static_cast(LightingRegs::LightingSampler::ReflectGreen), index, delta); } else { refl_value.y = refl_value.x; @@ -320,7 +326,8 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( refl_value.z = scale * - LookupLightingLut(static_cast(LightingRegs::LightingSampler::ReflectBlue), + LookupLightingLut(g_state.lighting, + static_cast(LightingRegs::LightingSampler::ReflectBlue), index, delta); } else { refl_value.z = refl_value.x; @@ -341,7 +348,8 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( d1_lut_value = scale * - LookupLightingLut(static_cast(LightingRegs::LightingSampler::Distribution1), + LookupLightingLut(g_state.lighting, + static_cast(LightingRegs::LightingSampler::Distribution1), index, delta); } @@ -362,7 +370,8 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( float lut_value = scale * - LookupLightingLut(static_cast(LightingRegs::LightingSampler::Fresnel), + LookupLightingLut(g_state.lighting, + static_cast(LightingRegs::LightingSampler::Fresnel), index, delta); // Enabled for diffuse lighting alpha component -- cgit v1.2.3 From b8229a768434ab9b47123359669761c05ecdd6b0 Mon 
Sep 17 00:00:00 2001 From: Subv Date: Wed, 28 Jun 2017 12:35:35 -0500 Subject: SwRasterizer/Lighting: Do not use global registers state in ComputeFragmentsColors. --- src/video_core/swrasterizer/rasterizer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index b69f7b692..d2d77e8b0 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -129,8 +129,8 @@ static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut } std::tuple, Math::Vec4> ComputeFragmentsColors( - const Math::Quaternion& normquat, const Math::Vec3& view) { - const auto& lighting = g_state.regs.lighting; + const Pica::LightingRegs& lighting, const Math::Quaternion& normquat, + const Math::Vec3& view) { if (lighting.disable) return {Math::MakeVec(0, 0, 0, 0), Math::MakeVec(0, 0, 0, 0)}; @@ -732,7 +732,7 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve Math::Vec4 secondary_fragment_color; std::tie(primary_fragment_color, secondary_fragment_color) = - ComputeFragmentsColors(normquat, fragment_position); + ComputeFragmentsColors(g_state.regs.lighting, normquat, fragment_position); for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { -- cgit v1.2.3 From 7526af5e52ac1e24512faa1cd8f1a169407689fb Mon Sep 17 00:00:00 2001 From: Subv Date: Wed, 28 Jun 2017 12:37:14 -0500 Subject: SwRasterizer/Lighting: Move the lighting enable check outside the ComputeFragmentsColors function. 
--- src/video_core/swrasterizer/rasterizer.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index d2d77e8b0..b2d2b6ef2 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -132,9 +132,6 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( const Pica::LightingRegs& lighting, const Math::Quaternion& normquat, const Math::Vec3& view) { - if (lighting.disable) - return {Math::MakeVec(0, 0, 0, 0), Math::MakeVec(0, 0, 0, 0)}; - // TODO(Subv): Bump mapping Math::Vec3 surface_normal = {0.0f, 0.0f, 1.0f}; @@ -728,11 +725,13 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve regs.texturing.tev_combiner_buffer_color.a, }; - Math::Vec4 primary_fragment_color; - Math::Vec4 secondary_fragment_color; + Math::Vec4 primary_fragment_color = {0, 0, 0, 0}; + Math::Vec4 secondary_fragment_color = {0, 0, 0, 0}; - std::tie(primary_fragment_color, secondary_fragment_color) = - ComputeFragmentsColors(g_state.regs.lighting, normquat, fragment_position); + if (!g_state.regs.lighting.disable) { + std::tie(primary_fragment_color, secondary_fragment_color) = + ComputeFragmentsColors(g_state.regs.lighting, normquat, fragment_position); + } for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { -- cgit v1.2.3 From 9906feefbd37ebfd658fecc47e960f23adc6b190 Mon Sep 17 00:00:00 2001 From: Subv Date: Wed, 28 Jun 2017 12:43:00 -0500 Subject: SwRasterizer/Lighting: Move the clamp highlight calculation to the end of the per-light loop body. 
--- src/video_core/swrasterizer/rasterizer.cpp | 34 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index b2d2b6ef2..2c7a1a815 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -163,14 +163,6 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( light_vector.Normalize(); - auto LV_N = Math::Dot(light_vector, normal); - auto dot_product = LV_N; - - if (light_config.config.two_sided_diffuse) - dot_product = std::abs(dot_product); - else - dot_product = std::max(dot_product, 0.0f); - float dist_atten = 1.0f; if (!lighting.IsDistAttenDisabled(num)) { auto distance = (-view - position).Length(); @@ -187,15 +179,6 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( dist_atten = LookupLightingLut(g_state.lighting, lut, lutindex, delta); } - float clamp_highlights = 1.0f; - - if (lighting.config0.clamp_highlights) { - if (LV_N <= 0.f) - clamp_highlights = 0.f; - else - clamp_highlights = 1.f; - } - auto GetLutIndex = [&](unsigned num, LightingRegs::LightingLutInput input, bool abs) -> std::tuple { @@ -386,6 +369,23 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( } } + auto dot_product = Math::Dot(light_vector, normal); + + // Calculate clamp highlights before applying the two-sided diffuse configuration to the dot + // product. 
+ float clamp_highlights = 1.0f; + if (lighting.config0.clamp_highlights) { + if (dot_product <= 0.f) + clamp_highlights = 0.f; + else + clamp_highlights = 1.f; + } + + if (light_config.config.two_sided_diffuse) + dot_product = std::abs(dot_product); + else + dot_product = std::max(dot_product, 0.0f); + auto diffuse = light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); diffuse_sum += Math::MakeVec(diffuse * dist_atten, 0.0f); -- cgit v1.2.3 From efc655aec00d43d53c41b55d9a94d17ce81e5942 Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 11 Jul 2017 20:06:26 +0300 Subject: SwRasterizer/Lighting: pass lighting state as parameter --- src/video_core/swrasterizer/rasterizer.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 2c7a1a815..b108a0f86 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -116,7 +116,7 @@ static std::tuple ConvertCubeCoord(float24 u, float24 v } static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut_index, u8 index, - float delta) { + float delta) { ASSERT_MSG(lut_index < lighting.luts.size(), "Out of range lut"); ASSERT_MSG(index < lighting.luts[0].size(), "Out of range index"); @@ -129,8 +129,8 @@ static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut } std::tuple, Math::Vec4> ComputeFragmentsColors( - const Pica::LightingRegs& lighting, const Math::Quaternion& normquat, - const Math::Vec3& view) { + const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, + const Math::Quaternion& normquat, const Math::Vec3& view) { // TODO(Subv): Bump mapping Math::Vec3 surface_normal = {0.0f, 0.0f, 1.0f}; @@ -148,7 +148,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( for (unsigned light_index = 0; light_index <= 
lighting.max_light_index; ++light_index) { unsigned num = lighting.light_enable.GetNum(light_index); - const auto& light_config = g_state.regs.lighting.light[num]; + const auto& light_config = lighting.light[num]; Math::Vec3 refl_value = {}; Math::Vec3 position = {float16::FromRaw(light_config.x).ToFloat32(), @@ -176,7 +176,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( u8 lutindex = static_cast(MathUtil::Clamp(std::floor(sample_loc * 256.f), 0.0f, 255.0f)); float delta = sample_loc * 256 - lutindex; - dist_atten = LookupLightingLut(g_state.lighting, lut, lutindex, delta); + dist_atten = LookupLightingLut(lighting_state, lut, lutindex, delta); } auto GetLutIndex = [&](unsigned num, LightingRegs::LightingLutInput input, @@ -243,7 +243,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( d0_lut_value = scale * - LookupLightingLut(g_state.lighting, + LookupLightingLut(lighting_state, static_cast(LightingRegs::LightingSampler::Distribution0), index, delta); } @@ -264,7 +264,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( refl_value.x = scale * - LookupLightingLut(g_state.lighting, + LookupLightingLut(lighting_state, static_cast(LightingRegs::LightingSampler::ReflectRed), index, delta); } else { @@ -285,7 +285,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( refl_value.y = scale * - LookupLightingLut(g_state.lighting, + LookupLightingLut(lighting_state, static_cast(LightingRegs::LightingSampler::ReflectGreen), index, delta); } else { @@ -306,7 +306,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( refl_value.z = scale * - LookupLightingLut(g_state.lighting, + LookupLightingLut(lighting_state, static_cast(LightingRegs::LightingSampler::ReflectBlue), index, delta); } else { @@ -328,7 +328,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( d1_lut_value = scale * - LookupLightingLut(g_state.lighting, + LookupLightingLut(lighting_state, static_cast(LightingRegs::LightingSampler::Distribution1), index, delta); } @@ -350,7 +350,7 @@ std::tuple, Math::Vec4> 
ComputeFragmentsColors( float lut_value = scale * - LookupLightingLut(g_state.lighting, + LookupLightingLut(lighting_state, static_cast(LightingRegs::LightingSampler::Fresnel), index, delta); @@ -729,8 +729,8 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve Math::Vec4 secondary_fragment_color = {0, 0, 0, 0}; if (!g_state.regs.lighting.disable) { - std::tie(primary_fragment_color, secondary_fragment_color) = - ComputeFragmentsColors(g_state.regs.lighting, normquat, fragment_position); + std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors( + g_state.regs.lighting, g_state.lighting, normquat, fragment_position); } for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); -- cgit v1.2.3 From f13cf506e0b0e42e6c9b00b163aaabc3b63fb7ea Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 11 Jul 2017 20:15:23 +0300 Subject: SwRasterizer: only interpolate quat and view when lighting is enabled --- src/video_core/swrasterizer/rasterizer.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index b108a0f86..5844c401c 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -599,18 +599,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve 255), }; - Math::Quaternion normquat{ - {GetInterpolatedAttribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(), - GetInterpolatedAttribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(), - GetInterpolatedAttribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()}, - GetInterpolatedAttribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(), - }; - - Math::Vec3 fragment_position{ - GetInterpolatedAttribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(), - GetInterpolatedAttribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), - 
GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32()}; - Math::Vec2 uv[3]; uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); uv[0].v() = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); @@ -729,8 +717,20 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve Math::Vec4 secondary_fragment_color = {0, 0, 0, 0}; if (!g_state.regs.lighting.disable) { - std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors( - g_state.regs.lighting, g_state.lighting, normquat, fragment_position); + Math::Quaternion normquat{ + {GetInterpolatedAttribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(), + GetInterpolatedAttribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(), + GetInterpolatedAttribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()}, + GetInterpolatedAttribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(), + }; + + Math::Vec3 view{ + GetInterpolatedAttribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(), + GetInterpolatedAttribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), + GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(), + }; + std::tie(primary_fragment_color, secondary_fragment_color) = + ComputeFragmentsColors(g_state.regs.lighting, g_state.lighting, normquat, view); } for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); -- cgit v1.2.3 From c6d1472513394cc55b5d5a852d5f76b5e9a51f2b Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 11 Jul 2017 21:36:19 +0300 Subject: SwRasterizer/Lighting: refactor GetLutValue into a function. merging similar pattern. 
Also makes the code more similar to the gl one --- src/video_core/swrasterizer/rasterizer.cpp | 110 +++++++---------------------- 1 file changed, 27 insertions(+), 83 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 5844c401c..53c3bb585 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -179,9 +179,9 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( dist_atten = LookupLightingLut(lighting_state, lut, lutindex, delta); } - auto GetLutIndex = [&](unsigned num, LightingRegs::LightingLutInput input, - bool abs) -> std::tuple { - + auto GetLutValue = [&](LightingRegs::LightingLutInput input, bool abs, + LightingRegs::LightingScale scale_enum, + LightingRegs::LightingSampler sampler) { Math::Vec3 norm_view = view.Normalized(); Math::Vec3 half_angle = (norm_view + light_vector).Normalized(); float result = 0.0f; @@ -209,6 +209,9 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( result = 0.f; } + u8 index; + float delta; + if (abs) { if (light_config.config.two_sided_diffuse) result = std::abs(result); @@ -216,15 +219,18 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( result = std::max(result, 0.0f); float flr = std::floor(result * 256.f); - u8 lutindex = static_cast(MathUtil::Clamp(flr, 0.0f, 255.0f)); - float delta = result * 256 - lutindex; - return {lutindex, delta}; + index = static_cast(MathUtil::Clamp(flr, 0.0f, 255.0f)); + delta = result * 256 - index; } else { float flr = std::floor(result * 128.f); - s8 lutindex = static_cast(MathUtil::Clamp(flr, -128.0f, 127.0f)); - float delta = result * 128.f - lutindex; - return {static_cast(lutindex), delta}; + s8 signed_index = static_cast(MathUtil::Clamp(flr, -128.0f, 127.0f)); + delta = result * 128.f - signed_index; + index = static_cast(signed_index); } + + float scale = lighting.lut_scale.GetScale(scale_enum); + return scale * + 
LookupLightingLut(lighting_state, static_cast(sampler), index, delta); }; // Specular 0 component @@ -232,20 +238,9 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( if (lighting.config1.disable_lut_d0 == 0 && LightingRegs::IsLightingSamplerSupported( lighting.config0.config, LightingRegs::LightingSampler::Distribution0)) { - - // Lookup specular "distribution 0" LUT value - u8 index; - float delta; - std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.d0.Value(), - lighting.abs_lut_input.disable_d0 == 0); - - float scale = lighting.lut_scale.GetScale(lighting.lut_scale.d0); - d0_lut_value = - scale * - LookupLightingLut(lighting_state, - static_cast(LightingRegs::LightingSampler::Distribution0), - index, delta); + GetLutValue(lighting.lut_input.d0, lighting.abs_lut_input.disable_d0 == 0, + lighting.lut_scale.d0, LightingRegs::LightingSampler::Distribution0); } Math::Vec3 specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); @@ -254,19 +249,9 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( if (lighting.config1.disable_lut_rr == 0 && LightingRegs::IsLightingSamplerSupported(lighting.config0.config, LightingRegs::LightingSampler::ReflectRed)) { - - u8 index; - float delta; - std::tie(index, delta) = - GetLutIndex(num, lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0); - - float scale = lighting.lut_scale.GetScale(lighting.lut_scale.rr); - refl_value.x = - scale * - LookupLightingLut(lighting_state, - static_cast(LightingRegs::LightingSampler::ReflectRed), - index, delta); + GetLutValue(lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0, + lighting.lut_scale.rr, LightingRegs::LightingSampler::ReflectRed); } else { refl_value.x = 1.0f; } @@ -275,19 +260,9 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( if (lighting.config1.disable_lut_rg == 0 && LightingRegs::IsLightingSamplerSupported(lighting.config0.config, LightingRegs::LightingSampler::ReflectGreen)) { - - u8 index; - float delta; - std::tie(index, delta) = - 
GetLutIndex(num, lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0); - - float scale = lighting.lut_scale.GetScale(lighting.lut_scale.rg); - refl_value.y = - scale * - LookupLightingLut(lighting_state, - static_cast(LightingRegs::LightingSampler::ReflectGreen), - index, delta); + GetLutValue(lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0, + lighting.lut_scale.rg, LightingRegs::LightingSampler::ReflectGreen); } else { refl_value.y = refl_value.x; } @@ -296,19 +271,9 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( if (lighting.config1.disable_lut_rb == 0 && LightingRegs::IsLightingSamplerSupported(lighting.config0.config, LightingRegs::LightingSampler::ReflectBlue)) { - - u8 index; - float delta; - std::tie(index, delta) = - GetLutIndex(num, lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0); - - float scale = lighting.lut_scale.GetScale(lighting.lut_scale.rb); - refl_value.z = - scale * - LookupLightingLut(lighting_state, - static_cast(LightingRegs::LightingSampler::ReflectBlue), - index, delta); + GetLutValue(lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0, + lighting.lut_scale.rb, LightingRegs::LightingSampler::ReflectBlue); } else { refl_value.z = refl_value.x; } @@ -317,20 +282,9 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( if (lighting.config1.disable_lut_d1 == 0 && LightingRegs::IsLightingSamplerSupported( lighting.config0.config, LightingRegs::LightingSampler::Distribution1)) { - - // Lookup specular "distribution 1" LUT value - u8 index; - float delta; - std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.d1.Value(), - lighting.abs_lut_input.disable_d1 == 0); - - float scale = lighting.lut_scale.GetScale(lighting.lut_scale.d1); - d1_lut_value = - scale * - LookupLightingLut(lighting_state, - static_cast(LightingRegs::LightingSampler::Distribution1), - index, delta); + GetLutValue(lighting.lut_input.d1, lighting.abs_lut_input.disable_d1 == 0, + lighting.lut_scale.d1, 
LightingRegs::LightingSampler::Distribution1); } Math::Vec3 specular_1 = @@ -340,19 +294,9 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( LightingRegs::IsLightingSamplerSupported(lighting.config0.config, LightingRegs::LightingSampler::Fresnel)) { - // Lookup fresnel LUT value - u8 index; - float delta; - std::tie(index, delta) = GetLutIndex(num, lighting.lut_input.fr.Value(), - lighting.abs_lut_input.disable_fr == 0); - - float scale = lighting.lut_scale.GetScale(lighting.lut_scale.fr); - float lut_value = - scale * - LookupLightingLut(lighting_state, - static_cast(LightingRegs::LightingSampler::Fresnel), - index, delta); + GetLutValue(lighting.lut_input.fr, lighting.abs_lut_input.disable_fr == 0, + lighting.lut_scale.fr, LightingRegs::LightingSampler::Fresnel); // Enabled for diffuse lighting alpha component if (lighting.config0.fresnel_selector == -- cgit v1.2.3 From e415558a4fc471bc3ac2d22dd8052aeb63769c6e Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 11 Jul 2017 21:47:55 +0300 Subject: SwRasterizer/Lighting: get rid of nested return --- src/video_core/swrasterizer/rasterizer.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 53c3bb585..e46790f85 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -340,16 +340,17 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( diffuse_sum += Math::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); - return {Math::MakeVec(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, - MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, - MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, - MathUtil::Clamp(diffuse_sum.w, 0.0f, 1.0f) * 255) - .Cast(), - Math::MakeVec(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, - MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, - MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 
255, - MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255) - .Cast()}; + auto diffuse = Math::MakeVec(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.w, 0.0f, 1.0f) * 255) + .Cast(); + auto specular = Math::MakeVec(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255) + .Cast(); + return {diffuse, specular}; } MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); -- cgit v1.2.3 From 56e5425e593e29aecf255c441791f2e24512f418 Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 11 Jul 2017 22:07:19 +0300 Subject: SwRasterizer/Lighting: unify float suffix --- src/video_core/swrasterizer/rasterizer.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index e46790f85..c83680629 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -143,8 +143,8 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( // Use the normalized the quaternion when performing the rotation auto normal = Math::QuaternionRotate(normquat.Normalized(), surface_normal); - Math::Vec4 diffuse_sum = {0.f, 0.f, 0.f, 1.f}; - Math::Vec4 specular_sum = {0.f, 0.f, 0.f, 1.f}; + Math::Vec4 diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f}; + Math::Vec4 specular_sum = {0.0f, 0.0f, 0.0f, 1.0f}; for (unsigned light_index = 0; light_index <= lighting.max_light_index; ++light_index) { unsigned num = lighting.light_enable.GetNum(light_index); @@ -174,7 +174,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( float sample_loc = scale * distance + bias; u8 lutindex = - 
static_cast(MathUtil::Clamp(std::floor(sample_loc * 256.f), 0.0f, 255.0f)); + static_cast(MathUtil::Clamp(std::floor(sample_loc * 256.0f), 0.0f, 255.0f)); float delta = sample_loc * 256 - lutindex; dist_atten = LookupLightingLut(lighting_state, lut, lutindex, delta); } @@ -206,7 +206,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( default: LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast(input)); UNIMPLEMENTED(); - result = 0.f; + result = 0.0f; } u8 index; @@ -218,13 +218,13 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( else result = std::max(result, 0.0f); - float flr = std::floor(result * 256.f); + float flr = std::floor(result * 256.0f); index = static_cast(MathUtil::Clamp(flr, 0.0f, 255.0f)); delta = result * 256 - index; } else { - float flr = std::floor(result * 128.f); + float flr = std::floor(result * 128.0f); s8 signed_index = static_cast(MathUtil::Clamp(flr, -128.0f, 127.0f)); - delta = result * 128.f - signed_index; + delta = result * 128.0f - signed_index; index = static_cast(signed_index); } @@ -278,6 +278,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( refl_value.z = refl_value.x; } + // Specular 1 component float d1_lut_value = 1.0f; if (lighting.config1.disable_lut_d1 == 0 && LightingRegs::IsLightingSamplerSupported( @@ -290,6 +291,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( Math::Vec3 specular_1 = d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); + // Fresnel if (lighting.config1.disable_lut_fr == 0 && LightingRegs::IsLightingSamplerSupported(lighting.config0.config, LightingRegs::LightingSampler::Fresnel)) { @@ -319,10 +321,10 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( // product. 
float clamp_highlights = 1.0f; if (lighting.config0.clamp_highlights) { - if (dot_product <= 0.f) - clamp_highlights = 0.f; + if (dot_product <= 0.0f) + clamp_highlights = 0.0f; else - clamp_highlights = 1.f; + clamp_highlights = 1.0f; } if (light_config.config.two_sided_diffuse) @@ -335,7 +337,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( diffuse_sum += Math::MakeVec(diffuse * dist_atten, 0.0f); specular_sum += - Math::MakeVec((specular_0 + specular_1) * clamp_highlights * dist_atten, 0.f); + Math::MakeVec((specular_0 + specular_1) * clamp_highlights * dist_atten, 0.0f); } diffuse_sum += Math::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); -- cgit v1.2.3 From 4feff63ffaec4d62d5bdfc85968cc99298907767 Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 11 Jul 2017 22:19:00 +0300 Subject: SwRasterizer/Lighting: dist atten lut input need to be clamp --- src/video_core/swrasterizer/rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index c83680629..37d1313cf 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -171,7 +171,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( size_t lut = static_cast(LightingRegs::LightingSampler::DistanceAttenuation) + num; - float sample_loc = scale * distance + bias; + float sample_loc = MathUtil::Clamp(scale * distance + bias, 0.0f, 1.0f); u8 lutindex = static_cast(MathUtil::Clamp(std::floor(sample_loc * 256.0f), 0.0f, 255.0f)); -- cgit v1.2.3 From c89f804a01ef4c54de6051c3ce8c70d7e66812b0 Mon Sep 17 00:00:00 2001 From: wwylele Date: Thu, 27 Jul 2017 13:48:27 +0300 Subject: pica/shader_interpreter: fix off-by-one in LOOP --- src/video_core/shader/shader_interpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/shader_interpreter.cpp 
b/src/video_core/shader/shader_interpreter.cpp index aa1cec81f..206c0978a 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -631,7 +631,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData state.address_registers[2] = loop_param.y; Record(debug_data, iteration, loop_param); - call(program_counter + 1, instr.flow_control.dest_offset - program_counter + 1, + call(program_counter + 1, instr.flow_control.dest_offset - program_counter, instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); break; } -- cgit v1.2.3 From c59ed47608367de8cd5e4e6d58da02dee30810a9 Mon Sep 17 00:00:00 2001 From: wwylele Date: Wed, 2 Aug 2017 22:05:53 +0300 Subject: SwRasterizer/Lighting: move quaternion normalization to the caller --- src/video_core/swrasterizer/rasterizer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 37d1313cf..80ecf72ec 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -141,7 +141,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( } // Use the normalized the quaternion when performing the rotation - auto normal = Math::QuaternionRotate(normquat.Normalized(), surface_normal); + auto normal = Math::QuaternionRotate(normquat, surface_normal); Math::Vec4 diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f}; Math::Vec4 specular_sum = {0.0f, 0.0f, 0.0f, 1.0f}; @@ -664,12 +664,12 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve Math::Vec4 secondary_fragment_color = {0, 0, 0, 0}; if (!g_state.regs.lighting.disable) { - Math::Quaternion normquat{ + Math::Quaternion normquat = Math::Quaternion{ {GetInterpolatedAttribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(), GetInterpolatedAttribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(), 
GetInterpolatedAttribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()}, GetInterpolatedAttribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(), - }; + }.Normalized(); Math::Vec3 view{ GetInterpolatedAttribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(), -- cgit v1.2.3 From 48b410587116c92339d936ed3b1fd00aba38d6b5 Mon Sep 17 00:00:00 2001 From: wwylele Date: Wed, 2 Aug 2017 22:07:15 +0300 Subject: SwRasterizer/Lighting: reduce confusion --- src/video_core/swrasterizer/rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 80ecf72ec..aee630954 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -118,7 +118,7 @@ static std::tuple ConvertCubeCoord(float24 u, float24 v static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut_index, u8 index, float delta) { ASSERT_MSG(lut_index < lighting.luts.size(), "Out of range lut"); - ASSERT_MSG(index < lighting.luts[0].size(), "Out of range index"); + ASSERT_MSG(index < lighting.luts[lut_index].size(), "Out of range index"); const auto& lut = lighting.luts[lut_index][index]; -- cgit v1.2.3 From eda28266fb1f0eb96a2096cadb41b62db3dc2d2e Mon Sep 17 00:00:00 2001 From: wwylele Date: Wed, 2 Aug 2017 22:20:40 +0300 Subject: SwRasterizer/Lighting: move to its own file --- src/video_core/CMakeLists.txt | 2 + src/video_core/swrasterizer/fragment_lighting.cpp | 250 ++++++++++++++++++++++ src/video_core/swrasterizer/fragment_lighting.h | 18 ++ src/video_core/swrasterizer/rasterizer.cpp | 241 +-------------------- 4 files changed, 271 insertions(+), 240 deletions(-) create mode 100644 src/video_core/swrasterizer/fragment_lighting.cpp create mode 100644 src/video_core/swrasterizer/fragment_lighting.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 
0961a3251..b2280f2ef 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -14,6 +14,7 @@ set(SRCS shader/shader.cpp shader/shader_interpreter.cpp swrasterizer/clipper.cpp + swrasterizer/fragment_lighting.cpp swrasterizer/framebuffer.cpp swrasterizer/proctex.cpp swrasterizer/rasterizer.cpp @@ -54,6 +55,7 @@ set(HEADERS shader/shader.h shader/shader_interpreter.h swrasterizer/clipper.h + swrasterizer/fragment_lighting.h swrasterizer/framebuffer.h swrasterizer/proctex.h swrasterizer/rasterizer.h diff --git a/src/video_core/swrasterizer/fragment_lighting.cpp b/src/video_core/swrasterizer/fragment_lighting.cpp new file mode 100644 index 000000000..45a86b5cd --- /dev/null +++ b/src/video_core/swrasterizer/fragment_lighting.cpp @@ -0,0 +1,250 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/math_util.h" +#include "video_core/swrasterizer/fragment_lighting.h" + +namespace Pica { + +static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut_index, u8 index, + float delta) { + ASSERT_MSG(lut_index < lighting.luts.size(), "Out of range lut"); + ASSERT_MSG(index < lighting.luts[lut_index].size(), "Out of range index"); + + const auto& lut = lighting.luts[lut_index][index]; + + float lut_value = lut.ToFloat(); + float lut_diff = lut.DiffToFloat(); + + return lut_value + lut_diff * delta; +} + +std::tuple, Math::Vec4> ComputeFragmentsColors( + const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, + const Math::Quaternion& normquat, const Math::Vec3& view) { + + // TODO(Subv): Bump mapping + Math::Vec3 surface_normal = {0.0f, 0.0f, 1.0f}; + + if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) { + LOG_CRITICAL(HW_GPU, "unimplemented bump mapping"); + UNIMPLEMENTED(); + } + + // Use the normalized the quaternion when performing the rotation + auto normal = 
Math::QuaternionRotate(normquat, surface_normal); + + Math::Vec4 diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f}; + Math::Vec4 specular_sum = {0.0f, 0.0f, 0.0f, 1.0f}; + + for (unsigned light_index = 0; light_index <= lighting.max_light_index; ++light_index) { + unsigned num = lighting.light_enable.GetNum(light_index); + const auto& light_config = lighting.light[num]; + + Math::Vec3 refl_value = {}; + Math::Vec3 position = {float16::FromRaw(light_config.x).ToFloat32(), + float16::FromRaw(light_config.y).ToFloat32(), + float16::FromRaw(light_config.z).ToFloat32()}; + Math::Vec3 light_vector; + + if (light_config.config.directional) + light_vector = position; + else + light_vector = position + view; + + light_vector.Normalize(); + + float dist_atten = 1.0f; + if (!lighting.IsDistAttenDisabled(num)) { + auto distance = (-view - position).Length(); + float scale = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); + float bias = Pica::float20::FromRaw(light_config.dist_atten_bias).ToFloat32(); + size_t lut = + static_cast(LightingRegs::LightingSampler::DistanceAttenuation) + num; + + float sample_loc = MathUtil::Clamp(scale * distance + bias, 0.0f, 1.0f); + + u8 lutindex = + static_cast(MathUtil::Clamp(std::floor(sample_loc * 256.0f), 0.0f, 255.0f)); + float delta = sample_loc * 256 - lutindex; + dist_atten = LookupLightingLut(lighting_state, lut, lutindex, delta); + } + + auto GetLutValue = [&](LightingRegs::LightingLutInput input, bool abs, + LightingRegs::LightingScale scale_enum, + LightingRegs::LightingSampler sampler) { + Math::Vec3 norm_view = view.Normalized(); + Math::Vec3 half_angle = (norm_view + light_vector).Normalized(); + float result = 0.0f; + + switch (input) { + case LightingRegs::LightingLutInput::NH: + result = Math::Dot(normal, half_angle); + break; + + case LightingRegs::LightingLutInput::VH: + result = Math::Dot(norm_view, half_angle); + break; + + case LightingRegs::LightingLutInput::NV: + result = Math::Dot(normal, norm_view); + break; + 
+ case LightingRegs::LightingLutInput::LN: + result = Math::Dot(light_vector, normal); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast(input)); + UNIMPLEMENTED(); + result = 0.0f; + } + + u8 index; + float delta; + + if (abs) { + if (light_config.config.two_sided_diffuse) + result = std::abs(result); + else + result = std::max(result, 0.0f); + + float flr = std::floor(result * 256.0f); + index = static_cast(MathUtil::Clamp(flr, 0.0f, 255.0f)); + delta = result * 256 - index; + } else { + float flr = std::floor(result * 128.0f); + s8 signed_index = static_cast(MathUtil::Clamp(flr, -128.0f, 127.0f)); + delta = result * 128.0f - signed_index; + index = static_cast(signed_index); + } + + float scale = lighting.lut_scale.GetScale(scale_enum); + return scale * + LookupLightingLut(lighting_state, static_cast(sampler), index, delta); + }; + + // Specular 0 component + float d0_lut_value = 1.0f; + if (lighting.config1.disable_lut_d0 == 0 && + LightingRegs::IsLightingSamplerSupported( + lighting.config0.config, LightingRegs::LightingSampler::Distribution0)) { + d0_lut_value = + GetLutValue(lighting.lut_input.d0, lighting.abs_lut_input.disable_d0 == 0, + lighting.lut_scale.d0, LightingRegs::LightingSampler::Distribution0); + } + + Math::Vec3 specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); + + // If enabled, lookup ReflectRed value, otherwise, 1.0 is used + if (lighting.config1.disable_lut_rr == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::ReflectRed)) { + refl_value.x = + GetLutValue(lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0, + lighting.lut_scale.rr, LightingRegs::LightingSampler::ReflectRed); + } else { + refl_value.x = 1.0f; + } + + // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used + if (lighting.config1.disable_lut_rg == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + 
LightingRegs::LightingSampler::ReflectGreen)) { + refl_value.y = + GetLutValue(lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0, + lighting.lut_scale.rg, LightingRegs::LightingSampler::ReflectGreen); + } else { + refl_value.y = refl_value.x; + } + + // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used + if (lighting.config1.disable_lut_rb == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::ReflectBlue)) { + refl_value.z = + GetLutValue(lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0, + lighting.lut_scale.rb, LightingRegs::LightingSampler::ReflectBlue); + } else { + refl_value.z = refl_value.x; + } + + // Specular 1 component + float d1_lut_value = 1.0f; + if (lighting.config1.disable_lut_d1 == 0 && + LightingRegs::IsLightingSamplerSupported( + lighting.config0.config, LightingRegs::LightingSampler::Distribution1)) { + d1_lut_value = + GetLutValue(lighting.lut_input.d1, lighting.abs_lut_input.disable_d1 == 0, + lighting.lut_scale.d1, LightingRegs::LightingSampler::Distribution1); + } + + Math::Vec3 specular_1 = + d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); + + // Fresnel + if (lighting.config1.disable_lut_fr == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::Fresnel)) { + + float lut_value = + GetLutValue(lighting.lut_input.fr, lighting.abs_lut_input.disable_fr == 0, + lighting.lut_scale.fr, LightingRegs::LightingSampler::Fresnel); + + // Enabled for diffuse lighting alpha component + if (lighting.config0.fresnel_selector == + LightingRegs::LightingFresnelSelector::PrimaryAlpha || + lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { + diffuse_sum.a() *= lut_value; + } + + // Enabled for the specular lighting alpha component + if (lighting.config0.fresnel_selector == + LightingRegs::LightingFresnelSelector::SecondaryAlpha || + 
lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { + specular_sum.a() *= lut_value; + } + } + + auto dot_product = Math::Dot(light_vector, normal); + + // Calculate clamp highlights before applying the two-sided diffuse configuration to the dot + // product. + float clamp_highlights = 1.0f; + if (lighting.config0.clamp_highlights) { + if (dot_product <= 0.0f) + clamp_highlights = 0.0f; + else + clamp_highlights = 1.0f; + } + + if (light_config.config.two_sided_diffuse) + dot_product = std::abs(dot_product); + else + dot_product = std::max(dot_product, 0.0f); + + auto diffuse = + light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); + diffuse_sum += Math::MakeVec(diffuse * dist_atten, 0.0f); + + specular_sum += + Math::MakeVec((specular_0 + specular_1) * clamp_highlights * dist_atten, 0.0f); + } + + diffuse_sum += Math::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); + + auto diffuse = Math::MakeVec(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.w, 0.0f, 1.0f) * 255) + .Cast(); + auto specular = Math::MakeVec(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255) + .Cast(); + return {diffuse, specular}; +} + +} // namespace Pica diff --git a/src/video_core/swrasterizer/fragment_lighting.h b/src/video_core/swrasterizer/fragment_lighting.h new file mode 100644 index 000000000..438dca926 --- /dev/null +++ b/src/video_core/swrasterizer/fragment_lighting.h @@ -0,0 +1,18 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "common/quaternion.h" +#include "common/vector_math.h" +#include "video_core/pica_state.h" + +namespace Pica { + +std::tuple, Math::Vec4> ComputeFragmentsColors( + const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, + const Math::Quaternion& normquat, const Math::Vec3& view); + +} // namespace Pica diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index aee630954..bc7e1c56c 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -24,6 +24,7 @@ #include "video_core/regs_rasterizer.h" #include "video_core/regs_texturing.h" #include "video_core/shader/shader.h" +#include "video_core/swrasterizer/fragment_lighting.h" #include "video_core/swrasterizer/framebuffer.h" #include "video_core/swrasterizer/proctex.h" #include "video_core/swrasterizer/rasterizer.h" @@ -115,246 +116,6 @@ static std::tuple ConvertCubeCoord(float24 u, float24 v return std::make_tuple(x / z * half + half, y / z * half + half, addr); } -static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut_index, u8 index, - float delta) { - ASSERT_MSG(lut_index < lighting.luts.size(), "Out of range lut"); - ASSERT_MSG(index < lighting.luts[lut_index].size(), "Out of range index"); - - const auto& lut = lighting.luts[lut_index][index]; - - float lut_value = lut.ToFloat(); - float lut_diff = lut.DiffToFloat(); - - return lut_value + lut_diff * delta; -} - -std::tuple, Math::Vec4> ComputeFragmentsColors( - const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, - const Math::Quaternion& normquat, const Math::Vec3& view) { - - // TODO(Subv): Bump mapping - Math::Vec3 surface_normal = {0.0f, 0.0f, 1.0f}; - - if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) { - LOG_CRITICAL(HW_GPU, "unimplemented bump mapping"); - UNIMPLEMENTED(); - } - - // Use the normalized the quaternion when 
performing the rotation - auto normal = Math::QuaternionRotate(normquat, surface_normal); - - Math::Vec4 diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f}; - Math::Vec4 specular_sum = {0.0f, 0.0f, 0.0f, 1.0f}; - - for (unsigned light_index = 0; light_index <= lighting.max_light_index; ++light_index) { - unsigned num = lighting.light_enable.GetNum(light_index); - const auto& light_config = lighting.light[num]; - - Math::Vec3 refl_value = {}; - Math::Vec3 position = {float16::FromRaw(light_config.x).ToFloat32(), - float16::FromRaw(light_config.y).ToFloat32(), - float16::FromRaw(light_config.z).ToFloat32()}; - Math::Vec3 light_vector; - - if (light_config.config.directional) - light_vector = position; - else - light_vector = position + view; - - light_vector.Normalize(); - - float dist_atten = 1.0f; - if (!lighting.IsDistAttenDisabled(num)) { - auto distance = (-view - position).Length(); - float scale = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); - float bias = Pica::float20::FromRaw(light_config.dist_atten_bias).ToFloat32(); - size_t lut = - static_cast(LightingRegs::LightingSampler::DistanceAttenuation) + num; - - float sample_loc = MathUtil::Clamp(scale * distance + bias, 0.0f, 1.0f); - - u8 lutindex = - static_cast(MathUtil::Clamp(std::floor(sample_loc * 256.0f), 0.0f, 255.0f)); - float delta = sample_loc * 256 - lutindex; - dist_atten = LookupLightingLut(lighting_state, lut, lutindex, delta); - } - - auto GetLutValue = [&](LightingRegs::LightingLutInput input, bool abs, - LightingRegs::LightingScale scale_enum, - LightingRegs::LightingSampler sampler) { - Math::Vec3 norm_view = view.Normalized(); - Math::Vec3 half_angle = (norm_view + light_vector).Normalized(); - float result = 0.0f; - - switch (input) { - case LightingRegs::LightingLutInput::NH: - result = Math::Dot(normal, half_angle); - break; - - case LightingRegs::LightingLutInput::VH: - result = Math::Dot(norm_view, half_angle); - break; - - case LightingRegs::LightingLutInput::NV: - result = 
Math::Dot(normal, norm_view); - break; - - case LightingRegs::LightingLutInput::LN: - result = Math::Dot(light_vector, normal); - break; - - default: - LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast(input)); - UNIMPLEMENTED(); - result = 0.0f; - } - - u8 index; - float delta; - - if (abs) { - if (light_config.config.two_sided_diffuse) - result = std::abs(result); - else - result = std::max(result, 0.0f); - - float flr = std::floor(result * 256.0f); - index = static_cast(MathUtil::Clamp(flr, 0.0f, 255.0f)); - delta = result * 256 - index; - } else { - float flr = std::floor(result * 128.0f); - s8 signed_index = static_cast(MathUtil::Clamp(flr, -128.0f, 127.0f)); - delta = result * 128.0f - signed_index; - index = static_cast(signed_index); - } - - float scale = lighting.lut_scale.GetScale(scale_enum); - return scale * - LookupLightingLut(lighting_state, static_cast(sampler), index, delta); - }; - - // Specular 0 component - float d0_lut_value = 1.0f; - if (lighting.config1.disable_lut_d0 == 0 && - LightingRegs::IsLightingSamplerSupported( - lighting.config0.config, LightingRegs::LightingSampler::Distribution0)) { - d0_lut_value = - GetLutValue(lighting.lut_input.d0, lighting.abs_lut_input.disable_d0 == 0, - lighting.lut_scale.d0, LightingRegs::LightingSampler::Distribution0); - } - - Math::Vec3 specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); - - // If enabled, lookup ReflectRed value, otherwise, 1.0 is used - if (lighting.config1.disable_lut_rr == 0 && - LightingRegs::IsLightingSamplerSupported(lighting.config0.config, - LightingRegs::LightingSampler::ReflectRed)) { - refl_value.x = - GetLutValue(lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0, - lighting.lut_scale.rr, LightingRegs::LightingSampler::ReflectRed); - } else { - refl_value.x = 1.0f; - } - - // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used - if (lighting.config1.disable_lut_rg == 0 && - 
LightingRegs::IsLightingSamplerSupported(lighting.config0.config, - LightingRegs::LightingSampler::ReflectGreen)) { - refl_value.y = - GetLutValue(lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0, - lighting.lut_scale.rg, LightingRegs::LightingSampler::ReflectGreen); - } else { - refl_value.y = refl_value.x; - } - - // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used - if (lighting.config1.disable_lut_rb == 0 && - LightingRegs::IsLightingSamplerSupported(lighting.config0.config, - LightingRegs::LightingSampler::ReflectBlue)) { - refl_value.z = - GetLutValue(lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0, - lighting.lut_scale.rb, LightingRegs::LightingSampler::ReflectBlue); - } else { - refl_value.z = refl_value.x; - } - - // Specular 1 component - float d1_lut_value = 1.0f; - if (lighting.config1.disable_lut_d1 == 0 && - LightingRegs::IsLightingSamplerSupported( - lighting.config0.config, LightingRegs::LightingSampler::Distribution1)) { - d1_lut_value = - GetLutValue(lighting.lut_input.d1, lighting.abs_lut_input.disable_d1 == 0, - lighting.lut_scale.d1, LightingRegs::LightingSampler::Distribution1); - } - - Math::Vec3 specular_1 = - d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); - - // Fresnel - if (lighting.config1.disable_lut_fr == 0 && - LightingRegs::IsLightingSamplerSupported(lighting.config0.config, - LightingRegs::LightingSampler::Fresnel)) { - - float lut_value = - GetLutValue(lighting.lut_input.fr, lighting.abs_lut_input.disable_fr == 0, - lighting.lut_scale.fr, LightingRegs::LightingSampler::Fresnel); - - // Enabled for diffuse lighting alpha component - if (lighting.config0.fresnel_selector == - LightingRegs::LightingFresnelSelector::PrimaryAlpha || - lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { - diffuse_sum.a() *= lut_value; - } - - // Enabled for the specular lighting alpha component - if (lighting.config0.fresnel_selector == - 
LightingRegs::LightingFresnelSelector::SecondaryAlpha || - lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { - specular_sum.a() *= lut_value; - } - } - - auto dot_product = Math::Dot(light_vector, normal); - - // Calculate clamp highlights before applying the two-sided diffuse configuration to the dot - // product. - float clamp_highlights = 1.0f; - if (lighting.config0.clamp_highlights) { - if (dot_product <= 0.0f) - clamp_highlights = 0.0f; - else - clamp_highlights = 1.0f; - } - - if (light_config.config.two_sided_diffuse) - dot_product = std::abs(dot_product); - else - dot_product = std::max(dot_product, 0.0f); - - auto diffuse = - light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); - diffuse_sum += Math::MakeVec(diffuse * dist_atten, 0.0f); - - specular_sum += - Math::MakeVec((specular_0 + specular_1) * clamp_highlights * dist_atten, 0.0f); - } - - diffuse_sum += Math::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); - - auto diffuse = Math::MakeVec(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, - MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, - MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, - MathUtil::Clamp(diffuse_sum.w, 0.0f, 1.0f) * 255) - .Cast(); - auto specular = Math::MakeVec(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, - MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, - MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, - MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255) - .Cast(); - return {diffuse, specular}; -} - MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); /** -- cgit v1.2.3 From 2252a63f8036cdf2612243271ce29e6104f82825 Mon Sep 17 00:00:00 2001 From: wwylele Date: Thu, 3 Aug 2017 12:01:31 +0300 Subject: SwRasterizer/Lighting: shorten file name --- src/video_core/CMakeLists.txt | 4 +- src/video_core/swrasterizer/fragment_lighting.cpp | 250 ---------------------- src/video_core/swrasterizer/fragment_lighting.h | 18 -- 
src/video_core/swrasterizer/lighting.cpp | 250 ++++++++++++++++++++++ src/video_core/swrasterizer/lighting.h | 18 ++ src/video_core/swrasterizer/rasterizer.cpp | 2 +- 6 files changed, 271 insertions(+), 271 deletions(-) delete mode 100644 src/video_core/swrasterizer/fragment_lighting.cpp delete mode 100644 src/video_core/swrasterizer/fragment_lighting.h create mode 100644 src/video_core/swrasterizer/lighting.cpp create mode 100644 src/video_core/swrasterizer/lighting.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index b2280f2ef..cffa4c952 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -14,8 +14,8 @@ set(SRCS shader/shader.cpp shader/shader_interpreter.cpp swrasterizer/clipper.cpp - swrasterizer/fragment_lighting.cpp swrasterizer/framebuffer.cpp + swrasterizer/lighting.cpp swrasterizer/proctex.cpp swrasterizer/rasterizer.cpp swrasterizer/swrasterizer.cpp @@ -55,8 +55,8 @@ set(HEADERS shader/shader.h shader/shader_interpreter.h swrasterizer/clipper.h - swrasterizer/fragment_lighting.h swrasterizer/framebuffer.h + swrasterizer/lighting.h swrasterizer/proctex.h swrasterizer/rasterizer.h swrasterizer/swrasterizer.h diff --git a/src/video_core/swrasterizer/fragment_lighting.cpp b/src/video_core/swrasterizer/fragment_lighting.cpp deleted file mode 100644 index 45a86b5cd..000000000 --- a/src/video_core/swrasterizer/fragment_lighting.cpp +++ /dev/null @@ -1,250 +0,0 @@ -// Copyright 2017 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/math_util.h" -#include "video_core/swrasterizer/fragment_lighting.h" - -namespace Pica { - -static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut_index, u8 index, - float delta) { - ASSERT_MSG(lut_index < lighting.luts.size(), "Out of range lut"); - ASSERT_MSG(index < lighting.luts[lut_index].size(), "Out of range index"); - - const auto& lut = lighting.luts[lut_index][index]; - - float lut_value = lut.ToFloat(); - float lut_diff = lut.DiffToFloat(); - - return lut_value + lut_diff * delta; -} - -std::tuple, Math::Vec4> ComputeFragmentsColors( - const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, - const Math::Quaternion& normquat, const Math::Vec3& view) { - - // TODO(Subv): Bump mapping - Math::Vec3 surface_normal = {0.0f, 0.0f, 1.0f}; - - if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) { - LOG_CRITICAL(HW_GPU, "unimplemented bump mapping"); - UNIMPLEMENTED(); - } - - // Use the normalized the quaternion when performing the rotation - auto normal = Math::QuaternionRotate(normquat, surface_normal); - - Math::Vec4 diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f}; - Math::Vec4 specular_sum = {0.0f, 0.0f, 0.0f, 1.0f}; - - for (unsigned light_index = 0; light_index <= lighting.max_light_index; ++light_index) { - unsigned num = lighting.light_enable.GetNum(light_index); - const auto& light_config = lighting.light[num]; - - Math::Vec3 refl_value = {}; - Math::Vec3 position = {float16::FromRaw(light_config.x).ToFloat32(), - float16::FromRaw(light_config.y).ToFloat32(), - float16::FromRaw(light_config.z).ToFloat32()}; - Math::Vec3 light_vector; - - if (light_config.config.directional) - light_vector = position; - else - light_vector = position + view; - - light_vector.Normalize(); - - float dist_atten = 1.0f; - if (!lighting.IsDistAttenDisabled(num)) { - auto distance = (-view - position).Length(); - float scale = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); - 
float bias = Pica::float20::FromRaw(light_config.dist_atten_bias).ToFloat32(); - size_t lut = - static_cast(LightingRegs::LightingSampler::DistanceAttenuation) + num; - - float sample_loc = MathUtil::Clamp(scale * distance + bias, 0.0f, 1.0f); - - u8 lutindex = - static_cast(MathUtil::Clamp(std::floor(sample_loc * 256.0f), 0.0f, 255.0f)); - float delta = sample_loc * 256 - lutindex; - dist_atten = LookupLightingLut(lighting_state, lut, lutindex, delta); - } - - auto GetLutValue = [&](LightingRegs::LightingLutInput input, bool abs, - LightingRegs::LightingScale scale_enum, - LightingRegs::LightingSampler sampler) { - Math::Vec3 norm_view = view.Normalized(); - Math::Vec3 half_angle = (norm_view + light_vector).Normalized(); - float result = 0.0f; - - switch (input) { - case LightingRegs::LightingLutInput::NH: - result = Math::Dot(normal, half_angle); - break; - - case LightingRegs::LightingLutInput::VH: - result = Math::Dot(norm_view, half_angle); - break; - - case LightingRegs::LightingLutInput::NV: - result = Math::Dot(normal, norm_view); - break; - - case LightingRegs::LightingLutInput::LN: - result = Math::Dot(light_vector, normal); - break; - - default: - LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast(input)); - UNIMPLEMENTED(); - result = 0.0f; - } - - u8 index; - float delta; - - if (abs) { - if (light_config.config.two_sided_diffuse) - result = std::abs(result); - else - result = std::max(result, 0.0f); - - float flr = std::floor(result * 256.0f); - index = static_cast(MathUtil::Clamp(flr, 0.0f, 255.0f)); - delta = result * 256 - index; - } else { - float flr = std::floor(result * 128.0f); - s8 signed_index = static_cast(MathUtil::Clamp(flr, -128.0f, 127.0f)); - delta = result * 128.0f - signed_index; - index = static_cast(signed_index); - } - - float scale = lighting.lut_scale.GetScale(scale_enum); - return scale * - LookupLightingLut(lighting_state, static_cast(sampler), index, delta); - }; - - // Specular 0 component - float 
d0_lut_value = 1.0f; - if (lighting.config1.disable_lut_d0 == 0 && - LightingRegs::IsLightingSamplerSupported( - lighting.config0.config, LightingRegs::LightingSampler::Distribution0)) { - d0_lut_value = - GetLutValue(lighting.lut_input.d0, lighting.abs_lut_input.disable_d0 == 0, - lighting.lut_scale.d0, LightingRegs::LightingSampler::Distribution0); - } - - Math::Vec3 specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); - - // If enabled, lookup ReflectRed value, otherwise, 1.0 is used - if (lighting.config1.disable_lut_rr == 0 && - LightingRegs::IsLightingSamplerSupported(lighting.config0.config, - LightingRegs::LightingSampler::ReflectRed)) { - refl_value.x = - GetLutValue(lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0, - lighting.lut_scale.rr, LightingRegs::LightingSampler::ReflectRed); - } else { - refl_value.x = 1.0f; - } - - // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used - if (lighting.config1.disable_lut_rg == 0 && - LightingRegs::IsLightingSamplerSupported(lighting.config0.config, - LightingRegs::LightingSampler::ReflectGreen)) { - refl_value.y = - GetLutValue(lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0, - lighting.lut_scale.rg, LightingRegs::LightingSampler::ReflectGreen); - } else { - refl_value.y = refl_value.x; - } - - // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used - if (lighting.config1.disable_lut_rb == 0 && - LightingRegs::IsLightingSamplerSupported(lighting.config0.config, - LightingRegs::LightingSampler::ReflectBlue)) { - refl_value.z = - GetLutValue(lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0, - lighting.lut_scale.rb, LightingRegs::LightingSampler::ReflectBlue); - } else { - refl_value.z = refl_value.x; - } - - // Specular 1 component - float d1_lut_value = 1.0f; - if (lighting.config1.disable_lut_d1 == 0 && - LightingRegs::IsLightingSamplerSupported( - lighting.config0.config, LightingRegs::LightingSampler::Distribution1)) { 
- d1_lut_value = - GetLutValue(lighting.lut_input.d1, lighting.abs_lut_input.disable_d1 == 0, - lighting.lut_scale.d1, LightingRegs::LightingSampler::Distribution1); - } - - Math::Vec3 specular_1 = - d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); - - // Fresnel - if (lighting.config1.disable_lut_fr == 0 && - LightingRegs::IsLightingSamplerSupported(lighting.config0.config, - LightingRegs::LightingSampler::Fresnel)) { - - float lut_value = - GetLutValue(lighting.lut_input.fr, lighting.abs_lut_input.disable_fr == 0, - lighting.lut_scale.fr, LightingRegs::LightingSampler::Fresnel); - - // Enabled for diffuse lighting alpha component - if (lighting.config0.fresnel_selector == - LightingRegs::LightingFresnelSelector::PrimaryAlpha || - lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { - diffuse_sum.a() *= lut_value; - } - - // Enabled for the specular lighting alpha component - if (lighting.config0.fresnel_selector == - LightingRegs::LightingFresnelSelector::SecondaryAlpha || - lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { - specular_sum.a() *= lut_value; - } - } - - auto dot_product = Math::Dot(light_vector, normal); - - // Calculate clamp highlights before applying the two-sided diffuse configuration to the dot - // product. 
- float clamp_highlights = 1.0f; - if (lighting.config0.clamp_highlights) { - if (dot_product <= 0.0f) - clamp_highlights = 0.0f; - else - clamp_highlights = 1.0f; - } - - if (light_config.config.two_sided_diffuse) - dot_product = std::abs(dot_product); - else - dot_product = std::max(dot_product, 0.0f); - - auto diffuse = - light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); - diffuse_sum += Math::MakeVec(diffuse * dist_atten, 0.0f); - - specular_sum += - Math::MakeVec((specular_0 + specular_1) * clamp_highlights * dist_atten, 0.0f); - } - - diffuse_sum += Math::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); - - auto diffuse = Math::MakeVec(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, - MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, - MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, - MathUtil::Clamp(diffuse_sum.w, 0.0f, 1.0f) * 255) - .Cast(); - auto specular = Math::MakeVec(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, - MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, - MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, - MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255) - .Cast(); - return {diffuse, specular}; -} - -} // namespace Pica diff --git a/src/video_core/swrasterizer/fragment_lighting.h b/src/video_core/swrasterizer/fragment_lighting.h deleted file mode 100644 index 438dca926..000000000 --- a/src/video_core/swrasterizer/fragment_lighting.h +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2017 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include "common/quaternion.h" -#include "common/vector_math.h" -#include "video_core/pica_state.h" - -namespace Pica { - -std::tuple, Math::Vec4> ComputeFragmentsColors( - const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, - const Math::Quaternion& normquat, const Math::Vec3& view); - -} // namespace Pica diff --git a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp new file mode 100644 index 000000000..63088eee8 --- /dev/null +++ b/src/video_core/swrasterizer/lighting.cpp @@ -0,0 +1,250 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/math_util.h" +#include "video_core/swrasterizer/lighting.h" + +namespace Pica { + +static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut_index, u8 index, + float delta) { + ASSERT_MSG(lut_index < lighting.luts.size(), "Out of range lut"); + ASSERT_MSG(index < lighting.luts[lut_index].size(), "Out of range index"); + + const auto& lut = lighting.luts[lut_index][index]; + + float lut_value = lut.ToFloat(); + float lut_diff = lut.DiffToFloat(); + + return lut_value + lut_diff * delta; +} + +std::tuple, Math::Vec4> ComputeFragmentsColors( + const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, + const Math::Quaternion& normquat, const Math::Vec3& view) { + + // TODO(Subv): Bump mapping + Math::Vec3 surface_normal = {0.0f, 0.0f, 1.0f}; + + if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) { + LOG_CRITICAL(HW_GPU, "unimplemented bump mapping"); + UNIMPLEMENTED(); + } + + // Use the normalized the quaternion when performing the rotation + auto normal = Math::QuaternionRotate(normquat, surface_normal); + + Math::Vec4 diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f}; + Math::Vec4 specular_sum = {0.0f, 0.0f, 0.0f, 1.0f}; + + for (unsigned light_index = 0; light_index <= 
lighting.max_light_index; ++light_index) { + unsigned num = lighting.light_enable.GetNum(light_index); + const auto& light_config = lighting.light[num]; + + Math::Vec3 refl_value = {}; + Math::Vec3 position = {float16::FromRaw(light_config.x).ToFloat32(), + float16::FromRaw(light_config.y).ToFloat32(), + float16::FromRaw(light_config.z).ToFloat32()}; + Math::Vec3 light_vector; + + if (light_config.config.directional) + light_vector = position; + else + light_vector = position + view; + + light_vector.Normalize(); + + float dist_atten = 1.0f; + if (!lighting.IsDistAttenDisabled(num)) { + auto distance = (-view - position).Length(); + float scale = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); + float bias = Pica::float20::FromRaw(light_config.dist_atten_bias).ToFloat32(); + size_t lut = + static_cast(LightingRegs::LightingSampler::DistanceAttenuation) + num; + + float sample_loc = MathUtil::Clamp(scale * distance + bias, 0.0f, 1.0f); + + u8 lutindex = + static_cast(MathUtil::Clamp(std::floor(sample_loc * 256.0f), 0.0f, 255.0f)); + float delta = sample_loc * 256 - lutindex; + dist_atten = LookupLightingLut(lighting_state, lut, lutindex, delta); + } + + auto GetLutValue = [&](LightingRegs::LightingLutInput input, bool abs, + LightingRegs::LightingScale scale_enum, + LightingRegs::LightingSampler sampler) { + Math::Vec3 norm_view = view.Normalized(); + Math::Vec3 half_angle = (norm_view + light_vector).Normalized(); + float result = 0.0f; + + switch (input) { + case LightingRegs::LightingLutInput::NH: + result = Math::Dot(normal, half_angle); + break; + + case LightingRegs::LightingLutInput::VH: + result = Math::Dot(norm_view, half_angle); + break; + + case LightingRegs::LightingLutInput::NV: + result = Math::Dot(normal, norm_view); + break; + + case LightingRegs::LightingLutInput::LN: + result = Math::Dot(light_vector, normal); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast(input)); + UNIMPLEMENTED(); + 
result = 0.0f; + } + + u8 index; + float delta; + + if (abs) { + if (light_config.config.two_sided_diffuse) + result = std::abs(result); + else + result = std::max(result, 0.0f); + + float flr = std::floor(result * 256.0f); + index = static_cast(MathUtil::Clamp(flr, 0.0f, 255.0f)); + delta = result * 256 - index; + } else { + float flr = std::floor(result * 128.0f); + s8 signed_index = static_cast(MathUtil::Clamp(flr, -128.0f, 127.0f)); + delta = result * 128.0f - signed_index; + index = static_cast(signed_index); + } + + float scale = lighting.lut_scale.GetScale(scale_enum); + return scale * + LookupLightingLut(lighting_state, static_cast(sampler), index, delta); + }; + + // Specular 0 component + float d0_lut_value = 1.0f; + if (lighting.config1.disable_lut_d0 == 0 && + LightingRegs::IsLightingSamplerSupported( + lighting.config0.config, LightingRegs::LightingSampler::Distribution0)) { + d0_lut_value = + GetLutValue(lighting.lut_input.d0, lighting.abs_lut_input.disable_d0 == 0, + lighting.lut_scale.d0, LightingRegs::LightingSampler::Distribution0); + } + + Math::Vec3 specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); + + // If enabled, lookup ReflectRed value, otherwise, 1.0 is used + if (lighting.config1.disable_lut_rr == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::ReflectRed)) { + refl_value.x = + GetLutValue(lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0, + lighting.lut_scale.rr, LightingRegs::LightingSampler::ReflectRed); + } else { + refl_value.x = 1.0f; + } + + // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used + if (lighting.config1.disable_lut_rg == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::ReflectGreen)) { + refl_value.y = + GetLutValue(lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0, + lighting.lut_scale.rg, LightingRegs::LightingSampler::ReflectGreen); + } else 
{ + refl_value.y = refl_value.x; + } + + // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used + if (lighting.config1.disable_lut_rb == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::ReflectBlue)) { + refl_value.z = + GetLutValue(lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0, + lighting.lut_scale.rb, LightingRegs::LightingSampler::ReflectBlue); + } else { + refl_value.z = refl_value.x; + } + + // Specular 1 component + float d1_lut_value = 1.0f; + if (lighting.config1.disable_lut_d1 == 0 && + LightingRegs::IsLightingSamplerSupported( + lighting.config0.config, LightingRegs::LightingSampler::Distribution1)) { + d1_lut_value = + GetLutValue(lighting.lut_input.d1, lighting.abs_lut_input.disable_d1 == 0, + lighting.lut_scale.d1, LightingRegs::LightingSampler::Distribution1); + } + + Math::Vec3 specular_1 = + d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); + + // Fresnel + if (lighting.config1.disable_lut_fr == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::Fresnel)) { + + float lut_value = + GetLutValue(lighting.lut_input.fr, lighting.abs_lut_input.disable_fr == 0, + lighting.lut_scale.fr, LightingRegs::LightingSampler::Fresnel); + + // Enabled for diffuse lighting alpha component + if (lighting.config0.fresnel_selector == + LightingRegs::LightingFresnelSelector::PrimaryAlpha || + lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { + diffuse_sum.a() *= lut_value; + } + + // Enabled for the specular lighting alpha component + if (lighting.config0.fresnel_selector == + LightingRegs::LightingFresnelSelector::SecondaryAlpha || + lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { + specular_sum.a() *= lut_value; + } + } + + auto dot_product = Math::Dot(light_vector, normal); + + // Calculate clamp highlights before applying the two-sided 
diffuse configuration to the dot + // product. + float clamp_highlights = 1.0f; + if (lighting.config0.clamp_highlights) { + if (dot_product <= 0.0f) + clamp_highlights = 0.0f; + else + clamp_highlights = 1.0f; + } + + if (light_config.config.two_sided_diffuse) + dot_product = std::abs(dot_product); + else + dot_product = std::max(dot_product, 0.0f); + + auto diffuse = + light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); + diffuse_sum += Math::MakeVec(diffuse * dist_atten, 0.0f); + + specular_sum += + Math::MakeVec((specular_0 + specular_1) * clamp_highlights * dist_atten, 0.0f); + } + + diffuse_sum += Math::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); + + auto diffuse = Math::MakeVec(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.w, 0.0f, 1.0f) * 255) + .Cast(); + auto specular = Math::MakeVec(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255) + .Cast(); + return {diffuse, specular}; +} + +} // namespace Pica diff --git a/src/video_core/swrasterizer/lighting.h b/src/video_core/swrasterizer/lighting.h new file mode 100644 index 000000000..438dca926 --- /dev/null +++ b/src/video_core/swrasterizer/lighting.h @@ -0,0 +1,18 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "common/quaternion.h" +#include "common/vector_math.h" +#include "video_core/pica_state.h" + +namespace Pica { + +std::tuple, Math::Vec4> ComputeFragmentsColors( + const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, + const Math::Quaternion& normquat, const Math::Vec3& view); + +} // namespace Pica diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index bc7e1c56c..fdc1df199 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -24,8 +24,8 @@ #include "video_core/regs_rasterizer.h" #include "video_core/regs_texturing.h" #include "video_core/shader/shader.h" -#include "video_core/swrasterizer/fragment_lighting.h" #include "video_core/swrasterizer/framebuffer.h" +#include "video_core/swrasterizer/lighting.h" #include "video_core/swrasterizer/proctex.h" #include "video_core/swrasterizer/rasterizer.h" #include "video_core/swrasterizer/texturing.h" -- cgit v1.2.3 From baa24f4ea9d9c4d7c1bd60ba8a6fc188dfa9cc8f Mon Sep 17 00:00:00 2001 From: wwylele Date: Thu, 3 Aug 2017 01:40:42 +0300 Subject: pica: upload shared shader code to both unit --- src/video_core/command_processor.cpp | 62 +++++++++++++++++++++--------------- src/video_core/regs_pipeline.h | 9 +++++- 2 files changed, 45 insertions(+), 26 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 4633a1df1..f98ca3302 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -119,27 +119,6 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup, } } -static void WriteProgramCode(ShaderRegs& config, Shader::ShaderSetup& setup, - unsigned max_program_code_length, u32 value) { - if (config.program.offset >= max_program_code_length) { - LOG_ERROR(HW_GPU, "Invalid %s program offset %d", 
GetShaderSetupTypeName(setup), - (int)config.program.offset); - } else { - setup.program_code[config.program.offset] = value; - config.program.offset++; - } -} - -static void WriteSwizzlePatterns(ShaderRegs& config, Shader::ShaderSetup& setup, u32 value) { - if (config.swizzle_patterns.offset >= setup.swizzle_data.size()) { - LOG_ERROR(HW_GPU, "Invalid %s swizzle pattern offset %d", GetShaderSetupTypeName(setup), - (int)config.swizzle_patterns.offset); - } else { - setup.swizzle_data[config.swizzle_patterns.offset] = value; - config.swizzle_patterns.offset++; - } -} - static void WritePicaReg(u32 id, u32 value, u32 mask) { auto& regs = g_state.regs; @@ -458,7 +437,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[5], 0x2a1): case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[6], 0x2a2): case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[7], 0x2a3): { - WriteProgramCode(g_state.regs.gs, g_state.gs, 4096, value); + u32& offset = g_state.regs.gs.program.offset; + if (offset >= 4096) { + LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset); + } else { + g_state.gs.program_code[offset] = value; + offset++; + } break; } @@ -470,11 +455,18 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[5], 0x2ab): case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[6], 0x2ac): case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[7], 0x2ad): { - WriteSwizzlePatterns(g_state.regs.gs, g_state.gs, value); + u32& offset = g_state.regs.gs.swizzle_patterns.offset; + if (offset >= g_state.gs.swizzle_data.size()) { + LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset); + } else { + g_state.gs.swizzle_data[offset] = value; + offset++; + } break; } case PICA_REG_INDEX(vs.bool_uniforms): + // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this? 
WriteUniformBoolReg(g_state.vs, g_state.regs.vs.bool_uniforms.Value()); break; @@ -482,6 +474,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { + // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this? unsigned index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); auto values = regs.vs.int_uniforms[index]; WriteUniformIntReg(g_state.vs, index, @@ -497,6 +490,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { + // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this? WriteUniformFloatReg(g_state.regs.vs, g_state.vs, vs_float_regs_counter, vs_uniform_write_buffer, value); break; @@ -510,7 +504,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { - WriteProgramCode(g_state.regs.vs, g_state.vs, 512, value); + u32& offset = g_state.regs.vs.program.offset; + if (offset >= 512) { + LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset); + } else { + g_state.vs.program_code[offset] = value; + if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { + g_state.gs.program_code[offset] = value; + } + offset++; + } break; } @@ -522,7 +525,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 
0x2dd): { - WriteSwizzlePatterns(g_state.regs.vs, g_state.vs, value); + u32& offset = g_state.regs.vs.swizzle_patterns.offset; + if (offset >= g_state.vs.swizzle_data.size()) { + LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset); + } else { + g_state.vs.swizzle_data[offset] = value; + if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { + g_state.gs.swizzle_data[offset] = value; + } + offset++; + } break; } diff --git a/src/video_core/regs_pipeline.h b/src/video_core/regs_pipeline.h index 31c747d77..8b6369297 100644 --- a/src/video_core/regs_pipeline.h +++ b/src/video_core/regs_pipeline.h @@ -202,7 +202,14 @@ struct PipelineRegs { /// Number of input attributes to the vertex shader minus 1 BitField<0, 4, u32> max_input_attrib_index; - INSERT_PADDING_WORDS(2); + INSERT_PADDING_WORDS(1); + + // The shader unit 3, which can be used for both vertex and geometry shader, gets its + // configuration depending on this register. If this is not set, unit 3 will share some + // configuration with other units. It is known that program code and swizzle pattern uploaded + // via regs.vs will be also uploaded to unit 3 if this is not set. Although very likely, it is + // still unclear whether uniforms and other configuration can be also shared. 
+ BitField<0, 1, u32> gs_unit_exclusive_configuration; enum class GPUMode : u32 { Drawing = 0, -- cgit v1.2.3 From db309b2423a996cb792273080e73906b07f8b45b Mon Sep 17 00:00:00 2001 From: wwylele Date: Mon, 24 Jul 2017 14:13:33 +0300 Subject: pica/regs: layout geometry shader configuration regs All the register meanings are derived from ctrulib (3dbrew is outdated for most of them) --- src/video_core/regs_pipeline.h | 34 ++++++++++++++++++++++++++++++++-- src/video_core/regs_shader.h | 7 +++++++ 2 files changed, 39 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/regs_pipeline.h b/src/video_core/regs_pipeline.h index 8b6369297..e78c3e331 100644 --- a/src/video_core/regs_pipeline.h +++ b/src/video_core/regs_pipeline.h @@ -147,7 +147,15 @@ struct PipelineRegs { // Number of vertices to render u32 num_vertices; - INSERT_PADDING_WORDS(0x1); + enum class UseGS : u32 { + No = 0, + Yes = 2, + }; + + union { + BitField<0, 2, UseGS> use_gs; + BitField<31, 1, u32> variable_primitive; + }; // The index of the first vertex to render u32 vertex_offset; @@ -218,7 +226,29 @@ struct PipelineRegs { GPUMode gpu_mode; - INSERT_PADDING_WORDS(0x18); + INSERT_PADDING_WORDS(0x4); + BitField<0, 4, u32> vs_outmap_total_minus_1_a; + INSERT_PADDING_WORDS(0x6); + BitField<0, 4, u32> vs_outmap_total_minus_1_b; + + enum class GSMode : u32 { + Point = 0, + VariablePrimitive = 1, + FixedPrimitive = 2, + }; + + union { + BitField<0, 8, GSMode> mode; + BitField<8, 4, u32> fixed_vertex_num_minus_1; + BitField<12, 4, u32> stride_minus_1; + BitField<16, 4, u32> start_index; + } gs_config; + + INSERT_PADDING_WORDS(0x1); + + u32 variable_vertex_main_num_minus_1; + + INSERT_PADDING_WORDS(0x9); enum class TriangleTopology : u32 { List = 0, diff --git a/src/video_core/regs_shader.h b/src/video_core/regs_shader.h index ddb1ee451..c15d4d162 100644 --- a/src/video_core/regs_shader.h +++ b/src/video_core/regs_shader.h @@ -24,9 +24,16 @@ struct ShaderRegs { 
INSERT_PADDING_WORDS(0x4); + enum ShaderMode { + GS = 0x08, + VS = 0xA0, + }; + union { // Number of input attributes to shader unit - 1 BitField<0, 4, u32> max_input_attribute_index; + BitField<8, 8, u32> input_to_uniform; + BitField<24, 8, ShaderMode> shader_mode; }; // Offset to shader program entry point (in words) -- cgit v1.2.3 From 5d9d42f0d0e0b2619e7412b86699a9b2b9dfa4ea Mon Sep 17 00:00:00 2001 From: wwylele Date: Thu, 10 Aug 2017 11:56:55 +0300 Subject: SwRasterizer/Lighting: use make_tuple instead of constructor implicit tuple constructor is a c++17 thing, which is not supported by some not-so-old libraries. Play safe for now --- src/video_core/swrasterizer/lighting.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp index 63088eee8..d61e6d572 100644 --- a/src/video_core/swrasterizer/lighting.cpp +++ b/src/video_core/swrasterizer/lighting.cpp @@ -244,7 +244,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255) .Cast(); - return {diffuse, specular}; + return std::make_tuple(diffuse, specular); } } // namespace Pica -- cgit v1.2.3 From 14ee32c46a6dc97c1c6a0597e72e5284bf4e86e6 Mon Sep 17 00:00:00 2001 From: wwylele Date: Fri, 11 Aug 2017 01:13:55 +0300 Subject: SwRasterizer/Lighting: implement geometric factor --- src/video_core/swrasterizer/lighting.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp index d61e6d572..91683afa4 100644 --- a/src/video_core/swrasterizer/lighting.cpp +++ b/src/video_core/swrasterizer/lighting.cpp @@ -55,6 +55,9 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( light_vector.Normalize(); + Math::Vec3 norm_view = view.Normalized(); + Math::Vec3 
half_vector = norm_view + light_vector; + float dist_atten = 1.0f; if (!lighting.IsDistAttenDisabled(num)) { auto distance = (-view - position).Length(); @@ -74,17 +77,15 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( auto GetLutValue = [&](LightingRegs::LightingLutInput input, bool abs, LightingRegs::LightingScale scale_enum, LightingRegs::LightingSampler sampler) { - Math::Vec3 norm_view = view.Normalized(); - Math::Vec3 half_angle = (norm_view + light_vector).Normalized(); float result = 0.0f; switch (input) { case LightingRegs::LightingLutInput::NH: - result = Math::Dot(normal, half_angle); + result = Math::Dot(normal, half_vector.Normalized()); break; case LightingRegs::LightingLutInput::VH: - result = Math::Dot(norm_view, half_angle); + result = Math::Dot(norm_view, half_vector.Normalized()); break; case LightingRegs::LightingLutInput::NV: @@ -224,6 +225,17 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( else dot_product = std::max(dot_product, 0.0f); + if (light_config.config.geometric_factor_0 || light_config.config.geometric_factor_1) { + float geo_factor = half_vector.Length2(); + geo_factor = geo_factor == 0.0f ? 
0.0f : std::min(dot_product / geo_factor, 1.0f); + if (light_config.config.geometric_factor_0) { + specular_0 *= geo_factor; + } + if (light_config.config.geometric_factor_1) { + specular_1 *= geo_factor; + } + } + auto diffuse = light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); diffuse_sum += Math::MakeVec(diffuse * dist_atten, 0.0f); -- cgit v1.2.3 From 945f9a1b04d51aff674e0b7061c29a04211a17bd Mon Sep 17 00:00:00 2001 From: wwylele Date: Fri, 11 Aug 2017 00:41:37 +0300 Subject: SwRasterizer/Lighting: implement spot light --- src/video_core/swrasterizer/lighting.cpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp index d61e6d572..ffd35792a 100644 --- a/src/video_core/swrasterizer/lighting.cpp +++ b/src/video_core/swrasterizer/lighting.cpp @@ -95,6 +95,12 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( result = Math::Dot(light_vector, normal); break; + case LightingRegs::LightingLutInput::SP: { + Math::Vec3 spot_dir{light_config.spot_x.Value(), light_config.spot_y.Value(), + light_config.spot_z.Value()}; + result = Math::Dot(light_vector, spot_dir.Cast() / 2047.0f); + break; + } default: LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast(input)); UNIMPLEMENTED(); @@ -125,6 +131,16 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( LookupLightingLut(lighting_state, static_cast(sampler), index, delta); }; + // If enabled, compute spot light attenuation value + float spot_atten = 1.0f; + if (!lighting.IsSpotAttenDisabled(num) && + LightingRegs::IsLightingSamplerSupported( + lighting.config0.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { + auto lut = LightingRegs::SpotlightAttenuationSampler(num); + spot_atten = GetLutValue(lighting.lut_input.sp, lighting.abs_lut_input.disable_sp == 0, + lighting.lut_scale.sp, lut); + } + // Specular 0 component float 
d0_lut_value = 1.0f; if (lighting.config1.disable_lut_d0 == 0 && @@ -226,10 +242,10 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( auto diffuse = light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); - diffuse_sum += Math::MakeVec(diffuse * dist_atten, 0.0f); + diffuse_sum += Math::MakeVec(diffuse * dist_atten * spot_atten, 0.0f); - specular_sum += - Math::MakeVec((specular_0 + specular_1) * clamp_highlights * dist_atten, 0.0f); + specular_sum += Math::MakeVec( + (specular_0 + specular_1) * clamp_highlights * dist_atten * spot_atten, 0.0f); } diffuse_sum += Math::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); -- cgit v1.2.3 From 686fb3e78cb394bb7db18fd951d104ca86d805d9 Mon Sep 17 00:00:00 2001 From: wwylele Date: Fri, 11 Aug 2017 18:24:24 +0300 Subject: gl_shader_gen: don't call SampleTexture when bump map is not used --- src/video_core/renderer_opengl/gl_shader_gen.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index bb192affd..ae67aab05 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -525,11 +525,12 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { "float geo_factor = 1.0;\n"; // Compute fragment normals and tangents - const std::string pertubation = - "2.0 * (" + SampleTexture(config, lighting.bump_selector) + ").rgb - 1.0"; + auto Perturbation = [&]() { + return "2.0 * (" + SampleTexture(config, lighting.bump_selector) + ").rgb - 1.0"; + }; if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { // Bump mapping is enabled using a normal map - out += "vec3 surface_normal = " + pertubation + ";\n"; + out += "vec3 surface_normal = " + Perturbation() + ";\n"; // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher // precision 
result @@ -543,7 +544,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { // Bump mapping is enabled using a tangent map - out += "vec3 surface_tangent = " + pertubation + ";\n"; + out += "vec3 surface_tangent = " + Perturbation() + ";\n"; // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant // computation below, which is also confirmed on 3DS. So we don't bother recomputing here // even if 'renorm' is enabled. -- cgit v1.2.3 From 46c6973d2bde25a2a8ae9ac434660798fd1dfaee Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 25 Jul 2017 22:30:29 +0300 Subject: pica/shader: extend UnitState for GS Among four shader units in pica, a special unit can be configured to run both VS and GS program. GSUnitState represents this unit, which extends UnitState (which represents the other three normal units) with extra state for primitive emitting. It uses lots of raw pointers to represent internal structure in order to keep it standard layout type for JIT to access. This unit doesn't handle triangle winding (inverting) itself; instead, it calls a WindingSetter handler. 
This will be explained in the following commits --- src/video_core/shader/shader.cpp | 38 +++++++++++++++++++++++++++++++++ src/video_core/shader/shader.h | 46 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 67ed19ba8..b12468d3a 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -82,6 +82,44 @@ void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) { } } +UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {} + +GSEmitter::GSEmitter() { + handlers = new Handlers; +} + +GSEmitter::~GSEmitter() { + delete handlers; +} + +void GSEmitter::Emit(Math::Vec4 (&vertex)[16]) { + ASSERT(vertex_id < 3); + std::copy(std::begin(vertex), std::end(vertex), buffer[vertex_id].begin()); + if (prim_emit) { + if (winding) + handlers->winding_setter(); + for (size_t i = 0; i < buffer.size(); ++i) { + AttributeBuffer output; + unsigned int output_i = 0; + for (unsigned int reg : Common::BitSet(output_mask)) { + output.attr[output_i++] = buffer[i][reg]; + } + handlers->vertex_handler(output); + } + } +} + +GSUnitState::GSUnitState() : UnitState(&emitter) {} + +void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) { + emitter.handlers->vertex_handler = std::move(vertex_handler); + emitter.handlers->winding_setter = std::move(winding_setter); +} + +void GSUnitState::ConfigOutput(const ShaderRegs& config) { + emitter.output_mask = config.output_mask; +} + MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); #ifdef ARCHITECTURE_x86_64 diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index e156f6aef..caec96043 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include "common/assert.h" @@ -31,6 +32,12 @@ 
struct AttributeBuffer { alignas(16) Math::Vec4 attr[16]; }; +/// Handler type for receiving vertex outputs from vertex shader or geometry shader +using VertexHandler = std::function; + +/// Handler type for signaling to invert the vertex order of the next triangle +using WindingSetter = std::function; + struct OutputVertex { Math::Vec4 pos; Math::Vec4 quat; @@ -60,6 +67,29 @@ ASSERT_POS(tc2, RasterizerRegs::VSOutputAttributes::TEXCOORD2_U); static_assert(std::is_pod::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); +/** + * This structure contains state information for primitive emitting in geometry shader. + */ +struct GSEmitter { + std::array, 16>, 3> buffer; + u8 vertex_id; + bool prim_emit; + bool winding; + u32 output_mask; + + // Function objects are hidden behind a raw pointer to make the structure standard layout type, + // for JIT to use offsetof to access other members. + struct Handlers { + VertexHandler vertex_handler; + WindingSetter winding_setter; + } * handlers; + + GSEmitter(); + ~GSEmitter(); + void Emit(Math::Vec4 (&vertex)[16]); +}; +static_assert(std::is_standard_layout::value, "GSEmitter is not standard layout type"); + /** * This structure contains the state information that needs to be unique for a shader unit. The 3DS * has four shader units that process shaders in parallel. At the present, Citra only implements a @@ -67,6 +97,7 @@ static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has inva * here will make it easier for us to parallelize the shader processing later. */ struct UnitState { + explicit UnitState(GSEmitter* emitter = nullptr); struct Registers { // The registers are accessed by the shader JIT using SSE instructions, and are therefore // required to be 16-byte aligned. @@ -82,6 +113,8 @@ struct UnitState { // TODO: How many bits do these actually have? 
s32 address_registers[3]; + GSEmitter* emitter_ptr; + static size_t InputOffset(const SourceRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Input: @@ -125,6 +158,19 @@ struct UnitState { void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); }; +/** + * This is an extended shader unit state that represents the special unit that can run both vertex + * shader and geometry shader. It contains an additional primitive emitter and utilities for + * geometry shader. + */ +struct GSUnitState : public UnitState { + GSUnitState(); + void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter); + void ConfigOutput(const ShaderRegs& config); + + GSEmitter emitter; +}; + struct ShaderSetup { struct { // The float uniforms are accessed by the shader JIT using SSE instructions, and are -- cgit v1.2.3 From 28128348f21d83c30979ef10399a8a764bb08a73 Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 25 Jul 2017 22:43:25 +0300 Subject: pica/shader/interpreter: implement SETEMIT and EMIT --- src/video_core/shader/shader_interpreter.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 206c0978a..9d4da4904 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -636,6 +636,22 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData break; } + case OpCode::Id::EMIT: { + GSEmitter* emitter = state.emitter_ptr; + ASSERT_MSG(emitter, "Execute EMIT on VS"); + emitter->Emit(state.registers.output); + break; + } + + case OpCode::Id::SETEMIT: { + GSEmitter* emitter = state.emitter_ptr; + ASSERT_MSG(emitter, "Execute SETEMIT on VS"); + emitter->vertex_id = instr.setemit.vertex_id; + emitter->prim_emit = instr.setemit.prim_emit != 0; + emitter->winding = instr.setemit.winding != 0; + break; + } + default: 
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value().EffectiveOpCode(), -- cgit v1.2.3 From bb63ae305279d9a73ea70133c89e92a36dc79f69 Mon Sep 17 00:00:00 2001 From: wwylele Date: Wed, 26 Jul 2017 00:39:43 +0300 Subject: correct constness --- src/video_core/shader/shader.cpp | 3 ++- src/video_core/shader/shader.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index b12468d3a..e9063e616 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -21,7 +21,8 @@ namespace Pica { namespace Shader { -OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& input) { +OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, + const AttributeBuffer& input) { // Setup output data union { OutputVertex ret{}; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index caec96043..a3789da01 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -50,7 +50,8 @@ struct OutputVertex { INSERT_PADDING_WORDS(1); Math::Vec2 tc2; - static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& output); + static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, + const AttributeBuffer& output); }; #define ASSERT_POS(var, pos) \ static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \ -- cgit v1.2.3 From 36981a5aa6ffcc10417e533ab00de3b6f9bad067 Mon Sep 17 00:00:00 2001 From: wwylele Date: Wed, 26 Jul 2017 15:07:13 +0300 Subject: pica/primitive_assembly: Handle winding for GS primitive hwtest shows that, although GS always emit a group of three vertices as one primitive, it still respects to the topology type, as if the three vertices are input into the primitive assembler independently and sequentially. 
It is also shown that the winding flag in SETEMIT only takes effect for Shader topology type, which is believed to be the actual difference between List and Shader (hence removed the TODO). However, only Shader topology type is observed in official games when GS is in use, so the other mode seems to be just unintended usage. --- src/video_core/primitive_assembly.cpp | 15 ++++++++++++--- src/video_core/primitive_assembly.h | 7 +++++++ 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index acd2ac5e2..9c3dd4cab 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -17,15 +17,18 @@ template void PrimitiveAssembler::SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler) { switch (topology) { - // TODO: Figure out what's different with TriangleTopology::Shader. case PipelineRegs::TriangleTopology::List: case PipelineRegs::TriangleTopology::Shader: if (buffer_index < 2) { buffer[buffer_index++] = vtx; } else { buffer_index = 0; - - triangle_handler(buffer[0], buffer[1], vtx); + if (topology == PipelineRegs::TriangleTopology::Shader && winding) { + triangle_handler(buffer[1], buffer[0], vtx); + winding = false; + } else { + triangle_handler(buffer[0], buffer[1], vtx); + } } break; @@ -50,10 +53,16 @@ void PrimitiveAssembler::SubmitVertex(const VertexType& vtx, } } +template +void PrimitiveAssembler::SetWinding() { + winding = true; +} + template void PrimitiveAssembler::Reset() { buffer_index = 0; strip_ready = false; + winding = false; } template diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index e8eccdf27..12de8e3b9 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h @@ -29,6 +29,12 @@ struct PrimitiveAssembler { */ void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); + /** + * Invert the 
vertex order of the next triangle. Called by geometry shader emitter. + * This only takes effect for TriangleTopology::Shader. + */ + void SetWinding(); + /** * Resets the internal state of the PrimitiveAssembler. */ @@ -45,6 +51,7 @@ private: int buffer_index; VertexType buffer[2]; bool strip_ready = false; + bool winding = false; }; } // namespace -- cgit v1.2.3 From 8285ca4ad8f9a5d07c9a2ba91367fcf3756f5153 Mon Sep 17 00:00:00 2001 From: wwylele Date: Wed, 26 Jul 2017 18:44:52 +0300 Subject: pica/shader/jit: implement SETEMIT and EMIT --- src/video_core/shader/shader_jit_x64_compiler.cpp | 49 ++++++++++++++++++++++- src/video_core/shader/shader_jit_x64_compiler.h | 2 + 2 files changed, 49 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 42a57aab1..1b31623bd 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -75,8 +75,8 @@ const JitFunction instr_table[64] = { &JitShader::Compile_IF, // ifu &JitShader::Compile_IF, // ifc &JitShader::Compile_LOOP, // loop - nullptr, // emit - nullptr, // sete + &JitShader::Compile_EMIT, // emit + &JitShader::Compile_SETE, // sete &JitShader::Compile_JMP, // jmpc &JitShader::Compile_JMP, // jmpu &JitShader::Compile_CMP, // cmp @@ -772,6 +772,51 @@ void JitShader::Compile_JMP(Instruction instr) { } } +static void Emit(GSEmitter* emitter, Math::Vec4 (*output)[16]) { + emitter->Emit(*output); +} + +void JitShader::Compile_EMIT(Instruction instr) { + Label have_emitter, end; + mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); + test(rax, rax); + jnz(have_emitter); + + ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(ABI_PARAM1, reinterpret_cast("Execute EMIT on VS")); + CallFarFunction(*this, LogCritical); + ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + jmp(end); + + 
L(have_emitter); + ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(ABI_PARAM1, rax); + mov(ABI_PARAM2, STATE); + add(ABI_PARAM2, static_cast(offsetof(UnitState, registers.output))); + CallFarFunction(*this, Emit); + ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + L(end); +} + +void JitShader::Compile_SETE(Instruction instr) { + Label have_emitter, end; + mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); + test(rax, rax); + jnz(have_emitter); + + ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(ABI_PARAM1, reinterpret_cast("Execute SETEMIT on VS")); + CallFarFunction(*this, LogCritical); + ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + jmp(end); + + L(have_emitter); + mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id); + mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit); + mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding); + L(end); +} + void JitShader::Compile_Block(unsigned end) { while (program_counter < end) { Compile_NextInstr(); diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index 31af0ca48..4aee56b1d 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -66,6 +66,8 @@ public: void Compile_JMP(Instruction instr); void Compile_CMP(Instruction instr); void Compile_MAD(Instruction instr); + void Compile_EMIT(Instruction instr); + void Compile_SETE(Instruction instr); private: void Compile_Block(unsigned end); -- cgit v1.2.3 From 0f35755572fe63534813528de9a0710193f2e335 Mon Sep 17 00:00:00 2001 From: wwylele Date: Fri, 4 Aug 2017 17:03:17 +0300 Subject: pica/command_processor: build geometry pipeline and run geometry shader The geometry pipeline manages data transfer between VS, GS and primitive assembler. 
It has four known modes: - no GS mode: sends VS output directly to the primitive assembler (what citra currently does) - GS mode 0: sends VS output to GS input registers, and sends GS output to primitive assembler - GS mode 1: sends VS output to GS uniform registers, and sends GS output to primitive assembler. It also takes an index from the index buffer at the beginning of each primitive to determine the primitive size. - GS mode 2: similar to mode 1, but doesn't take the index and uses a fixed primitive size. hwtest shows that immediate mode also supports GS (at least for mode 0), so the geometry pipeline gets refactored into its own class to support both drawing modes. In the immediate mode, some games don't set the pipeline registers to a valid value until the first attribute input, so a geometry pipeline reset flag is set in the `pipeline.vs_default_attributes_setup.index` trigger, and the actual pipeline reconfiguration is triggered on the first attribute input. In the normal drawing mode with index buffer, the vertex cache is slightly modified to support the geometry pipeline. Instead of OutputVertex, it now holds AttributeBuffer, which is the input to the geometry pipeline. The AttributeBuffer->OutputVertex conversion is done inside the pipeline vertex handler. The actual hardware vertex cache is believed to be implemented in a similar way (because this is the only way that makes sense). Both the geometry pipeline and the GS unit rely on state preservation across drawing calls, so they are put into the global state. In the future, the other three vertex shader units should also be placed in the global state, and a scheduler should be implemented on top of the four units. Note that the current gs_unit already allows running VS on it in the future. 
--- src/video_core/CMakeLists.txt | 2 + src/video_core/command_processor.cpp | 54 +++---- src/video_core/geometry_pipeline.cpp | 274 +++++++++++++++++++++++++++++++++++ src/video_core/geometry_pipeline.h | 49 +++++++ src/video_core/pica.cpp | 21 ++- src/video_core/pica_state.h | 11 ++ 6 files changed, 383 insertions(+), 28 deletions(-) create mode 100644 src/video_core/geometry_pipeline.cpp create mode 100644 src/video_core/geometry_pipeline.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index cffa4c952..82f47d8a9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,6 +1,7 @@ set(SRCS command_processor.cpp debug_utils/debug_utils.cpp + geometry_pipeline.cpp pica.cpp primitive_assembly.cpp regs.cpp @@ -29,6 +30,7 @@ set(SRCS set(HEADERS command_processor.h debug_utils/debug_utils.h + geometry_pipeline.h gpu_debugger.h pica.h pica_state.h diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index f98ca3302..fb65a3a0a 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -161,6 +161,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): g_state.immediate.current_attribute = 0; + g_state.immediate.reset_geometry_pipeline = true; default_attr_counter = 0; break; @@ -234,16 +235,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { shader_engine->Run(g_state.vs, shader_unit); shader_unit.WriteOutput(regs.vs, output); - // Send to renderer - using Pica::Shader::OutputVertex; - auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, - const OutputVertex& v2) { - VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); - }; - - g_state.primitive_assembler.SubmitVertex( - Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output), - AddTriangle); + // Send to geometry pipeline + if 
(g_state.immediate.reset_geometry_pipeline) { + g_state.geometry_pipeline.Reconfigure(); + g_state.immediate.reset_geometry_pipeline = false; + } + ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); + g_state.geometry_pipeline.Setup(shader_engine); + g_state.geometry_pipeline.SubmitVertex(output); } } } @@ -321,8 +320,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // The size has been tuned for optimal balance between hit-rate and the cost of lookup const size_t VERTEX_CACHE_SIZE = 32; std::array vertex_cache_ids; - std::array vertex_cache; - Shader::OutputVertex output_vertex; + std::array vertex_cache; + Shader::AttributeBuffer vs_output; unsigned int vertex_cache_pos = 0; vertex_cache_ids.fill(-1); @@ -332,6 +331,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); + g_state.geometry_pipeline.Reconfigure(); + g_state.geometry_pipeline.Setup(shader_engine); + if (g_state.geometry_pipeline.NeedIndexInput()) + ASSERT(is_indexed); + for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { // Indexed rendering doesn't use the start offset unsigned int vertex = @@ -345,6 +349,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { bool vertex_cache_hit = false; if (is_indexed) { + if (g_state.geometry_pipeline.NeedIndexInput()) { + g_state.geometry_pipeline.SubmitIndex(vertex); + continue; + } + if (g_debug_context && Pica::g_debug_context->recorder) { int size = index_u16 ? 
2 : 1; memory_accesses.AddAccess(base_address + index_info.offset + size * index, @@ -353,7 +362,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { if (vertex == vertex_cache_ids[i]) { - output_vertex = vertex_cache[i]; + vs_output = vertex_cache[i]; vertex_cache_hit = true; break; } @@ -362,7 +371,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (!vertex_cache_hit) { // Initialize data for the current vertex - Shader::AttributeBuffer input, output{}; + Shader::AttributeBuffer input; loader.LoadVertex(base_address, index, vertex, input, memory_accesses); // Send to vertex shader @@ -371,26 +380,17 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { (void*)&input); shader_unit.LoadInput(regs.vs, input); shader_engine->Run(g_state.vs, shader_unit); - shader_unit.WriteOutput(regs.vs, output); - - // Retrieve vertex from register data - output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output); + shader_unit.WriteOutput(regs.vs, vs_output); if (is_indexed) { - vertex_cache[vertex_cache_pos] = output_vertex; + vertex_cache[vertex_cache_pos] = vs_output; vertex_cache_ids[vertex_cache_pos] = vertex; vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; } } - // Send to renderer - using Pica::Shader::OutputVertex; - auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, - const OutputVertex& v2) { - VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); - }; - - primitive_assembler.SubmitVertex(output_vertex, AddTriangle); + // Send to geometry pipeline + g_state.geometry_pipeline.SubmitVertex(vs_output); } for (auto& range : memory_accesses.ranges) { diff --git a/src/video_core/geometry_pipeline.cpp b/src/video_core/geometry_pipeline.cpp new file mode 100644 index 000000000..b146e2ecb --- /dev/null +++ b/src/video_core/geometry_pipeline.cpp @@ -0,0 +1,274 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any 
later version +// Refer to the license.txt file included. + +#include "video_core/geometry_pipeline.h" +#include "video_core/pica_state.h" +#include "video_core/regs.h" +#include "video_core/renderer_base.h" +#include "video_core/video_core.h" + +namespace Pica { + +/// An attribute buffering interface for different pipeline modes +class GeometryPipelineBackend { +public: + virtual ~GeometryPipelineBackend() = default; + + /// Checks if there is no incomplete data transfer + virtual bool IsEmpty() const = 0; + + /// Checks if the pipeline needs a direct input from index buffer + virtual bool NeedIndexInput() const = 0; + + /// Submits an index from index buffer + virtual void SubmitIndex(unsigned int val) = 0; + + /** + * Submits vertex attributes + * @param input attributes of a vertex output from vertex shader + * @return if the buffer is full and the geometry shader should be invoked + */ + virtual bool SubmitVertex(const Shader::AttributeBuffer& input) = 0; +}; + +// In the Point mode, vertex attributes are sent to the input registers in the geometry shader unit. +// The size of vertex shader outputs and geometry shader inputs are constants. Geometry shader is +// invoked upon inputs buffer filled up by vertex shader outputs. For example, if we have a geometry +// shader that takes 6 inputs, and the vertex shader outputs 2 attributes, it would take 3 vertices +// for one geometry shader invocation. +// TODO: what happens when the input size is not divisible by the output size? 
+class GeometryPipeline_Point : public GeometryPipelineBackend { +public: + GeometryPipeline_Point(const Regs& regs, Shader::GSUnitState& unit) : regs(regs), unit(unit) { + ASSERT(regs.pipeline.variable_primitive == 0); + ASSERT(regs.gs.input_to_uniform == 0); + vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; + size_t gs_input_num = regs.gs.max_input_attribute_index + 1; + ASSERT(gs_input_num % vs_output_num == 0); + buffer_cur = attribute_buffer.attr; + buffer_end = attribute_buffer.attr + gs_input_num; + } + + bool IsEmpty() const override { + return buffer_cur == attribute_buffer.attr; + } + + bool NeedIndexInput() const override { + return false; + } + + void SubmitIndex(unsigned int val) override { + UNREACHABLE(); + } + + bool SubmitVertex(const Shader::AttributeBuffer& input) override { + buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); + if (buffer_cur == buffer_end) { + buffer_cur = attribute_buffer.attr; + unit.LoadInput(regs.gs, attribute_buffer); + return true; + } + return false; + } + +private: + const Regs& regs; + Shader::GSUnitState& unit; + Shader::AttributeBuffer attribute_buffer; + Math::Vec4* buffer_cur; + Math::Vec4* buffer_end; + unsigned int vs_output_num; +}; + +// In VariablePrimitive mode, vertex attributes are buffered into the uniform registers in the +// geometry shader unit. The number of vertex is variable, which is specified by the first index +// value in the batch. This mode is usually used for subdivision. 
+class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend { +public: + GeometryPipeline_VariablePrimitive(const Regs& regs, Shader::ShaderSetup& setup) + : regs(regs), setup(setup) { + ASSERT(regs.pipeline.variable_primitive == 1); + ASSERT(regs.gs.input_to_uniform == 1); + vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; + } + + bool IsEmpty() const override { + return need_index; + } + + bool NeedIndexInput() const override { + return need_index; + } + + void SubmitIndex(unsigned int val) override { + DEBUG_ASSERT(need_index); + + // The number of vertex input is put to the uniform register + float24 vertex_num = float24::FromFloat32(val); + setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num); + + // The second uniform register and so on are used for receiving input vertices + buffer_cur = setup.uniforms.f + 1; + + main_vertex_num = regs.pipeline.variable_vertex_main_num_minus_1 + 1; + total_vertex_num = val; + need_index = false; + } + + bool SubmitVertex(const Shader::AttributeBuffer& input) override { + DEBUG_ASSERT(!need_index); + if (main_vertex_num != 0) { + // For main vertices, receive all attributes + buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); + --main_vertex_num; + } else { + // For other vertices, only receive the first attribute (usually the position) + *(buffer_cur++) = input.attr[0]; + } + --total_vertex_num; + + if (total_vertex_num == 0) { + need_index = true; + return true; + } + + return false; + } + +private: + bool need_index = true; + const Regs& regs; + Shader::ShaderSetup& setup; + unsigned int main_vertex_num; + unsigned int total_vertex_num; + Math::Vec4* buffer_cur; + unsigned int vs_output_num; +}; + +// In FixedPrimitive mode, vertex attributes are buffered into the uniform registers in the geometry +// shader unit. The number of vertex per shader invocation is constant. This is usually used for +// particle system. 
+class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend { +public: + GeometryPipeline_FixedPrimitive(const Regs& regs, Shader::ShaderSetup& setup) + : regs(regs), setup(setup) { + ASSERT(regs.pipeline.variable_primitive == 0); + ASSERT(regs.gs.input_to_uniform == 1); + vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; + ASSERT(vs_output_num == regs.pipeline.gs_config.stride_minus_1 + 1); + size_t vertex_num = regs.pipeline.gs_config.fixed_vertex_num_minus_1 + 1; + buffer_cur = buffer_begin = setup.uniforms.f + regs.pipeline.gs_config.start_index; + buffer_end = buffer_begin + vs_output_num * vertex_num; + } + + bool IsEmpty() const override { + return buffer_cur == buffer_begin; + } + + bool NeedIndexInput() const override { + return false; + } + + void SubmitIndex(unsigned int val) override { + UNREACHABLE(); + } + + bool SubmitVertex(const Shader::AttributeBuffer& input) override { + buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); + if (buffer_cur == buffer_end) { + buffer_cur = buffer_begin; + return true; + } + return false; + } + +private: + const Regs& regs; + Shader::ShaderSetup& setup; + Math::Vec4* buffer_begin; + Math::Vec4* buffer_cur; + Math::Vec4* buffer_end; + unsigned int vs_output_num; +}; + +GeometryPipeline::GeometryPipeline(State& state) : state(state) {} + +GeometryPipeline::~GeometryPipeline() = default; + +void GeometryPipeline::SetVertexHandler(Shader::VertexHandler vertex_handler) { + this->vertex_handler = vertex_handler; +} + +void GeometryPipeline::Setup(Shader::ShaderEngine* shader_engine) { + if (!backend) + return; + + this->shader_engine = shader_engine; + shader_engine->SetupBatch(state.gs, state.regs.gs.main_offset); +} + +void GeometryPipeline::Reconfigure() { + ASSERT(!backend || backend->IsEmpty()); + + if (state.regs.pipeline.use_gs == PipelineRegs::UseGS::No) { + backend = nullptr; + return; + } + + ASSERT(state.regs.pipeline.use_gs == PipelineRegs::UseGS::Yes); + + // The 
following assumes that when geometry shader is in use, the shader unit 3 is configured as + // a geometry shader unit. + // TODO: what happens if this is not true? + ASSERT(state.regs.pipeline.gs_unit_exclusive_configuration == 1); + ASSERT(state.regs.gs.shader_mode == ShaderRegs::ShaderMode::GS); + + state.gs_unit.ConfigOutput(state.regs.gs); + + ASSERT(state.regs.pipeline.vs_outmap_total_minus_1_a == + state.regs.pipeline.vs_outmap_total_minus_1_b); + + switch (state.regs.pipeline.gs_config.mode) { + case PipelineRegs::GSMode::Point: + backend = std::make_unique(state.regs, state.gs_unit); + break; + case PipelineRegs::GSMode::VariablePrimitive: + backend = std::make_unique(state.regs, state.gs); + break; + case PipelineRegs::GSMode::FixedPrimitive: + backend = std::make_unique(state.regs, state.gs); + break; + default: + UNREACHABLE(); + } +} + +bool GeometryPipeline::NeedIndexInput() const { + if (!backend) + return false; + return backend->NeedIndexInput(); +} + +void GeometryPipeline::SubmitIndex(unsigned int val) { + backend->SubmitIndex(val); +} + +void GeometryPipeline::SubmitVertex(const Shader::AttributeBuffer& input) { + if (!backend) { + // No backend means the geometry shader is disabled, so we send the vertex shader output + // directly to the primitive assembler. + vertex_handler(input); + } else { + if (backend->SubmitVertex(input)) { + shader_engine->Run(state.gs, state.gs_unit); + + // The uniform b15 is set to true after every geometry shader invocation. This is useful + // for the shader to know if this is the first invocation in a batch, if the program set + // b15 to false first. 
+ state.gs.uniforms.b[15] = true; + } + } +} + +} // namespace Pica diff --git a/src/video_core/geometry_pipeline.h b/src/video_core/geometry_pipeline.h new file mode 100644 index 000000000..91fdd3192 --- /dev/null +++ b/src/video_core/geometry_pipeline.h @@ -0,0 +1,49 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "video_core/shader/shader.h" + +namespace Pica { + +struct State; + +class GeometryPipelineBackend; + +/// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler +class GeometryPipeline { +public: + explicit GeometryPipeline(State& state); + ~GeometryPipeline(); + + /// Sets the handler for receiving vertex outputs from vertex shader + void SetVertexHandler(Shader::VertexHandler vertex_handler); + + /** + * Setup the geometry shader unit if it is in use + * @param shader_engine the shader engine for the geometry shader to run + */ + void Setup(Shader::ShaderEngine* shader_engine); + + /// Reconfigures the pipeline according to current register settings + void Reconfigure(); + + /// Checks if the pipeline needs a direct input from index buffer + bool NeedIndexInput() const; + + /// Submits an index from index buffer. Call this only when NeedIndexInput returns true + void SubmitIndex(unsigned int val); + + /// Submits vertex attributes output from vertex shader + void SubmitVertex(const Shader::AttributeBuffer& input); + +private: + Shader::VertexHandler vertex_handler; + Shader::ShaderEngine* shader_engine; + std::unique_ptr backend; + State& state; +}; +} // namespace Pica diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index b95148a6a..218e06883 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -3,9 +3,11 @@ // Refer to the license.txt file included. 
#include +#include "video_core/geometry_pipeline.h" #include "video_core/pica.h" #include "video_core/pica_state.h" -#include "video_core/regs_pipeline.h" +#include "video_core/renderer_base.h" +#include "video_core/video_core.h" namespace Pica { @@ -24,6 +26,23 @@ void Zero(T& o) { memset(&o, 0, sizeof(o)); } +State::State() : geometry_pipeline(*this) { + auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) { + using Pica::Shader::OutputVertex; + auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1, + const OutputVertex& v2) { + VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); + }; + primitive_assembler.SubmitVertex( + Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, vertex), AddTriangle); + }; + + auto SetWinding = [this]() { primitive_assembler.SetWinding(); }; + + g_state.gs_unit.SetVertexHandler(SubmitVertex, SetWinding); + g_state.geometry_pipeline.SetVertexHandler(SubmitVertex); +} + void State::Reset() { Zero(regs); Zero(vs); diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 864a2c9e6..c6634a0bc 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -8,6 +8,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "common/vector_math.h" +#include "video_core/geometry_pipeline.h" #include "video_core/primitive_assembly.h" #include "video_core/regs.h" #include "video_core/shader/shader.h" @@ -16,6 +17,7 @@ namespace Pica { /// Struct used to describe current Pica state struct State { + State(); void Reset(); /// Pica registers @@ -137,8 +139,17 @@ struct State { Shader::AttributeBuffer input_vertex; // Index of the next attribute to be loaded into `input_vertex`. 
u32 current_attribute = 0; + // Indicates the immediate mode just started and the geometry pipeline needs to reconfigure + bool reset_geometry_pipeline = true; } immediate; + // the geometry shader needs to be kept in the global state because some shaders rely on + // preserved register value across shader invocation. + // TODO: also bring the three vertex shader units here and implement the shader scheduler. + Shader::GSUnitState gs_unit; + + GeometryPipeline geometry_pipeline; + // This is constructed with a dummy triangle topology PrimitiveAssembler primitive_assembler; }; -- cgit v1.2.3 From 1eca380886b5028e027f1380c04f221ac94ed47d Mon Sep 17 00:00:00 2001 From: wwylele Date: Thu, 17 Aug 2017 10:46:59 +0300 Subject: gl_rasterizer: add clipping plane z<=0 defined in PICA --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 3 +++ src/video_core/renderer_opengl/gl_shader_gen.cpp | 2 ++ src/video_core/renderer_opengl/gl_state.cpp | 13 +++++++++++++ src/video_core/renderer_opengl/gl_state.h | 3 +++ 4 files changed, 21 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1c6c15a58..aa95ef21d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -28,6 +28,9 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { + // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 + state.clip_distance[0] = true; + // Create sampler objects for (size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index ae67aab05..0dae4b91e 100644 --- 
a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -1196,6 +1196,8 @@ void main() { normquat = vert_normquat; view = vert_view; gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); + gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 + // TODO (wwylele): calculate gl_ClipDistance[1] from user-defined clipping plane } )"; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index bc9d34b84..06a905766 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -68,6 +68,8 @@ OpenGLState::OpenGLState() { draw.vertex_buffer = 0; draw.uniform_buffer = 0; draw.shader_program = 0; + + clip_distance = {}; } void OpenGLState::Apply() const { @@ -261,6 +263,17 @@ void OpenGLState::Apply() const { glUseProgram(draw.shader_program); } + // Clip distance + for (size_t i = 0; i < clip_distance.size(); ++i) { + if (clip_distance[i] != cur_state.clip_distance[i]) { + if (clip_distance[i]) { + glEnable(GL_CLIP_DISTANCE0 + i); + } else { + glDisable(GL_CLIP_DISTANCE0 + i); + } + } + } + cur_state = *this; } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 745a74479..437fe34c4 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -4,6 +4,7 @@ #pragma once +#include #include namespace TextureUnits { @@ -123,6 +124,8 @@ public: GLuint shader_program; // GL_CURRENT_PROGRAM } draw; + std::array clip_distance; // GL_CLIP_DISTANCE + OpenGLState(); /// Get the currently active OpenGL state -- cgit v1.2.3 From 5a4af616c67a4d7968c71b419795777c3601341b Mon Sep 17 00:00:00 2001 From: wwylele Date: Thu, 17 Aug 2017 10:56:15 +0300 Subject: gl_shader_gen: simplify and clarify the depth transformation between vertex shader and fragment shader --- 
src/video_core/renderer_opengl/gl_shader_gen.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 0dae4b91e..015e69da9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -1112,7 +1112,10 @@ vec4 secondary_fragment_color = vec4(0.0); "gl_FragCoord.y < scissor_y2)) discard;\n"; } - out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; + // After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use + // default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then + // do our own transformation according to PICA specification. + out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n"; out += "float depth = z_over_w * depth_scale + depth_offset;\n"; if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { out += "depth /= gl_FragCoord.w;\n"; @@ -1195,7 +1198,7 @@ void main() { texcoord0_w = vert_texcoord0_w; normquat = vert_normquat; view = vert_view; - gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); + gl_Position = vert_position; gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 // TODO (wwylele): calculate gl_ClipDistance[1] from user-defined clipping plane } -- cgit v1.2.3 From 72b26ac32f74457d017e4eb96d83e2a66e713a5a Mon Sep 17 00:00:00 2001 From: wwylele Date: Thu, 17 Aug 2017 10:57:31 +0300 Subject: swrasterizer/clipper: remove tested TODO hwtested. 
Current implementation is the correct behavior --- src/video_core/swrasterizer/clipper.cpp | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp index 7537689b7..cdbc71502 100644 --- a/src/video_core/swrasterizer/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp @@ -125,10 +125,6 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu {Math::MakeVec(f0, f0, f0, -f1), Math::Vec4(f0, f0, f0, EPSILON)}, // w = EPSILON }}; - // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii) - // drop the whole primitive instead of clipping the primitive properly. We should test if - // this happens on the 3DS, too. - // Simple implementation of the Sutherland-Hodgman clipping algorithm. // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) for (auto edge : clipping_edges) { -- cgit v1.2.3 From 63b6e802cdffc1464b4a1fe3f5171d71146e8e9a Mon Sep 17 00:00:00 2001 From: wwylele Date: Thu, 17 Aug 2017 11:02:19 +0300 Subject: swrasterizer: remove invalid TODO This function is called in clipping, before the perspective divide, and is not used in later rasterization. Thus it doesn't need perspective correction. 
--- src/video_core/swrasterizer/rasterizer.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/rasterizer.h b/src/video_core/swrasterizer/rasterizer.h index 2f0877581..66cd6cfd4 100644 --- a/src/video_core/swrasterizer/rasterizer.h +++ b/src/video_core/swrasterizer/rasterizer.h @@ -19,10 +19,9 @@ struct Vertex : Shader::OutputVertex { // Linear interpolation // factor: 0=this, 1=vtx + // Note: This function cannot be called after perspective divide void Lerp(float24 factor, const Vertex& vtx) { pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); - - // TODO: Should perform perspective correct interpolation here... quat = quat * factor + vtx.quat * (float24::FromFloat32(1) - factor); color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); @@ -30,12 +29,11 @@ struct Vertex : Shader::OutputVertex { tc0_w = tc0_w * factor + vtx.tc0_w * (float24::FromFloat32(1) - factor); view = view * factor + vtx.view * (float24::FromFloat32(1) - factor); tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); - - screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); } // Linear interpolation // factor: 0=v0, 1=v1 + // Note: This function cannot be called after perspective divide static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) { Vertex ret = v0; ret.Lerp(factor, v1); -- cgit v1.2.3 From 3e478ca13110639a67ad95880aae5d7d13e096b7 Mon Sep 17 00:00:00 2001 From: wwylele Date: Fri, 18 Aug 2017 15:04:56 +0300 Subject: SwRasterizer/Lighting: implement bump mapping --- src/video_core/swrasterizer/lighting.cpp | 28 +++++++++++++++++++++++----- src/video_core/swrasterizer/lighting.h | 3 ++- src/video_core/swrasterizer/rasterizer.cpp | 4 ++-- 3 files changed, 27 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git 
a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp index 39a3e396d..4f16bac07 100644 --- a/src/video_core/swrasterizer/lighting.cpp +++ b/src/video_core/swrasterizer/lighting.cpp @@ -22,14 +22,32 @@ static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut std::tuple, Math::Vec4> ComputeFragmentsColors( const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, - const Math::Quaternion& normquat, const Math::Vec3& view) { + const Math::Quaternion& normquat, const Math::Vec3& view, + const Math::Vec4 (&texture_color)[4]) { - // TODO(Subv): Bump mapping - Math::Vec3 surface_normal = {0.0f, 0.0f, 1.0f}; + Math::Vec3 surface_normal; + Math::Vec3 surface_tangent; if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) { - LOG_CRITICAL(HW_GPU, "unimplemented bump mapping"); - UNIMPLEMENTED(); + Math::Vec3 perturbation = + texture_color[lighting.config0.bump_selector].xyz().Cast() / 127.5f - + Math::MakeVec(1.0f, 1.0f, 1.0f); + if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { + if (!lighting.config0.disable_bump_renorm) { + const float z_square = 1 - perturbation.xy().Length2(); + perturbation.z = std::sqrt(std::max(z_square, 0.0f)); + } + surface_normal = perturbation; + surface_tangent = Math::MakeVec(1.0f, 0.0f, 0.0f); + } else if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { + surface_normal = Math::MakeVec(0.0f, 0.0f, 1.0f); + surface_tangent = perturbation; + } else { + LOG_ERROR(HW_GPU, "Unknown bump mode %u", lighting.config0.bump_mode.Value()); + } + } else { + surface_normal = Math::MakeVec(0.0f, 0.0f, 1.0f); + surface_tangent = Math::MakeVec(1.0f, 0.0f, 0.0f); } // Use the normalized the quaternion when performing the rotation diff --git a/src/video_core/swrasterizer/lighting.h b/src/video_core/swrasterizer/lighting.h index 438dca926..d807a3d94 100644 --- a/src/video_core/swrasterizer/lighting.h +++ 
b/src/video_core/swrasterizer/lighting.h @@ -13,6 +13,7 @@ namespace Pica { std::tuple, Math::Vec4> ComputeFragmentsColors( const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, - const Math::Quaternion& normquat, const Math::Vec3& view); + const Math::Quaternion& normquat, const Math::Vec3& view, + const Math::Vec4 (&texture_color)[4]); } // namespace Pica diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index fdc1df199..862135614 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -437,8 +437,8 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve GetInterpolatedAttribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(), }; - std::tie(primary_fragment_color, secondary_fragment_color) = - ComputeFragmentsColors(g_state.regs.lighting, g_state.lighting, normquat, view); + std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors( + g_state.regs.lighting, g_state.lighting, normquat, view, texture_color); } for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); -- cgit v1.2.3 From b5aa5703540adceb1fc867b577dad50388a47e15 Mon Sep 17 00:00:00 2001 From: wwylele Date: Fri, 18 Aug 2017 16:35:11 +0300 Subject: SwRasterizer/Lighting: implement LUT input CP --- src/video_core/swrasterizer/lighting.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp index 4f16bac07..b38964530 100644 --- a/src/video_core/swrasterizer/lighting.cpp +++ b/src/video_core/swrasterizer/lighting.cpp @@ -52,6 +52,7 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( // Use the normalized the quaternion when performing the rotation auto normal = Math::QuaternionRotate(normquat, surface_normal); + auto 
tangent = Math::QuaternionRotate(normquat, surface_tangent); Math::Vec4 diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f}; Math::Vec4 specular_sum = {0.0f, 0.0f, 0.0f, 1.0f}; @@ -120,6 +121,16 @@ std::tuple, Math::Vec4> ComputeFragmentsColors( result = Math::Dot(light_vector, spot_dir.Cast() / 2047.0f); break; } + case LightingRegs::LightingLutInput::CP: + if (lighting.config0.config == LightingRegs::LightingConfig::Config7) { + const Math::Vec3 norm_half_vector = half_vector.Normalized(); + const Math::Vec3 half_vector_proj = + norm_half_vector - normal * Math::Dot(normal, norm_half_vector); + result = Math::Dot(half_vector_proj, tangent); + } else { + result = 0.0f; + } + break; default: LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast(input)); UNIMPLEMENTED(); -- cgit v1.2.3 From 17c6104d2afda7bf354c454f87561a3dbdf524e3 Mon Sep 17 00:00:00 2001 From: wwylele Date: Mon, 21 Aug 2017 12:03:38 +0300 Subject: gl_rasterizer/lighting: more accurate CP formula --- src/video_core/renderer_opengl/gl_shader_gen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index ae67aab05..d85f281e5 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -594,8 +594,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // Note: even if the normal vector is modified by normal map, which is not the // normal of the tangent plane anymore, the half angle vector is still projected // using the modified normal vector. 
- std::string half_angle_proj = "normalize(half_vector) - normal / dot(normal, " - "normal) * dot(normal, normalize(half_vector))"; + std::string half_angle_proj = + "normalize(half_vector) - normal * dot(normal, normalize(half_vector))"; // Note: the half angle vector projection is confirmed not normalized before the dot // product. The result is in fact not cos(phi) as the name suggested. index = "dot(" + half_angle_proj + ", tangent)"; -- cgit v1.2.3 From ea51a3af261254e5455f63a0ef41e55ef1dfc471 Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 22 Aug 2017 09:49:26 +0300 Subject: SwRasterizer: implement custom clip plane --- src/video_core/regs_rasterizer.h | 14 ++++++++++++-- src/video_core/swrasterizer/clipper.cpp | 15 +++++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/regs_rasterizer.h b/src/video_core/regs_rasterizer.h index 2874fd127..4fef00d76 100644 --- a/src/video_core/regs_rasterizer.h +++ b/src/video_core/regs_rasterizer.h @@ -5,10 +5,10 @@ #pragma once #include - #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/pica_types.h" namespace Pica { @@ -31,7 +31,17 @@ struct RasterizerRegs { BitField<0, 24, u32> viewport_size_y; - INSERT_PADDING_WORDS(0x9); + INSERT_PADDING_WORDS(0x3); + + BitField<0, 1, u32> clip_enable; + BitField<0, 24, u32> clip_coef[4]; // float24 + + Math::Vec4 GetClipCoef() const { + return {float24::FromRaw(clip_coef[0]), float24::FromRaw(clip_coef[1]), + float24::FromRaw(clip_coef[2]), float24::FromRaw(clip_coef[3])}; + } + + INSERT_PADDING_WORDS(0x1); BitField<0, 24, u32> viewport_depth_range; // float24 BitField<0, 24, u32> viewport_depth_near_plane; // float24 diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp index cdbc71502..cc76ba555 100644 --- a/src/video_core/swrasterizer/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp @@ -127,8 
+127,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu // Simple implementation of the Sutherland-Hodgman clipping algorithm. // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) - for (auto edge : clipping_edges) { - + auto Clip = [&](const ClippingEdge& edge) { std::swap(input_list, output_list); output_list->clear(); @@ -147,12 +146,24 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu } reference_vertex = &vertex; } + }; + + for (auto edge : clipping_edges) { + Clip(edge); // Need to have at least a full triangle to continue... if (output_list->size() < 3) return; } + if (g_state.regs.rasterizer.clip_enable) { + ClippingEdge custom_edge{-g_state.regs.rasterizer.GetClipCoef()}; + Clip(custom_edge); + + if (output_list->size() < 3) + return; + } + InitScreenCoordinates((*output_list)[0]); InitScreenCoordinates((*output_list)[1]); -- cgit v1.2.3 From addbcd5784c8195f49cecc20834537c80d1c8c72 Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 22 Aug 2017 09:49:53 +0300 Subject: gl_rasterizer: implement custom clip plane --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 28 +++++++++ src/video_core/renderer_opengl/gl_rasterizer.h | 9 ++- src/video_core/renderer_opengl/gl_shader_gen.cpp | 80 ++++++++++++++---------- 3 files changed, 83 insertions(+), 34 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index aa95ef21d..7b0cd1b66 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -169,6 +169,8 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); // Sync fixed function OpenGL state + SyncClipEnabled(); + SyncClipCoef(); SyncCullMode(); SyncBlendEnabled(); SyncBlendFuncs(); @@ -401,6 +403,18 @@ void 
RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncCullMode(); break; + // Clipping plane + case PICA_REG_INDEX(rasterizer.clip_enable): + SyncClipEnabled(); + break; + + case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[0], 0x48): + case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[1], 0x49): + case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[2], 0x4a): + case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[3], 0x4b): + SyncClipCoef(); + break; + // Depth modifiers case PICA_REG_INDEX(rasterizer.viewport_depth_range): SyncDepthScale(); @@ -1280,6 +1294,20 @@ void RasterizerOpenGL::SetShader() { } } +void RasterizerOpenGL::SyncClipEnabled() { + state.clip_distance[1] = Pica::g_state.regs.rasterizer.clip_enable != 0; +} + +void RasterizerOpenGL::SyncClipCoef() { + const auto raw_clip_coef = Pica::g_state.regs.rasterizer.GetClipCoef(); + const GLvec4 new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), + raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()}; + if (new_clip_coef != uniform_block_data.data.clip_coef) { + uniform_block_data.data.clip_coef = new_clip_coef; + uniform_block_data.dirty = true; + } +} + void RasterizerOpenGL::SyncCullMode() { const auto& regs = Pica::g_state.regs; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 78e218efe..46c62961c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -151,14 +151,21 @@ private: LightSrc light_src[8]; alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages alignas(16) GLvec4 tev_combiner_buffer_color; + alignas(16) GLvec4 clip_coef; }; static_assert( - sizeof(UniformData) == 0x460, + sizeof(UniformData) == 0x470, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); + 
/// Syncs the clip enabled status to match the PICA register + void SyncClipEnabled(); + + /// Syncs the clip coefficients to match the PICA register + void SyncClipCoef(); + /// Sets the OpenGL shader in accordance with the current PICA register state void SetShader(); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 015e69da9..aa60b2e7f 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -24,6 +24,42 @@ using TevStageConfig = TexturingRegs::TevStageConfig; namespace GLShader { +static const std::string UniformBlockDef = R"( +#define NUM_TEV_STAGES 6 +#define NUM_LIGHTS 8 + +struct LightSrc { + vec3 specular_0; + vec3 specular_1; + vec3 diffuse; + vec3 ambient; + vec3 position; + vec3 spot_direction; + float dist_atten_bias; + float dist_atten_scale; +}; + +layout (std140) uniform shader_data { + vec2 framebuffer_scale; + int alphatest_ref; + float depth_scale; + float depth_offset; + int scissor_x1; + int scissor_y1; + int scissor_x2; + int scissor_y2; + vec3 fog_color; + vec2 proctex_noise_f; + vec2 proctex_noise_a; + vec2 proctex_noise_p; + vec3 lighting_global_ambient; + LightSrc light_src[NUM_LIGHTS]; + vec4 const_color[NUM_TEV_STAGES]; + vec4 tev_combiner_buffer_color; + vec4 clip_coef; +}; +)"; + PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) { PicaShaderConfig res; @@ -1008,8 +1044,6 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { std::string out = R"( #version 330 core -#define NUM_TEV_STAGES 6 -#define NUM_LIGHTS 8 in vec4 primary_color; in vec2 texcoord[3]; @@ -1021,36 +1055,6 @@ in vec4 gl_FragCoord; out vec4 color; -struct LightSrc { - vec3 specular_0; - vec3 specular_1; - vec3 diffuse; - vec3 ambient; - vec3 position; - vec3 spot_direction; - float dist_atten_bias; - float dist_atten_scale; -}; - -layout (std140) uniform shader_data { - vec2 framebuffer_scale; - int 
alphatest_ref; - float depth_scale; - float depth_offset; - int scissor_x1; - int scissor_y1; - int scissor_x2; - int scissor_y2; - vec3 fog_color; - vec2 proctex_noise_f; - vec2 proctex_noise_a; - vec2 proctex_noise_p; - vec3 lighting_global_ambient; - LightSrc light_src[NUM_LIGHTS]; - vec4 const_color[NUM_TEV_STAGES]; - vec4 tev_combiner_buffer_color; -}; - uniform sampler2D tex[3]; uniform samplerBuffer lighting_lut; uniform samplerBuffer fog_lut; @@ -1059,7 +1063,11 @@ uniform samplerBuffer proctex_color_map; uniform samplerBuffer proctex_alpha_map; uniform samplerBuffer proctex_lut; uniform samplerBuffer proctex_diff_lut; +)"; + + out += UniformBlockDef; + out += R"( // Rotate the vector v by the quaternion q vec3 quaternion_rotate(vec4 q, vec3 v) { return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); @@ -1190,6 +1198,12 @@ out float texcoord0_w; out vec4 normquat; out vec3 view; +)"; + + out += UniformBlockDef; + + out += R"( + void main() { primary_color = vert_color; texcoord[0] = vert_texcoord0; @@ -1200,7 +1214,7 @@ void main() { view = vert_view; gl_Position = vert_position; gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 - // TODO (wwylele): calculate gl_ClipDistance[1] from user-defined clipping plane + gl_ClipDistance[1] = dot(clip_coef, vert_position); } )"; -- cgit v1.2.3 From 417cb45e3fc20a7529ce5d548ba0fbc36ea0a621 Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 22 Aug 2017 09:47:15 +0300 Subject: SwRasterizer/Clipper: flip the sign convention to match PICA and OpenGL --- src/video_core/swrasterizer/clipper.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp index cc76ba555..a52129eb7 100644 --- a/src/video_core/swrasterizer/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp @@ -31,7 +31,7 @@ public: : coeffs(coeffs), bias(bias) {} bool IsInside(const Vertex& 
vertex) const { - return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); + return Math::Dot(vertex.pos + bias, coeffs) >= float24::FromFloat32(0); } bool IsOutSide(const Vertex& vertex) const { @@ -116,13 +116,13 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu static const float24 f0 = float24::FromFloat32(0.0); static const float24 f1 = float24::FromFloat32(1.0); static const std::array<ClippingEdge, 7> clipping_edges = {{ - {Math::MakeVec(f1, f0, f0, -f1)}, // x = +w - {Math::MakeVec(-f1, f0, f0, -f1)}, // x = -w - {Math::MakeVec(f0, f1, f0, -f1)}, // y = +w - {Math::MakeVec(f0, -f1, f0, -f1)}, // y = -w - {Math::MakeVec(f0, f0, f1, f0)}, // z = 0 - {Math::MakeVec(f0, f0, -f1, -f1)}, // z = -w - {Math::MakeVec(f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON + {Math::MakeVec(-f1, f0, f0, f1)}, // x = +w + {Math::MakeVec(f1, f0, f0, f1)}, // x = -w + {Math::MakeVec(f0, -f1, f0, f1)}, // y = +w + {Math::MakeVec(f0, f1, f0, f1)}, // y = -w + {Math::MakeVec(f0, f0, -f1, f0)}, // z = 0 + {Math::MakeVec(f0, f0, f1, f1)}, // z = -w + {Math::MakeVec(f0, f0, f0, f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON }}; // Simple implementation of the Sutherland-Hodgman clipping algorithm. @@ -157,7 +157,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu } if (g_state.regs.rasterizer.clip_enable) { - ClippingEdge custom_edge{-g_state.regs.rasterizer.GetClipCoef()}; + ClippingEdge custom_edge{g_state.regs.rasterizer.GetClipCoef()}; Clip(custom_edge); if (output_list->size() < 3) -- cgit v1.2.3 From da88f3b8f0f9f1162b7ad41f70e2126195eee999 Mon Sep 17 00:00:00 2001 From: Subv Date: Mon, 21 Aug 2017 12:18:52 -0500 Subject: Warnings: Fixed a few missing-return warnings in video_core. 
--- src/video_core/regs_framebuffer.h | 10 ++++------ src/video_core/swrasterizer/framebuffer.cpp | 2 ++ src/video_core/swrasterizer/texturing.cpp | 4 ++++ 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/regs_framebuffer.h b/src/video_core/regs_framebuffer.h index a50bd4111..7b565f911 100644 --- a/src/video_core/regs_framebuffer.h +++ b/src/video_core/regs_framebuffer.h @@ -256,10 +256,9 @@ struct FramebufferRegs { return 3; case DepthFormat::D24S8: return 4; - default: - LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); - UNIMPLEMENTED(); } + + ASSERT_MSG(false, "Unknown depth format %u", format); } // Returns the number of bits per depth component of the specified depth format @@ -270,10 +269,9 @@ struct FramebufferRegs { case DepthFormat::D24: case DepthFormat::D24S8: return 24; - default: - LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); - UNIMPLEMENTED(); } + + ASSERT_MSG(false, "Unknown depth format %u", format); } INSERT_PADDING_WORDS(0x20); diff --git a/src/video_core/swrasterizer/framebuffer.cpp b/src/video_core/swrasterizer/framebuffer.cpp index 7de3aac75..f34eab6cf 100644 --- a/src/video_core/swrasterizer/framebuffer.cpp +++ b/src/video_core/swrasterizer/framebuffer.cpp @@ -352,6 +352,8 @@ u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) { case FramebufferRegs::LogicOp::OrInverted: return ~src | dest; } + + UNREACHABLE(); }; } // namespace Rasterizer diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp index 4f02b93f2..79b1ce841 100644 --- a/src/video_core/swrasterizer/texturing.cpp +++ b/src/video_core/swrasterizer/texturing.cpp @@ -89,6 +89,8 @@ Math::Vec3<u8> GetColorModifier(TevStageConfig::ColorModifier factor, case ColorModifier::OneMinusSourceBlue: return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>(); } + + UNREACHABLE(); }; u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& values) { @@ -119,6 
+121,8 @@ u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& case AlphaModifier::OneMinusSourceBlue: return 255 - values.b(); } + + UNREACHABLE(); }; Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> input[3]) { -- cgit v1.2.3 From e2c41a589198ff3162da8047a4c33162b02b0f2b Mon Sep 17 00:00:00 2001 From: wwylele Date: Thu, 31 Aug 2017 12:24:00 +0300 Subject: video_core: report telemetry for gas mode --- src/video_core/renderer_opengl/gl_shader_gen.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 3f390491a..c8fc7a0ff 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/bit_field.h" #include "common/logging/log.h" +#include "core/core.h" #include "video_core/regs_framebuffer.h" #include "video_core/regs_lighting.h" #include "video_core/regs_rasterizer.h" @@ -1155,6 +1156,11 @@ vec4 secondary_fragment_color = vec4(0.0); // Blend the fog out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n"; + } else if (state.fog_mode == TexturingRegs::FogMode::Gas) { + Core::Telemetry().AddField(Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", + true); + LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode"); + UNIMPLEMENTED(); } out += "gl_FragDepth = depth;\n"; -- cgit v1.2.3 From 12fbc8c8dff3265b03cffdd5bb5e6dd6537cd824 Mon Sep 17 00:00:00 2001 From: wwylele Date: Sun, 27 Aug 2017 07:33:27 +0300 Subject: pica/lighting: only apply Fresnel factor for the last light --- src/video_core/renderer_opengl/gl_shader_gen.cpp | 9 +++++---- src/video_core/swrasterizer/lighting.cpp | 7 ++++--- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp 
b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 3f390491a..b5f359da6 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -750,7 +750,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { } // Fresnel - if (lighting.lut_fr.enable && + // Note: only the last entry in the light slots applies the Fresnel factor + if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable && LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::Fresnel)) { // Lookup fresnel LUT value @@ -759,17 +760,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { lighting.lut_fr.type, lighting.lut_fr.abs_input); value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + value + ")"; - // Enabled for difffuse lighting alpha component + // Enabled for diffuse lighting alpha component if (lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha || lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { - out += "diffuse_sum.a *= " + value + ";\n"; + out += "diffuse_sum.a = " + value + ";\n"; } // Enabled for the specular lighting alpha component if (lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::SecondaryAlpha || lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { - out += "specular_sum.a *= " + value + ";\n"; + out += "specular_sum.a = " + value + ";\n"; } } diff --git a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp index b38964530..5fa748611 100644 --- a/src/video_core/swrasterizer/lighting.cpp +++ b/src/video_core/swrasterizer/lighting.cpp @@ -230,7 +230,8 @@ std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors( d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); // Fresnel - if (lighting.config1.disable_lut_fr == 0 && + // Note: only the last entry in the light slots applies the Fresnel 
factor + if (light_index == lighting.max_light_index && lighting.config1.disable_lut_fr == 0 && LightingRegs::IsLightingSamplerSupported(lighting.config0.config, LightingRegs::LightingSampler::Fresnel)) { @@ -242,14 +243,14 @@ std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors( if (lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha || lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { - diffuse_sum.a() *= lut_value; + diffuse_sum.a() = lut_value; } // Enabled for the specular lighting alpha component if (lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::SecondaryAlpha || lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { - specular_sum.a() *= lut_value; + specular_sum.a() = lut_value; } } -- cgit v1.2.3 From ad0b57f4071fb7ec9da764b3905e0bb5e4c5eef2 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Thu, 7 Sep 2017 22:05:42 -0600 Subject: GPU: Add draw for immediate and batch modes PR #1461 introduced a regression where some games would change configuration even while in the poorly named "drawing" mode, which broke the heuristic citra was using to determine when to draw the batch. This change adds back in a draw call for batching, and also adds in a draw call in immediate mode each time it adds a triangle. 
--- src/video_core/command_processor.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index fb65a3a0a..fff159058 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -243,6 +243,15 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); g_state.geometry_pipeline.Setup(shader_engine); g_state.geometry_pipeline.SubmitVertex(output); + + // TODO: If drawing after every immediate mode triangle kills performance, + // change it to flush triangles whenever a draing config register changes + // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550 + VideoCore::g_renderer->Rasterizer()->DrawTriangles(); + if (g_debug_context) { + g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, + nullptr); + } } } } @@ -398,6 +407,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { range.second, range.first); } + MICROPROFILE_SCOPE(GPU_Drawing); + VideoCore::g_renderer->Rasterizer()->DrawTriangles(); + if (g_debug_context) { + g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); + } + break; } @@ -632,6 +647,6 @@ void ProcessCommandList(const u32* list, u32 size) { } } -} // namespace +} // namespace CommandProcessor -} // namespace +} // namespace Pica -- cgit v1.2.3 From 6a110ac5f55502aa1330cc4dd09d11a4eb502e1b Mon Sep 17 00:00:00 2001 From: Huw Pascoe Date: Sat, 16 Sep 2017 04:30:35 +0100 Subject: Fixed framebuffer warning --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 25 +++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7b0cd1b66..7e09e4712 100644 --- 
a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -237,13 +237,24 @@ void RasterizerOpenGL::DrawTriangles() { glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - depth_surface != nullptr ? depth_surface->texture.handle : 0, 0); - bool has_stencil = - regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0); + if (depth_surface != nullptr) { + if (regs.framebuffer.framebuffer.depth_format == + Pica::FramebufferRegs::DepthFormat::D24S8) { + // attach both depth and stencil + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->texture.handle, 0); + } else { + // attach depth + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->texture.handle, 0); + // clear stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + } + } else { + // clear both depth and stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + } // Sync the viewport // These registers hold half-width and half-height, so must be multiplied by 2 -- cgit v1.2.3 From a234e4c2009b08039d0698cbbcc8595a1f04a615 Mon Sep 17 00:00:00 2001 From: Huw Pascoe Date: Sun, 17 Sep 2017 15:42:45 +0100 Subject: Improved performance of FromAttributeBuffer Ternary operator is optimized by the compiler whereas std::min() is meant to return a value. I've noticed a 5%-10% emulation speed increase. 
--- src/video_core/shader/shader.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index e9063e616..2857d2829 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -52,7 +52,8 @@ OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, // The hardware takes the absolute and saturates vertex colors like this, *before* doing // interpolation for (unsigned i = 0; i < 4; ++i) { - ret.color[i] = float24::FromFloat32(std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); + float c = std::fabs(ret.color[i].ToFloat32()); + ret.color[i] = float24::FromFloat32(c < 1.0f ? c : 1.0f); } LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " -- cgit v1.2.3 From 19d41dcc6e6892125f1123b34db3dc284f04b744 Mon Sep 17 00:00:00 2001 From: James Rowe Date: Sat, 23 Sep 2017 09:28:20 -0600 Subject: Remove pipeline.gpu_mode and fix minor issues --- src/video_core/command_processor.cpp | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index fff159058..3ab4af374 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -245,7 +245,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_state.geometry_pipeline.SubmitVertex(output); // TODO: If drawing after every immediate mode triangle kills performance, - // change it to flush triangles whenever a draing config register changes + // change it to flush triangles whenever a drawing config register changes // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550 VideoCore::g_renderer->Rasterizer()->DrawTriangles(); if (g_debug_context) { @@ -259,16 +259,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { } case 
PICA_REG_INDEX(pipeline.gpu_mode): - if (regs.pipeline.gpu_mode == PipelineRegs::GPUMode::Configuring) { - MICROPROFILE_SCOPE(GPU_Drawing); - - // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring - VideoCore::g_renderer->Rasterizer()->DrawTriangles(); - - if (g_debug_context) { - g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); - } - } + // This register likely just enables vertex processing and doesn't need any special handling break; case PICA_REG_INDEX_WORKAROUND(pipeline.command_buffer.trigger[0], 0x23c): @@ -407,7 +398,6 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { range.second, range.first); } - MICROPROFILE_SCOPE(GPU_Drawing); VideoCore::g_renderer->Rasterizer()->DrawTriangles(); if (g_debug_context) { g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); -- cgit v1.2.3 From 876aa82c29d2e17f8b5a4f74155971cba78c00b6 Mon Sep 17 00:00:00 2001 From: Huw Pascoe Date: Sun, 24 Sep 2017 22:24:45 +0100 Subject: Optimized Morton --- src/video_core/utils.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/utils.h b/src/video_core/utils.h index 7ce83a055..d8567f314 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h @@ -8,17 +8,11 @@ namespace VideoCore { -/** - * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are - * arranged in a Z-order curve. 
More details on the bit manipulation at: - * https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ - */ +// 8x8 Z-Order coordinate from 2D coordinates static inline u32 MortonInterleave(u32 x, u32 y) { - u32 i = (x & 7) | ((y & 7) << 8); // ---- -210 - i = (i ^ (i << 2)) & 0x1313; // ---2 --10 - i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0 - i = (i | (i >> 7)) & 0x3F; - return i; + static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; + static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; + return xlut[x % 8] + ylut[y % 8]; } /** -- cgit v1.2.3 From 903906da3b9b274836510adcabf8adf8e2c15954 Mon Sep 17 00:00:00 2001 From: Huw Pascoe Date: Fri, 22 Sep 2017 15:37:42 +0100 Subject: Optimized Float multiplication Before: ucomiss xmm1, xmm1 jp .L9 pxor xmm2, xmm2 mov edx, 1 ucomiss xmm0, xmm2 setp al cmovne eax, edx test al, al jne .L9 .L3: movaps xmm0, xmm2 ret .L9: ucomiss xmm0, xmm0 jp .L10 pxor xmm2, xmm2 mov edx, 1 ucomiss xmm1, xmm2 setp al cmovne eax, edx test al, al je .L3 After: movaps xmm2, xmm1 mulss xmm2, xmm0 ucomiss xmm2, xmm2 jnp .L3 ucomiss xmm1, xmm0 jnp .L11 .L3: movaps xmm0, xmm2 ret .L11: pxor xmm2, xmm2 jmp .L3 --- src/video_core/pica_types.h | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h index 5d7e10066..2eafa7e9e 100644 --- a/src/video_core/pica_types.h +++ b/src/video_core/pica_types.h @@ -58,11 +58,12 @@ public: } Float operator*(const Float& flt) const { - if ((this->value == 0.f && !std::isnan(flt.value)) || - (flt.value == 0.f && !std::isnan(this->value))) - // PICA gives 0 instead of NaN when multiplying by inf - return Zero(); - return Float::FromFloat32(ToFloat32() * flt.ToFloat32()); + float result = value * flt.ToFloat32(); + // PICA gives 0 instead of NaN when multiplying by inf + if (!std::isnan(value) && !std::isnan(flt.ToFloat32())) + if (std::isnan(result)) + 
result = 0.f; + return Float::FromFloat32(result); } Float operator/(const Float& flt) const { @@ -78,12 +79,7 @@ public: } Float& operator*=(const Float& flt) { - if ((this->value == 0.f && !std::isnan(flt.value)) || - (flt.value == 0.f && !std::isnan(this->value))) - // PICA gives 0 instead of NaN when multiplying by inf - *this = Zero(); - else - value *= flt.ToFloat32(); + value = operator*(flt).value; return *this; } -- cgit v1.2.3 From a321bce37834c1f3034bd87df14fc71c13e6b84a Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 29 Aug 2017 12:59:54 -0500 Subject: Disable unary operator- on Math::Vec2/Vec3/Vec4 for unsigned types. It is unlikely we will ever use this without first doing a Cast to a signed type. Fixes 9 "unary minus operator applied to unsigned type, result still unsigned" warnings on MSVC2017.3 --- src/video_core/swrasterizer/clipper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp index a52129eb7..c1ed48398 100644 --- a/src/video_core/swrasterizer/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp @@ -98,7 +98,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu auto FlipQuaternionIfOpposite = [](auto& a, const auto& b) { if (Math::Dot(a, b) < float24::Zero()) - a = -a; + a = a * float24::FromFloat32(-1.0f); }; // Flip the quaternions if they are opposite to prevent interpolating them over the wrong -- cgit v1.2.3 From a13ab958cbba75bc9abd1ca50f3030a10a75784e Mon Sep 17 00:00:00 2001 From: Huw Pascoe Date: Wed, 27 Sep 2017 00:26:09 +0100 Subject: Fixed type conversion ambiguity --- src/video_core/geometry_pipeline.cpp | 2 +- src/video_core/renderer_opengl/gl_state.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/geometry_pipeline.cpp b/src/video_core/geometry_pipeline.cpp index b146e2ecb..98ff2ccd3 100644 --- 
a/src/video_core/geometry_pipeline.cpp +++ b/src/video_core/geometry_pipeline.cpp @@ -105,7 +105,7 @@ public: DEBUG_ASSERT(need_index); // The number of vertex input is put to the uniform register - float24 vertex_num = float24::FromFloat32(val); + float24 vertex_num = float24::FromFloat32(static_cast<float>(val)); setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num); // The second uniform register and so on are used for receiving input vertices diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 06a905766..5770ae08f 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -267,9 +267,9 @@ void OpenGLState::Apply() const { for (size_t i = 0; i < clip_distance.size(); ++i) { if (clip_distance[i] != cur_state.clip_distance[i]) { if (clip_distance[i]) { - glEnable(GL_CLIP_DISTANCE0 + i); + glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); } else { - glDisable(GL_CLIP_DISTANCE0 + i); + glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); } } } -- cgit v1.2.3 From b3b34a1e76664c412fd7b37b3529cadd3983acfb Mon Sep 17 00:00:00 2001 From: Huw Pascoe Date: Tue, 3 Oct 2017 12:21:37 +0100 Subject: Extracted the attribute setup and draw commands into their own functions --- src/video_core/command_processor.cpp | 439 ++++++++++++++++++----------------- 1 file changed, 222 insertions(+), 217 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 3ab4af374..caf9f7a06 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -119,6 +119,224 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup, } } +static void LoadDefaultVertexAttributes(u32 register_value) { + auto& regs = g_state.regs; + + // TODO: Does actual hardware indeed keep an intermediate buffer or does + // it directly write the values? 
+ default_attr_write_buffer[default_attr_counter++] = register_value; + + // Default attributes are written in a packed format such that four float24 values are encoded + // in three 32-bit numbers. + // We write to internal memory once a full such vector is written. + if (default_attr_counter >= 3) { + default_attr_counter = 0; + + auto& setup = regs.pipeline.vs_default_attributes_setup; + + if (setup.index >= 16) { + LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); + return; + } + + Math::Vec4<float24> attribute; + + // NOTE: The destination component order indeed is "backwards" + attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); + attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | + ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); + attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | + ((default_attr_write_buffer[2] >> 24) & 0xFF)); + attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); + + LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, + attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), + attribute.w.ToFloat32()); + + // TODO: Verify that this actually modifies the register! + if (setup.index < 15) { + g_state.input_default_attributes.attr[setup.index] = attribute; + setup.index++; + } else { + // Put each attribute into an immediate input buffer. When all specified immediate + // attributes are present, the Vertex Shader is invoked and everything is sent to + // the primitive assembler. 
+ + auto& immediate_input = g_state.immediate.input_vertex; + auto& immediate_attribute_id = g_state.immediate.current_attribute; + + immediate_input.attr[immediate_attribute_id] = attribute; + + if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) { + immediate_attribute_id += 1; + } else { + MICROPROFILE_SCOPE(GPU_Drawing); + immediate_attribute_id = 0; + + auto* shader_engine = Shader::GetEngine(); + shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); + + // Send to vertex shader + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, + static_cast<void*>(&immediate_input)); + Shader::UnitState shader_unit; + Shader::AttributeBuffer output{}; + + shader_unit.LoadInput(regs.vs, immediate_input); + shader_engine->Run(g_state.vs, shader_unit); + shader_unit.WriteOutput(regs.vs, output); + + // Send to geometry pipeline + if (g_state.immediate.reset_geometry_pipeline) { + g_state.geometry_pipeline.Reconfigure(); + g_state.immediate.reset_geometry_pipeline = false; + } + ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); + g_state.geometry_pipeline.Setup(shader_engine); + g_state.geometry_pipeline.SubmitVertex(output); + + // TODO: If drawing after every immediate mode triangle kills performance, + // change it to flush triangles whenever a drawing config register changes + // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550 + VideoCore::g_renderer->Rasterizer()->DrawTriangles(); + if (g_debug_context) { + g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); + } + } + } + } +} + +static void Draw(u32 command_id) { + MICROPROFILE_SCOPE(GPU_Drawing); + auto& regs = g_state.regs; + +#if PICA_LOG_TEV + DebugUtils::DumpTevStageConfig(regs.GetTevStages()); +#endif + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); + + // Processes information about internal vertex attributes to figure out how a vertex is + // 
loaded. + // Later, these can be compiled and cached. + const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress(); + VertexLoader loader(regs.pipeline); + + // Load vertices + bool is_indexed = (command_id == PICA_REG_INDEX(pipeline.trigger_draw_indexed)); + + const auto& index_info = regs.pipeline.index_array; + const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); + const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); + bool index_u16 = index_info.format != 0; + + PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; + + if (g_debug_context && g_debug_context->recorder) { + for (int i = 0; i < 3; ++i) { + const auto texture = regs.texturing.GetTextures()[i]; + if (!texture.enabled) + continue; + + u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); + g_debug_context->recorder->MemoryAccessed( + texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) * + texture.config.width / 2 * texture.config.height, + texture.config.GetPhysicalAddress()); + } + } + + DebugUtils::MemoryAccessTracker memory_accesses; + + // Simple circular-replacement vertex cache + // The size has been tuned for optimal balance between hit-rate and the cost of lookup + const size_t VERTEX_CACHE_SIZE = 32; + std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; + std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache; + Shader::AttributeBuffer vs_output; + + unsigned int vertex_cache_pos = 0; + vertex_cache_ids.fill(-1); + + auto* shader_engine = Shader::GetEngine(); + Shader::UnitState shader_unit; + + shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); + + g_state.geometry_pipeline.Reconfigure(); + g_state.geometry_pipeline.Setup(shader_engine); + if (g_state.geometry_pipeline.NeedIndexInput()) + ASSERT(is_indexed); + + for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { + // Indexed rendering doesn't use the start offset + unsigned int vertex = is_indexed + ? (index_u16 ? 
index_address_16[index] : index_address_8[index]) + : (index + regs.pipeline.vertex_offset); + + // -1 is a common special value used for primitive restart. Since it's unknown if + // the PICA supports it, and it would mess up the caching, guard against it here. + ASSERT(vertex != -1); + + bool vertex_cache_hit = false; + + if (is_indexed) { + if (g_state.geometry_pipeline.NeedIndexInput()) { + g_state.geometry_pipeline.SubmitIndex(vertex); + continue; + } + + if (g_debug_context && Pica::g_debug_context->recorder) { + int size = index_u16 ? 2 : 1; + memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); + } + + for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { + if (vertex == vertex_cache_ids[i]) { + vs_output = vertex_cache[i]; + vertex_cache_hit = true; + break; + } + } + } + + if (!vertex_cache_hit) { + // Initialize data for the current vertex + Shader::AttributeBuffer input; + loader.LoadVertex(base_address, index, vertex, input, memory_accesses); + + // Send to vertex shader + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, + (void*)&input); + shader_unit.LoadInput(regs.vs, input); + shader_engine->Run(g_state.vs, shader_unit); + shader_unit.WriteOutput(regs.vs, vs_output); + + if (is_indexed) { + vertex_cache[vertex_cache_pos] = vs_output; + vertex_cache_ids[vertex_cache_pos] = vertex; + vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; + } + } + + // Send to geometry pipeline + g_state.geometry_pipeline.SubmitVertex(vs_output); + } + + for (auto& range : memory_accesses.ranges) { + g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), + range.second, range.first); + } + + VideoCore::g_renderer->Rasterizer()->DrawTriangles(); + if (g_debug_context) { + g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); + } +} + static void WritePicaReg(u32 id, u32 value, u32 mask) { auto& regs = g_state.regs; @@ -168,95 +386,9 
@@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // Load default vertex input attributes case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233): case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234): - case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): { - // TODO: Does actual hardware indeed keep an intermediate buffer or does - // it directly write the values? - default_attr_write_buffer[default_attr_counter++] = value; - - // Default attributes are written in a packed format such that four float24 values are - // encoded in - // three 32-bit numbers. We write to internal memory once a full such vector is - // written. - if (default_attr_counter >= 3) { - default_attr_counter = 0; - - auto& setup = regs.pipeline.vs_default_attributes_setup; - - if (setup.index >= 16) { - LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); - break; - } - - Math::Vec4 attribute; - - // NOTE: The destination component order indeed is "backwards" - attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); - attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | - ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); - attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | - ((default_attr_write_buffer[2] >> 24) & 0xFF)); - attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); - - LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, - attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), - attribute.w.ToFloat32()); - - // TODO: Verify that this actually modifies the register! - if (setup.index < 15) { - g_state.input_default_attributes.attr[setup.index] = attribute; - setup.index++; - } else { - // Put each attribute into an immediate input buffer. 
When all specified immediate - // attributes are present, the Vertex Shader is invoked and everything is sent to - // the primitive assembler. - - auto& immediate_input = g_state.immediate.input_vertex; - auto& immediate_attribute_id = g_state.immediate.current_attribute; - - immediate_input.attr[immediate_attribute_id] = attribute; - - if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) { - immediate_attribute_id += 1; - } else { - MICROPROFILE_SCOPE(GPU_Drawing); - immediate_attribute_id = 0; - - auto* shader_engine = Shader::GetEngine(); - shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); - - // Send to vertex shader - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, - static_cast(&immediate_input)); - Shader::UnitState shader_unit; - Shader::AttributeBuffer output{}; - - shader_unit.LoadInput(regs.vs, immediate_input); - shader_engine->Run(g_state.vs, shader_unit); - shader_unit.WriteOutput(regs.vs, output); - - // Send to geometry pipeline - if (g_state.immediate.reset_geometry_pipeline) { - g_state.geometry_pipeline.Reconfigure(); - g_state.immediate.reset_geometry_pipeline = false; - } - ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); - g_state.geometry_pipeline.Setup(shader_engine); - g_state.geometry_pipeline.SubmitVertex(output); - - // TODO: If drawing after every immediate mode triangle kills performance, - // change it to flush triangles whenever a drawing config register changes - // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550 - VideoCore::g_renderer->Rasterizer()->DrawTriangles(); - if (g_debug_context) { - g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, - nullptr); - } - } - } - } + case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): + LoadDefaultVertexAttributes(value); break; - } case PICA_REG_INDEX(pipeline.gpu_mode): // This register likely just enables vertex processing and doesn't 
need any special handling @@ -275,136 +407,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // It seems like these trigger vertex rendering case PICA_REG_INDEX(pipeline.trigger_draw): - case PICA_REG_INDEX(pipeline.trigger_draw_indexed): { - MICROPROFILE_SCOPE(GPU_Drawing); - -#if PICA_LOG_TEV - DebugUtils::DumpTevStageConfig(regs.GetTevStages()); -#endif - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); - - // Processes information about internal vertex attributes to figure out how a vertex is - // loaded. - // Later, these can be compiled and cached. - const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress(); - VertexLoader loader(regs.pipeline); - - // Load vertices - bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed)); - - const auto& index_info = regs.pipeline.index_array; - const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); - const u16* index_address_16 = reinterpret_cast(index_address_8); - bool index_u16 = index_info.format != 0; - - PrimitiveAssembler& primitive_assembler = g_state.primitive_assembler; - - if (g_debug_context && g_debug_context->recorder) { - for (int i = 0; i < 3; ++i) { - const auto texture = regs.texturing.GetTextures()[i]; - if (!texture.enabled) - continue; - - u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); - g_debug_context->recorder->MemoryAccessed( - texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) * - texture.config.width / 2 * texture.config.height, - texture.config.GetPhysicalAddress()); - } - } - - DebugUtils::MemoryAccessTracker memory_accesses; - - // Simple circular-replacement vertex cache - // The size has been tuned for optimal balance between hit-rate and the cost of lookup - const size_t VERTEX_CACHE_SIZE = 32; - std::array vertex_cache_ids; - std::array vertex_cache; - Shader::AttributeBuffer vs_output; - - unsigned 
int vertex_cache_pos = 0; - vertex_cache_ids.fill(-1); - - auto* shader_engine = Shader::GetEngine(); - Shader::UnitState shader_unit; - - shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); - - g_state.geometry_pipeline.Reconfigure(); - g_state.geometry_pipeline.Setup(shader_engine); - if (g_state.geometry_pipeline.NeedIndexInput()) - ASSERT(is_indexed); - - for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { - // Indexed rendering doesn't use the start offset - unsigned int vertex = - is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) - : (index + regs.pipeline.vertex_offset); - - // -1 is a common special value used for primitive restart. Since it's unknown if - // the PICA supports it, and it would mess up the caching, guard against it here. - ASSERT(vertex != -1); - - bool vertex_cache_hit = false; - - if (is_indexed) { - if (g_state.geometry_pipeline.NeedIndexInput()) { - g_state.geometry_pipeline.SubmitIndex(vertex); - continue; - } - - if (g_debug_context && Pica::g_debug_context->recorder) { - int size = index_u16 ? 
2 : 1; - memory_accesses.AddAccess(base_address + index_info.offset + size * index, - size); - } - - for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { - if (vertex == vertex_cache_ids[i]) { - vs_output = vertex_cache[i]; - vertex_cache_hit = true; - break; - } - } - } - - if (!vertex_cache_hit) { - // Initialize data for the current vertex - Shader::AttributeBuffer input; - loader.LoadVertex(base_address, index, vertex, input, memory_accesses); - - // Send to vertex shader - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, - (void*)&input); - shader_unit.LoadInput(regs.vs, input); - shader_engine->Run(g_state.vs, shader_unit); - shader_unit.WriteOutput(regs.vs, vs_output); - - if (is_indexed) { - vertex_cache[vertex_cache_pos] = vs_output; - vertex_cache_ids[vertex_cache_pos] = vertex; - vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; - } - } - - // Send to geometry pipeline - g_state.geometry_pipeline.SubmitVertex(vs_output); - } - - for (auto& range : memory_accesses.ranges) { - g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), - range.second, range.first); - } - - VideoCore::g_renderer->Rasterizer()->DrawTriangles(); - if (g_debug_context) { - g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); - } - + case PICA_REG_INDEX(pipeline.trigger_draw_indexed): + Draw(id); break; - } case PICA_REG_INDEX(gs.bool_uniforms): WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value()); -- cgit v1.2.3