diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/video_core/command_processor.cpp | 62 | ||||
-rw-r--r-- | src/video_core/pica_state.h | 2 | ||||
-rw-r--r-- | src/video_core/regs_pipeline.h | 9 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 68 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 5 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 9 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 38 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 63 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 13 | ||||
-rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 2 | ||||
-rw-r--r-- | src/video_core/swrasterizer/clipper.cpp | 15 | ||||
-rw-r--r-- | src/video_core/swrasterizer/lighting.cpp | 278 | ||||
-rw-r--r-- | src/video_core/swrasterizer/lighting.h | 18 | ||||
-rw-r--r-- | src/video_core/swrasterizer/rasterizer.cpp | 29 | ||||
-rw-r--r-- | src/video_core/swrasterizer/rasterizer.h | 6 |
16 files changed, 499 insertions, 120 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 0961a3251..cffa4c952 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -15,6 +15,7 @@ set(SRCS shader/shader_interpreter.cpp swrasterizer/clipper.cpp swrasterizer/framebuffer.cpp + swrasterizer/lighting.cpp swrasterizer/proctex.cpp swrasterizer/rasterizer.cpp swrasterizer/swrasterizer.cpp @@ -55,6 +56,7 @@ set(HEADERS shader/shader_interpreter.h swrasterizer/clipper.h swrasterizer/framebuffer.h + swrasterizer/lighting.h swrasterizer/proctex.h swrasterizer/rasterizer.h swrasterizer/swrasterizer.h diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 4633a1df1..f98ca3302 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -119,27 +119,6 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup, } } -static void WriteProgramCode(ShaderRegs& config, Shader::ShaderSetup& setup, - unsigned max_program_code_length, u32 value) { - if (config.program.offset >= max_program_code_length) { - LOG_ERROR(HW_GPU, "Invalid %s program offset %d", GetShaderSetupTypeName(setup), - (int)config.program.offset); - } else { - setup.program_code[config.program.offset] = value; - config.program.offset++; - } -} - -static void WriteSwizzlePatterns(ShaderRegs& config, Shader::ShaderSetup& setup, u32 value) { - if (config.swizzle_patterns.offset >= setup.swizzle_data.size()) { - LOG_ERROR(HW_GPU, "Invalid %s swizzle pattern offset %d", GetShaderSetupTypeName(setup), - (int)config.swizzle_patterns.offset); - } else { - setup.swizzle_data[config.swizzle_patterns.offset] = value; - config.swizzle_patterns.offset++; - } -} - static void WritePicaReg(u32 id, u32 value, u32 mask) { auto& regs = g_state.regs; @@ -458,7 +437,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[5], 0x2a1): case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[6], 0x2a2): case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[7], 0x2a3): { - WriteProgramCode(g_state.regs.gs, g_state.gs, 4096, value); + u32& offset = g_state.regs.gs.program.offset; + if (offset >= 4096) { + LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset); + } else { + g_state.gs.program_code[offset] = value; + offset++; + } break; } @@ -470,11 +455,18 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[5], 0x2ab): case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[6], 0x2ac): case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[7], 0x2ad): { - WriteSwizzlePatterns(g_state.regs.gs, g_state.gs, value); + u32& offset = g_state.regs.gs.swizzle_patterns.offset; + if (offset >= g_state.gs.swizzle_data.size()) { + LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset); + } else { + g_state.gs.swizzle_data[offset] = value; + offset++; + } break; } case PICA_REG_INDEX(vs.bool_uniforms): + // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this? WriteUniformBoolReg(g_state.vs, g_state.regs.vs.bool_uniforms.Value()); break; @@ -482,6 +474,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { + // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this? unsigned index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); auto values = regs.vs.int_uniforms[index]; WriteUniformIntReg(g_state.vs, index, @@ -497,6 +490,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { + // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this? WriteUniformFloatReg(g_state.regs.vs, g_state.vs, vs_float_regs_counter, vs_uniform_write_buffer, value); break; @@ -510,7 +504,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { - WriteProgramCode(g_state.regs.vs, g_state.vs, 512, value); + u32& offset = g_state.regs.vs.program.offset; + if (offset >= 512) { + LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset); + } else { + g_state.vs.program_code[offset] = value; + if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { + g_state.gs.program_code[offset] = value; + } + offset++; + } break; } @@ -522,7 +525,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): { - WriteSwizzlePatterns(g_state.regs.vs, g_state.vs, value); + u32& offset = g_state.regs.vs.swizzle_patterns.offset; + if (offset >= g_state.vs.swizzle_data.size()) { + LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset); + } else { + g_state.vs.swizzle_data[offset] = value; + if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) { + g_state.gs.swizzle_data[offset] = value; + } + offset++; + } break; } diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 2d23d34e6..864a2c9e6 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -79,7 +79,7 @@ struct State { std::array<ColorDifferenceEntry, 256> color_diff_table; } proctex; - struct { + struct Lighting { union LutEntry { // Used for raw access u32 raw; diff --git a/src/video_core/regs_pipeline.h b/src/video_core/regs_pipeline.h index 31c747d77..8b6369297 100644 --- a/src/video_core/regs_pipeline.h +++ b/src/video_core/regs_pipeline.h @@ -202,7 +202,14 @@ struct PipelineRegs { /// Number of input attributes to the vertex shader minus 1 BitField<0, 4, u32> max_input_attrib_index; - INSERT_PADDING_WORDS(2); + INSERT_PADDING_WORDS(1); + + // The shader unit 3, which can be used for both vertex and geometry shader, gets its + // configuration depending on this register. If this is not set, unit 3 will share some + // configuration with other units. It is known that program code and swizzle pattern uploaded + // via regs.vs will be also uploaded to unit 3 if this is not set. Although very likely, it is + // still unclear whether uniforms and other configuration can be also shared. + BitField<0, 1, u32> gs_unit_exclusive_configuration; enum class GPUMode : u32 { Drawing = 0, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ff3f69ba3..aa95ef21d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -28,6 +28,9 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { + // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 + state.clip_distance[0] = true; + // Create sampler objects for (size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -117,48 +120,53 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { // Setup the noise LUT for proctex proctex_noise_lut.Create(); - state.proctex_noise_lut.texture_1d = proctex_noise_lut.handle; + state.proctex_noise_lut.texture_buffer = proctex_noise_lut.handle; state.Apply(); + proctex_noise_lut_buffer.Create(); + glBindBuffer(GL_TEXTURE_BUFFER, proctex_noise_lut_buffer.handle); + glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW); glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum()); - glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, proctex_noise_lut_buffer.handle); // Setup the color map for proctex proctex_color_map.Create(); - state.proctex_color_map.texture_1d = proctex_color_map.handle; + state.proctex_color_map.texture_buffer = proctex_color_map.handle; state.Apply(); + proctex_color_map_buffer.Create(); + glBindBuffer(GL_TEXTURE_BUFFER, proctex_color_map_buffer.handle); + glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW); glActiveTexture(TextureUnits::ProcTexColorMap.Enum()); - glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, proctex_color_map_buffer.handle); // Setup the alpha map for proctex proctex_alpha_map.Create(); - state.proctex_alpha_map.texture_1d = proctex_alpha_map.handle; + state.proctex_alpha_map.texture_buffer = proctex_alpha_map.handle; state.Apply(); + proctex_alpha_map_buffer.Create(); + glBindBuffer(GL_TEXTURE_BUFFER, proctex_alpha_map_buffer.handle); + glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW); glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum()); - glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, proctex_alpha_map_buffer.handle); // Setup the LUT for proctex proctex_lut.Create(); - state.proctex_lut.texture_1d = proctex_lut.handle; + state.proctex_lut.texture_buffer = proctex_lut.handle; state.Apply(); + proctex_lut_buffer.Create(); + glBindBuffer(GL_TEXTURE_BUFFER, proctex_lut_buffer.handle); + glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 4 * 256, nullptr, GL_DYNAMIC_DRAW); glActiveTexture(TextureUnits::ProcTexLUT.Enum()); - glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_lut_buffer.handle); // Setup the difference LUT for proctex proctex_diff_lut.Create(); - state.proctex_diff_lut.texture_1d = proctex_diff_lut.handle; + state.proctex_diff_lut.texture_buffer = proctex_diff_lut.handle; state.Apply(); + proctex_diff_lut_buffer.Create(); + glBindBuffer(GL_TEXTURE_BUFFER, proctex_diff_lut_buffer.handle); + glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 4 * 256, nullptr, GL_DYNAMIC_DRAW); glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); - glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); // Sync fixed function OpenGL state SyncCullMode(); @@ -1387,7 +1395,7 @@ void RasterizerOpenGL::SyncProcTexNoise() { // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap static void SyncProcTexValueLUT(const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut, - std::array<GLvec2, 128>& lut_data, GLenum texture) { + std::array<GLvec2, 128>& lut_data, GLuint buffer) { std::array<GLvec2, 128> new_data; std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; @@ -1395,24 +1403,24 @@ static void SyncProcTexValueLUT(const std::array<Pica::State::ProcTex::ValueEntr if (new_data != lut_data) { lut_data = new_data; - glActiveTexture(texture); - glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RG, GL_FLOAT, lut_data.data()); + glBindBuffer(GL_TEXTURE_BUFFER, buffer); + glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec2), new_data.data()); } } void RasterizerOpenGL::SyncProcTexNoiseLUT() { SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, - TextureUnits::ProcTexNoiseLUT.Enum()); + proctex_noise_lut_buffer.handle); } void RasterizerOpenGL::SyncProcTexColorMap() { SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, - TextureUnits::ProcTexColorMap.Enum()); + proctex_color_map_buffer.handle); } void RasterizerOpenGL::SyncProcTexAlphaMap() { SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, - TextureUnits::ProcTexAlphaMap.Enum()); + proctex_alpha_map_buffer.handle); } void RasterizerOpenGL::SyncProcTexLUT() { @@ -1427,8 +1435,8 @@ void RasterizerOpenGL::SyncProcTexLUT() { if (new_data != proctex_lut_data) { proctex_lut_data = new_data; - glActiveTexture(TextureUnits::ProcTexLUT.Enum()); - glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_lut_data.data()); + glBindBuffer(GL_TEXTURE_BUFFER, proctex_lut_buffer.handle); + glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec4), new_data.data()); } } @@ -1444,8 +1452,8 @@ void RasterizerOpenGL::SyncProcTexDiffLUT() { if (new_data != proctex_diff_lut_data) { proctex_diff_lut_data = new_data; - glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); - glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_diff_lut_data.data()); + glBindBuffer(GL_TEXTURE_BUFFER, proctex_diff_lut_buffer.handle); + glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec4), new_data.data()); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a433c1d4a..78e218efe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -287,18 +287,23 @@ private: OGLTexture fog_lut; std::array<GLvec2, 128> fog_lut_data{}; + OGLBuffer proctex_noise_lut_buffer; OGLTexture proctex_noise_lut; std::array<GLvec2, 128> proctex_noise_lut_data{}; + OGLBuffer proctex_color_map_buffer; OGLTexture proctex_color_map; std::array<GLvec2, 128> proctex_color_map_data{}; + OGLBuffer proctex_alpha_map_buffer; OGLTexture proctex_alpha_map; std::array<GLvec2, 128> proctex_alpha_map_data{}; + OGLBuffer proctex_lut_buffer; OGLTexture proctex_lut; std::array<GLvec4, 256> proctex_lut_data{}; + OGLBuffer proctex_diff_lut_buffer; OGLTexture proctex_diff_lut; std::array<GLvec4, 256> proctex_diff_lut_data{}; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 8b717e43d..f37894e7a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -542,10 +542,11 @@ RasterizerCacheOpenGL::GetFramebufferSurfaces( config.GetDepthBufferPhysicalAddress(), fb_area * Pica::FramebufferRegs::BytesPerDepthPixel(config.depth_format)); bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0; - bool using_depth_fb = - config.GetDepthBufferPhysicalAddress() != 0 && - (regs.framebuffer.output_merger.depth_test_enable || - regs.framebuffer.output_merger.depth_write_enable || !framebuffers_overlap); + bool depth_write_enable = regs.framebuffer.output_merger.depth_write_enable && + regs.framebuffer.framebuffer.allow_depth_stencil_write; + bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && + (regs.framebuffer.output_merger.depth_test_enable || depth_write_enable || + !framebuffers_overlap); if (framebuffers_overlap && using_color_fb && using_depth_fb) { LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index c93b108fb..015e69da9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -525,11 +525,12 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { "float geo_factor = 1.0;\n"; // Compute fragment normals and tangents - const std::string pertubation = - "2.0 * (" + SampleTexture(config, lighting.bump_selector) + ").rgb - 1.0"; + auto Perturbation = [&]() { + return "2.0 * (" + SampleTexture(config, lighting.bump_selector) + ").rgb - 1.0"; + }; if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { // Bump mapping is enabled using a normal map - out += "vec3 surface_normal = " + pertubation + ";\n"; + out += "vec3 surface_normal = " + Perturbation() + ";\n"; // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher // precision result @@ -543,7 +544,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { // Bump mapping is enabled using a tangent map - out += "vec3 surface_tangent = " + pertubation + ";\n"; + out += "vec3 surface_tangent = " + Perturbation() + ";\n"; // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant // computation below, which is also confirmed on 3DS. So we don't bother recomputing here // even if 'renorm' is enabled. @@ -886,12 +887,12 @@ void AppendProcTexSampler(std::string& out, const PicaShaderConfig& config) { // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using // value entries and difference entries. out += R"( -float ProcTexLookupLUT(sampler1D lut, float coord) { +float ProcTexLookupLUT(samplerBuffer lut, float coord) { coord *= 128; float index_i = clamp(floor(coord), 0.0, 127.0); float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be // extracted as index_i = 127.0 and index_f = 1.0 - vec2 entry = texelFetch(lut, int(index_i), 0).rg; + vec2 entry = texelFetch(lut, int(index_i)).rg; return clamp(entry.r + entry.g * index_f, 0.0, 1.0); } )"; @@ -979,14 +980,14 @@ float ProcTexNoiseCoef(vec2 x) { out += "int lut_index_i = int(lut_coord) + " + std::to_string(config.state.proctex.lut_offset) + ";\n"; out += "float lut_index_f = fract(lut_coord);\n"; - out += "vec4 final_color = texelFetch(proctex_lut, lut_index_i, 0) + lut_index_f * " - "texelFetch(proctex_diff_lut, lut_index_i, 0);\n"; + out += "vec4 final_color = texelFetch(proctex_lut, lut_index_i) + lut_index_f * " + "texelFetch(proctex_diff_lut, lut_index_i);\n"; break; case ProcTexFilter::Nearest: case ProcTexFilter::NearestMipmapLinear: case ProcTexFilter::NearestMipmapNearest: out += "lut_coord += " + std::to_string(config.state.proctex.lut_offset) + ";\n"; - out += "vec4 final_color = texelFetch(proctex_lut, int(round(lut_coord)), 0);\n"; + out += "vec4 final_color = texelFetch(proctex_lut, int(round(lut_coord)));\n"; break; } @@ -1053,11 +1054,11 @@ layout (std140) uniform shader_data { uniform sampler2D tex[3]; uniform samplerBuffer lighting_lut; uniform samplerBuffer fog_lut; -uniform sampler1D proctex_noise_lut; -uniform sampler1D proctex_color_map; -uniform sampler1D proctex_alpha_map; -uniform sampler1D proctex_lut; -uniform sampler1D proctex_diff_lut; +uniform samplerBuffer proctex_noise_lut; +uniform samplerBuffer proctex_color_map; +uniform samplerBuffer proctex_alpha_map; +uniform samplerBuffer proctex_lut; +uniform samplerBuffer proctex_diff_lut; // Rotate the vector v by the quaternion q vec3 quaternion_rotate(vec4 q, vec3 v) { @@ -1111,7 +1112,10 @@ vec4 secondary_fragment_color = vec4(0.0); "gl_FragCoord.y < scissor_y2)) discard;\n"; } - out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; + // After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use + // default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then + // do our own transformation according to PICA specification. + out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n"; out += "float depth = z_over_w * depth_scale + depth_offset;\n"; if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { out += "depth /= gl_FragCoord.w;\n"; @@ -1194,7 +1198,9 @@ void main() { texcoord0_w = vert_texcoord0_w; normquat = vert_normquat; view = vert_view; - gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); + gl_Position = vert_position; + gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 + // TODO (wwylele): calculate gl_ClipDistance[1] from user-defined clipping plane } )"; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index eface2dea..06a905766 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -56,11 +56,11 @@ OpenGLState::OpenGLState() { fog_lut.texture_buffer = 0; - proctex_lut.texture_1d = 0; - proctex_diff_lut.texture_1d = 0; - proctex_color_map.texture_1d = 0; - proctex_alpha_map.texture_1d = 0; - proctex_noise_lut.texture_1d = 0; + proctex_lut.texture_buffer = 0; + proctex_diff_lut.texture_buffer = 0; + proctex_color_map.texture_buffer = 0; + proctex_alpha_map.texture_buffer = 0; + proctex_noise_lut.texture_buffer = 0; draw.read_framebuffer = 0; draw.draw_framebuffer = 0; @@ -68,6 +68,8 @@ OpenGLState::OpenGLState() { draw.vertex_buffer = 0; draw.uniform_buffer = 0; draw.shader_program = 0; + + clip_distance = {}; } void OpenGLState::Apply() const { @@ -204,33 +206,33 @@ void OpenGLState::Apply() const { } // ProcTex Noise LUT - if (proctex_noise_lut.texture_1d != cur_state.proctex_noise_lut.texture_1d) { + if (proctex_noise_lut.texture_buffer != cur_state.proctex_noise_lut.texture_buffer) { glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum()); - glBindTexture(GL_TEXTURE_1D, proctex_noise_lut.texture_1d); + glBindTexture(GL_TEXTURE_BUFFER, proctex_noise_lut.texture_buffer); } // ProcTex Color Map - if (proctex_color_map.texture_1d != cur_state.proctex_color_map.texture_1d) { + if (proctex_color_map.texture_buffer != cur_state.proctex_color_map.texture_buffer) { glActiveTexture(TextureUnits::ProcTexColorMap.Enum()); - glBindTexture(GL_TEXTURE_1D, proctex_color_map.texture_1d); + glBindTexture(GL_TEXTURE_BUFFER, proctex_color_map.texture_buffer); } // ProcTex Alpha Map - if (proctex_alpha_map.texture_1d != cur_state.proctex_alpha_map.texture_1d) { + if (proctex_alpha_map.texture_buffer != cur_state.proctex_alpha_map.texture_buffer) { glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum()); - glBindTexture(GL_TEXTURE_1D, proctex_alpha_map.texture_1d); + glBindTexture(GL_TEXTURE_BUFFER, proctex_alpha_map.texture_buffer); } // ProcTex LUT - if (proctex_lut.texture_1d != cur_state.proctex_lut.texture_1d) { + if (proctex_lut.texture_buffer != cur_state.proctex_lut.texture_buffer) { glActiveTexture(TextureUnits::ProcTexLUT.Enum()); - glBindTexture(GL_TEXTURE_1D, proctex_lut.texture_1d); + glBindTexture(GL_TEXTURE_BUFFER, proctex_lut.texture_buffer); } // ProcTex Diff LUT - if (proctex_diff_lut.texture_1d != cur_state.proctex_diff_lut.texture_1d) { + if (proctex_diff_lut.texture_buffer != cur_state.proctex_diff_lut.texture_buffer) { glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); - glBindTexture(GL_TEXTURE_1D, proctex_diff_lut.texture_1d); + glBindTexture(GL_TEXTURE_BUFFER, proctex_diff_lut.texture_buffer); } // Framebuffer @@ -261,6 +263,17 @@ void OpenGLState::Apply() const { glUseProgram(draw.shader_program); } + // Clip distance + for (size_t i = 0; i < clip_distance.size(); ++i) { + if (clip_distance[i] != cur_state.clip_distance[i]) { + if (clip_distance[i]) { + glEnable(GL_CLIP_DISTANCE0 + i); + } else { + glDisable(GL_CLIP_DISTANCE0 + i); + } + } + } + cur_state = *this; } @@ -274,16 +287,16 @@ void OpenGLState::ResetTexture(GLuint handle) { cur_state.lighting_lut.texture_buffer = 0; if (cur_state.fog_lut.texture_buffer == handle) cur_state.fog_lut.texture_buffer = 0; - if (cur_state.proctex_noise_lut.texture_1d == handle) - cur_state.proctex_noise_lut.texture_1d = 0; - if (cur_state.proctex_color_map.texture_1d == handle) - cur_state.proctex_color_map.texture_1d = 0; - if (cur_state.proctex_alpha_map.texture_1d == handle) - cur_state.proctex_alpha_map.texture_1d = 0; - if (cur_state.proctex_lut.texture_1d == handle) - cur_state.proctex_lut.texture_1d = 0; - if (cur_state.proctex_diff_lut.texture_1d == handle) - cur_state.proctex_diff_lut.texture_1d = 0; + if (cur_state.proctex_noise_lut.texture_buffer == handle) + cur_state.proctex_noise_lut.texture_buffer = 0; + if (cur_state.proctex_color_map.texture_buffer == handle) + cur_state.proctex_color_map.texture_buffer = 0; + if (cur_state.proctex_alpha_map.texture_buffer == handle) + cur_state.proctex_alpha_map.texture_buffer = 0; + if (cur_state.proctex_lut.texture_buffer == handle) + cur_state.proctex_lut.texture_buffer = 0; + if (cur_state.proctex_diff_lut.texture_buffer == handle) + cur_state.proctex_diff_lut.texture_buffer = 0; } void OpenGLState::ResetSampler(GLuint handle) { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 1efcf0811..437fe34c4 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -4,6 +4,7 @@ #pragma once +#include <array> #include <glad/glad.h> namespace TextureUnits { @@ -95,23 +96,23 @@ public: } fog_lut; struct { - GLuint texture_1d; // GL_TEXTURE_BINDING_1D + GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } proctex_noise_lut; struct { - GLuint texture_1d; // GL_TEXTURE_BINDING_1D + GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } proctex_color_map; struct { - GLuint texture_1d; // GL_TEXTURE_BINDING_1D + GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } proctex_alpha_map; struct { - GLuint texture_1d; // GL_TEXTURE_BINDING_1D + GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } proctex_lut; struct { - GLuint texture_1d; // GL_TEXTURE_BINDING_1D + GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } proctex_diff_lut; struct { @@ -123,6 +124,8 @@ public: GLuint shader_program; // GL_CURRENT_PROGRAM } draw; + std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE + OpenGLState(); /// Get the currently active OpenGL state diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index aa1cec81f..206c0978a 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -631,7 +631,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData state.address_registers[2] = loop_param.y; Record<DebugDataRecord::LOOP_INT_IN>(debug_data, iteration, loop_param); - call(program_counter + 1, instr.flow_control.dest_offset - program_counter + 1, + call(program_counter + 1, instr.flow_control.dest_offset - program_counter, instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); break; } diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp index 6fb923756..cdbc71502 100644 --- a/src/video_core/swrasterizer/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp @@ -95,6 +95,17 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu static const size_t MAX_VERTICES = 9; static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; static_vector<Vertex, MAX_VERTICES> buffer_b; + + auto FlipQuaternionIfOpposite = [](auto& a, const auto& b) { + if (Math::Dot(a, b) < float24::Zero()) + a = -a; + }; + + // Flip the quaternions if they are opposite to prevent interpolating them over the wrong + // direction. + FlipQuaternionIfOpposite(buffer_a[1].quat, buffer_a[0].quat); + FlipQuaternionIfOpposite(buffer_a[2].quat, buffer_a[0].quat); + auto* output_list = &buffer_a; auto* input_list = &buffer_b; @@ -114,10 +125,6 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu {Math::MakeVec(f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON }}; - // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii) - // drop the whole primitive instead of clipping the primitive properly. We should test if - // this happens on the 3DS, too. - // Simple implementation of the Sutherland-Hodgman clipping algorithm. // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) for (auto edge : clipping_edges) { diff --git a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp new file mode 100644 index 000000000..39a3e396d --- /dev/null +++ b/src/video_core/swrasterizer/lighting.cpp @@ -0,0 +1,278 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/math_util.h" +#include "video_core/swrasterizer/lighting.h" + +namespace Pica { + +static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut_index, u8 index, + float delta) { + ASSERT_MSG(lut_index < lighting.luts.size(), "Out of range lut"); + ASSERT_MSG(index < lighting.luts[lut_index].size(), "Out of range index"); + + const auto& lut = lighting.luts[lut_index][index]; + + float lut_value = lut.ToFloat(); + float lut_diff = lut.DiffToFloat(); + + return lut_value + lut_diff * delta; +} + +std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors( + const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, + const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view) { + + // TODO(Subv): Bump mapping + Math::Vec3<float> surface_normal = {0.0f, 0.0f, 1.0f}; + + if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) { + LOG_CRITICAL(HW_GPU, "unimplemented bump mapping"); + UNIMPLEMENTED(); + } + + // Use the normalized the quaternion when performing the rotation + auto normal = Math::QuaternionRotate(normquat, surface_normal); + + Math::Vec4<float> diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f}; + Math::Vec4<float> specular_sum = {0.0f, 0.0f, 0.0f, 1.0f}; + + for (unsigned light_index = 0; light_index <= lighting.max_light_index; ++light_index) { + unsigned num = lighting.light_enable.GetNum(light_index); + const auto& light_config = lighting.light[num]; + + Math::Vec3<float> refl_value = {}; + Math::Vec3<float> position = {float16::FromRaw(light_config.x).ToFloat32(), + float16::FromRaw(light_config.y).ToFloat32(), + float16::FromRaw(light_config.z).ToFloat32()}; + Math::Vec3<float> light_vector; + + if (light_config.config.directional) + light_vector = position; + else + light_vector = position + view; + + light_vector.Normalize(); + + Math::Vec3<float> norm_view = view.Normalized(); + Math::Vec3<float> half_vector = norm_view + light_vector; + + float dist_atten = 1.0f; + if (!lighting.IsDistAttenDisabled(num)) { + auto distance = (-view - position).Length(); + float scale = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32(); + float bias = Pica::float20::FromRaw(light_config.dist_atten_bias).ToFloat32(); + size_t lut = + static_cast<size_t>(LightingRegs::LightingSampler::DistanceAttenuation) + num; + + float sample_loc = MathUtil::Clamp(scale * distance + bias, 0.0f, 1.0f); + + u8 lutindex = + static_cast<u8>(MathUtil::Clamp(std::floor(sample_loc * 256.0f), 0.0f, 255.0f)); + float delta = sample_loc * 256 - lutindex; + dist_atten = LookupLightingLut(lighting_state, lut, lutindex, delta); + } + + auto GetLutValue = [&](LightingRegs::LightingLutInput input, bool abs, + LightingRegs::LightingScale scale_enum, + LightingRegs::LightingSampler sampler) { + float result = 0.0f; + + switch (input) { + case LightingRegs::LightingLutInput::NH: + result = Math::Dot(normal, half_vector.Normalized()); + break; + + case LightingRegs::LightingLutInput::VH: + result = Math::Dot(norm_view, half_vector.Normalized()); + break; + + case LightingRegs::LightingLutInput::NV: + result = Math::Dot(normal, norm_view); + break; + + case LightingRegs::LightingLutInput::LN: + result = Math::Dot(light_vector, normal); + break; + + case LightingRegs::LightingLutInput::SP: { + Math::Vec3<s32> spot_dir{light_config.spot_x.Value(), light_config.spot_y.Value(), + light_config.spot_z.Value()}; + result = Math::Dot(light_vector, spot_dir.Cast<float>() / 2047.0f); + break; + } + default: + LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast<u32>(input)); + UNIMPLEMENTED(); + result = 0.0f; + } + + u8 index; + float delta; + + if (abs) { + if (light_config.config.two_sided_diffuse) + result = std::abs(result); + else + result = std::max(result, 0.0f); + + float flr = std::floor(result * 256.0f); + index = static_cast<u8>(MathUtil::Clamp(flr, 0.0f, 255.0f)); + delta = result * 256 - index; + } else { + float flr = std::floor(result * 128.0f); + s8 signed_index = static_cast<s8>(MathUtil::Clamp(flr, -128.0f, 127.0f)); + delta = result * 128.0f - signed_index; + index = static_cast<u8>(signed_index); + } + + float scale = lighting.lut_scale.GetScale(scale_enum); + return scale * + LookupLightingLut(lighting_state, static_cast<size_t>(sampler), index, delta); + }; + + // If enabled, compute spot light attenuation value + float spot_atten = 1.0f; + if (!lighting.IsSpotAttenDisabled(num) && + LightingRegs::IsLightingSamplerSupported( + lighting.config0.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { + auto lut = LightingRegs::SpotlightAttenuationSampler(num); + spot_atten = GetLutValue(lighting.lut_input.sp, lighting.abs_lut_input.disable_sp == 0, + lighting.lut_scale.sp, lut); + } + + // Specular 0 component + float d0_lut_value = 1.0f; + if (lighting.config1.disable_lut_d0 == 0 && + LightingRegs::IsLightingSamplerSupported( + lighting.config0.config, LightingRegs::LightingSampler::Distribution0)) { + d0_lut_value = + GetLutValue(lighting.lut_input.d0, lighting.abs_lut_input.disable_d0 == 0, + lighting.lut_scale.d0, LightingRegs::LightingSampler::Distribution0); + } + + Math::Vec3<float> specular_0 = d0_lut_value * light_config.specular_0.ToVec3f(); + + // If enabled, lookup ReflectRed value, otherwise, 1.0 is used + if (lighting.config1.disable_lut_rr == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::ReflectRed)) { + refl_value.x = + GetLutValue(lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0, + lighting.lut_scale.rr, LightingRegs::LightingSampler::ReflectRed); + } else { + refl_value.x = 1.0f; + } + + // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used + if (lighting.config1.disable_lut_rg == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::ReflectGreen)) { + refl_value.y = + GetLutValue(lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0, + lighting.lut_scale.rg, LightingRegs::LightingSampler::ReflectGreen); + } else { + refl_value.y = refl_value.x; + } + + // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used + if (lighting.config1.disable_lut_rb == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::ReflectBlue)) { + refl_value.z = + GetLutValue(lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0, + lighting.lut_scale.rb, LightingRegs::LightingSampler::ReflectBlue); + } else { + refl_value.z = refl_value.x; + } + + // Specular 1 component + float d1_lut_value = 1.0f; + if (lighting.config1.disable_lut_d1 == 0 && + LightingRegs::IsLightingSamplerSupported( + lighting.config0.config, LightingRegs::LightingSampler::Distribution1)) { + d1_lut_value = + GetLutValue(lighting.lut_input.d1, lighting.abs_lut_input.disable_d1 == 0, + lighting.lut_scale.d1, LightingRegs::LightingSampler::Distribution1); + } + + Math::Vec3<float> specular_1 = + d1_lut_value * refl_value * light_config.specular_1.ToVec3f(); + + // Fresnel + if (lighting.config1.disable_lut_fr == 0 && + LightingRegs::IsLightingSamplerSupported(lighting.config0.config, + LightingRegs::LightingSampler::Fresnel)) { + + float lut_value = + GetLutValue(lighting.lut_input.fr, lighting.abs_lut_input.disable_fr == 0, + lighting.lut_scale.fr, LightingRegs::LightingSampler::Fresnel); + + // Enabled for diffuse lighting alpha component + if (lighting.config0.fresnel_selector == + LightingRegs::LightingFresnelSelector::PrimaryAlpha || + lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { + diffuse_sum.a() *= lut_value; + } + + // Enabled for the specular lighting alpha component + if (lighting.config0.fresnel_selector == + LightingRegs::LightingFresnelSelector::SecondaryAlpha || + lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) { + specular_sum.a() *= lut_value; + } + } + + auto dot_product = Math::Dot(light_vector, normal); + + // Calculate clamp highlights before applying the two-sided diffuse configuration to the dot + // product. + float clamp_highlights = 1.0f; + if (lighting.config0.clamp_highlights) { + if (dot_product <= 0.0f) + clamp_highlights = 0.0f; + else + clamp_highlights = 1.0f; + } + + if (light_config.config.two_sided_diffuse) + dot_product = std::abs(dot_product); + else + dot_product = std::max(dot_product, 0.0f); + + if (light_config.config.geometric_factor_0 || light_config.config.geometric_factor_1) { + float geo_factor = half_vector.Length2(); + geo_factor = geo_factor == 0.0f ? 0.0f : std::min(dot_product / geo_factor, 1.0f); + if (light_config.config.geometric_factor_0) { + specular_0 *= geo_factor; + } + if (light_config.config.geometric_factor_1) { + specular_1 *= geo_factor; + } + } + + auto diffuse = + light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f(); + diffuse_sum += Math::MakeVec(diffuse * dist_atten * spot_atten, 0.0f); + + specular_sum += Math::MakeVec( + (specular_0 + specular_1) * clamp_highlights * dist_atten * spot_atten, 0.0f); + } + + diffuse_sum += Math::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f); + + auto diffuse = Math::MakeVec<float>(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255, + MathUtil::Clamp(diffuse_sum.w, 0.0f, 1.0f) * 255) + .Cast<u8>(); + auto specular = Math::MakeVec<float>(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255, + MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255) + .Cast<u8>(); + return std::make_tuple(diffuse, specular); +} + +} // namespace Pica diff --git a/src/video_core/swrasterizer/lighting.h b/src/video_core/swrasterizer/lighting.h new file mode 100644 index 000000000..438dca926 --- /dev/null +++ b/src/video_core/swrasterizer/lighting.h @@ -0,0 +1,18 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <tuple> +#include "common/quaternion.h" +#include "common/vector_math.h" +#include "video_core/pica_state.h" + +namespace Pica { + +std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors( + const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state, + const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view); + +} // namespace Pica diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 512e81c08..fdc1df199 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -13,6 +13,7 @@ #include "common/logging/log.h" #include "common/math_util.h" #include "common/microprofile.h" +#include "common/quaternion.h" #include "common/vector_math.h" #include "core/hw/gpu.h" #include "core/memory.h" @@ -24,6 +25,7 @@ #include "video_core/regs_texturing.h" #include "video_core/shader/shader.h" #include "video_core/swrasterizer/framebuffer.h" +#include "video_core/swrasterizer/lighting.h" #include "video_core/swrasterizer/proctex.h" #include "video_core/swrasterizer/rasterizer.h" #include "video_core/swrasterizer/texturing.h" @@ -419,6 +421,26 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve regs.texturing.tev_combiner_buffer_color.a, }; + Math::Vec4<u8> primary_fragment_color = {0, 0, 0, 0}; + Math::Vec4<u8> secondary_fragment_color = {0, 0, 0, 0}; + + if (!g_state.regs.lighting.disable) { + Math::Quaternion<float> normquat = Math::Quaternion<float>{ + {GetInterpolatedAttribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(), + GetInterpolatedAttribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(), + GetInterpolatedAttribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()}, + GetInterpolatedAttribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(), + }.Normalized(); + + Math::Vec3<float> view{ + GetInterpolatedAttribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(), + GetInterpolatedAttribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), + GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(), + }; + std::tie(primary_fragment_color, secondary_fragment_color) = + ComputeFragmentsColors(g_state.regs.lighting, g_state.lighting, normquat, view); + } + for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { const auto& tev_stage = tev_stages[tev_stage_index]; @@ -427,14 +449,13 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve auto GetSource = [&](Source source) -> Math::Vec4<u8> { switch (source) { case Source::PrimaryColor: + return primary_color; - // HACK: Until we implement fragment lighting, use primary_color case Source::PrimaryFragmentColor: - return primary_color; + return primary_fragment_color; - // HACK: Until we implement fragment lighting, use zero case Source::SecondaryFragmentColor: - return {0, 0, 0, 0}; + return secondary_fragment_color; case Source::Texture0: return texture_color[0]; diff --git a/src/video_core/swrasterizer/rasterizer.h b/src/video_core/swrasterizer/rasterizer.h index 2f0877581..66cd6cfd4 100644 --- a/src/video_core/swrasterizer/rasterizer.h +++ b/src/video_core/swrasterizer/rasterizer.h @@ -19,10 +19,9 @@ struct Vertex : Shader::OutputVertex { // Linear interpolation // factor: 0=this, 1=vtx + // Note: This function cannot be called after perspective divide void Lerp(float24 factor, const Vertex& vtx) { pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); - - // TODO: Should perform perspective correct interpolation here... quat = quat * factor + vtx.quat * (float24::FromFloat32(1) - factor); color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); @@ -30,12 +29,11 @@ struct Vertex : Shader::OutputVertex { tc0_w = tc0_w * factor + vtx.tc0_w * (float24::FromFloat32(1) - factor); view = view * factor + vtx.view * (float24::FromFloat32(1) - factor); tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); - - screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); } // Linear interpolation // factor: 0=v0, 1=v1 + // Note: This function cannot be called after perspective divide static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) { Vertex ret = v0; ret.Lerp(factor, v1); |