summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/pica_state.h 12
-rw-r--r-- src/video_core/regs_lighting.h 6
-rw-r--r-- src/video_core/regs_texturing.h 8
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 128
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 7
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 179
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 40
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 27
-rw-r--r-- src/video_core/renderer_opengl/pica_to_gl.h 13
-rw-r--r-- src/video_core/shader/shader_jit_x64_compiler.cpp 26
-rw-r--r-- src/video_core/swrasterizer/rasterizer.cpp 20
-rw-r--r-- src/video_core/swrasterizer/texturing.cpp 19
13 files changed, 297 insertions, 190 deletions
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index f46db09fb..3b00df0b3 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -87,12 +87,18 @@ struct State {
// LUT value, encoded as 12-bit fixed point, with 12 fraction bits
BitField<0, 12, u32> value; // 0.0.12 fixed point
- // Used by HW for efficient interpolation, Citra does not use these
- BitField<12, 12, s32> difference; // 1.0.11 fixed point
+ // Used for efficient interpolation.
+ BitField<12, 11, u32> difference; // 0.0.11 fixed point
+ BitField<23, 1, u32> neg_difference;
- float ToFloat() {
+ float ToFloat() const {
return static_cast<float>(value) / 4095.f;
}
+
+ float DiffToFloat() const {
+ float diff = static_cast<float>(difference) / 2047.f;
+ return neg_difference ? -diff : diff;
+ }
};
std::array<std::array<LutEntry, 256>, 24> luts;
diff --git a/src/video_core/regs_lighting.h b/src/video_core/regs_lighting.h
index fbfebc0a7..b89709cfe 100644
--- a/src/video_core/regs_lighting.h
+++ b/src/video_core/regs_lighting.h
@@ -26,6 +26,8 @@ struct LightingRegs {
DistanceAttenuation = 16,
};
+ static constexpr unsigned NumLightingSampler = 24;
+
static LightingSampler SpotlightAttenuationSampler(unsigned index) {
return static_cast<LightingSampler>(
static_cast<unsigned>(LightingSampler::SpotlightAttenuation) + index);
@@ -84,7 +86,7 @@ struct LightingRegs {
NV = 2, // Cosine of the angle between the normal and the view vector
LN = 3, // Cosine of the angle between the light and the normal vectors
SP = 4, // Cosine of the angle between the light and the inverse spotlight vectors
- CP = 5, // TODO: document and implement
+ CP = 5, // Cosine of the angle between the tangent and projection of half-angle vectors
};
enum class LightingBumpMode : u32 {
@@ -168,6 +170,8 @@ struct LightingRegs {
union {
BitField<0, 1, u32> directional;
BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0
+ BitField<2, 1, u32> geometric_factor_0;
+ BitField<3, 1, u32> geometric_factor_1;
} config;
BitField<0, 20, u32> dist_atten_bias;
diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h
index 3f5355fa9..0b09f2299 100644
--- a/src/video_core/regs_texturing.h
+++ b/src/video_core/regs_texturing.h
@@ -30,10 +30,10 @@ struct TexturingRegs {
Repeat = 2,
MirroredRepeat = 3,
// Mode 4-7 produces some weird result and may be just invalid:
- // 4: Positive coord: clamp to edge; negative coord: repeat
- // 5: Positive coord: clamp to border; negative coord: repeat
- // 6: Repeat
- // 7: Repeat
+ ClampToEdge2 = 4, // Positive coord: clamp to edge; negative coord: repeat
+ ClampToBorder2 = 5, // Positive coord: clamp to border; negative coord: repeat
+ Repeat2 = 6, // Same as Repeat
+ Repeat3 = 7, // Same as Repeat
};
enum TextureFilter : u32 {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e6cccebf6..8b7991c04 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -49,9 +49,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
uniform_block_data.dirty = true;
- for (unsigned index = 0; index < lighting_luts.size(); index++) {
- uniform_block_data.lut_dirty[index] = true;
- }
+ uniform_block_data.lut_dirty.fill(true);
uniform_block_data.fog_lut_dirty = true;
@@ -96,18 +94,16 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
framebuffer.Create();
// Allocate and bind lighting lut textures
- for (size_t i = 0; i < lighting_luts.size(); ++i) {
- lighting_luts[i].Create();
- state.lighting_luts[i].texture_1d = lighting_luts[i].handle;
- }
+ lighting_lut.Create(); // texture object must exist before its handle is stored in state
+ state.lighting_lut.texture_buffer = lighting_lut.handle;
state.Apply();
-
- for (size_t i = 0; i < lighting_luts.size(); ++i) {
- glActiveTexture(static_cast<GLenum>(GL_TEXTURE3 + i));
- glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
- glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
- }
+ lighting_lut_buffer.Create(); // backing store, attached to the texture via glTexBuffer below
+ glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle);
+ glBufferData(GL_TEXTURE_BUFFER,
+ sizeof(GLfloat) * 2 * 256 * Pica::LightingRegs::NumLightingSampler, nullptr,
+ GL_DYNAMIC_DRAW);
+ glActiveTexture(TextureUnits::LightingLUT.Enum());
+ glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, lighting_lut_buffer.handle);
// Setup the LUT for the fog
{
@@ -116,7 +112,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
}
state.Apply();
- glActiveTexture(GL_TEXTURE9);
+ glActiveTexture(TextureUnits::FogLUT.Enum());
glTexImage1D(GL_TEXTURE_1D, 0, GL_R32UI, 128, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@@ -125,7 +121,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
proctex_noise_lut.Create();
state.proctex_noise_lut.texture_1d = proctex_noise_lut.handle;
state.Apply();
- glActiveTexture(GL_TEXTURE10);
+ glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum());
glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@@ -134,7 +130,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
proctex_color_map.Create();
state.proctex_color_map.texture_1d = proctex_color_map.handle;
state.Apply();
- glActiveTexture(GL_TEXTURE11);
+ glActiveTexture(TextureUnits::ProcTexColorMap.Enum());
glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@@ -143,7 +139,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
proctex_alpha_map.Create();
state.proctex_alpha_map.texture_1d = proctex_alpha_map.handle;
state.Apply();
- glActiveTexture(GL_TEXTURE12);
+ glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum());
glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@@ -152,7 +148,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
proctex_lut.Create();
state.proctex_lut.texture_1d = proctex_lut.handle;
state.Apply();
- glActiveTexture(GL_TEXTURE13);
+ glActiveTexture(TextureUnits::ProcTexLUT.Enum());
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@@ -161,7 +157,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
proctex_diff_lut.Create();
state.proctex_diff_lut.texture_1d = proctex_diff_lut.handle;
state.Apply();
- glActiveTexture(GL_TEXTURE14);
+ glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@@ -313,7 +309,7 @@ void RasterizerOpenGL::DrawTriangles() {
}
// Sync the lighting luts
- for (unsigned index = 0; index < lighting_luts.size(); index++) {
+ for (unsigned index = 0; index < uniform_block_data.lut_dirty.size(); index++) {
if (uniform_block_data.lut_dirty[index]) {
SyncLightingLUT(index);
uniform_block_data.lut_dirty[index] = false;
@@ -851,7 +847,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): {
auto& lut_config = regs.lighting.lut_config;
- uniform_block_data.lut_dirty[lut_config.type / 4] = true;
+ uniform_block_data.lut_dirty[lut_config.type] = true;
break;
}
}
@@ -1187,77 +1183,57 @@ void RasterizerOpenGL::SetShader() {
state.Apply();
// Set the texture samplers to correspond to different texture units
- GLuint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]");
+ GLint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]");
if (uniform_tex != -1) {
- glUniform1i(uniform_tex, 0);
+ glUniform1i(uniform_tex, TextureUnits::PicaTexture(0).id);
}
uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]");
if (uniform_tex != -1) {
- glUniform1i(uniform_tex, 1);
+ glUniform1i(uniform_tex, TextureUnits::PicaTexture(1).id);
}
uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]");
if (uniform_tex != -1) {
- glUniform1i(uniform_tex, 2);
+ glUniform1i(uniform_tex, TextureUnits::PicaTexture(2).id);
}
// Set the texture samplers to correspond to different lookup table texture units
- GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]");
- if (uniform_lut != -1) {
- glUniform1i(uniform_lut, 3);
- }
- uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]");
- if (uniform_lut != -1) {
- glUniform1i(uniform_lut, 4);
- }
- uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]");
- if (uniform_lut != -1) {
- glUniform1i(uniform_lut, 5);
- }
- uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]");
+ GLint uniform_lut = glGetUniformLocation(shader->shader.handle, "lighting_lut");
if (uniform_lut != -1) {
- glUniform1i(uniform_lut, 6);
- }
- uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]");
- if (uniform_lut != -1) {
- glUniform1i(uniform_lut, 7);
- }
- uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]");
- if (uniform_lut != -1) {
- glUniform1i(uniform_lut, 8);
+ glUniform1i(uniform_lut, TextureUnits::LightingLUT.id);
}
- GLuint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut");
+ GLint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut");
if (uniform_fog_lut != -1) {
- glUniform1i(uniform_fog_lut, 9);
+ glUniform1i(uniform_fog_lut, TextureUnits::FogLUT.id);
}
- GLuint uniform_proctex_noise_lut =
+ GLint uniform_proctex_noise_lut =
glGetUniformLocation(shader->shader.handle, "proctex_noise_lut");
if (uniform_proctex_noise_lut != -1) {
- glUniform1i(uniform_proctex_noise_lut, 10);
+ glUniform1i(uniform_proctex_noise_lut, TextureUnits::ProcTexNoiseLUT.id);
}
- GLuint uniform_proctex_color_map =
+ GLint uniform_proctex_color_map =
glGetUniformLocation(shader->shader.handle, "proctex_color_map");
if (uniform_proctex_color_map != -1) {
- glUniform1i(uniform_proctex_color_map, 11);
+ glUniform1i(uniform_proctex_color_map, TextureUnits::ProcTexColorMap.id);
}
- GLuint uniform_proctex_alpha_map =
+ GLint uniform_proctex_alpha_map =
glGetUniformLocation(shader->shader.handle, "proctex_alpha_map");
if (uniform_proctex_alpha_map != -1) {
- glUniform1i(uniform_proctex_alpha_map, 12);
+ glUniform1i(uniform_proctex_alpha_map, TextureUnits::ProcTexAlphaMap.id);
}
- GLuint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, "proctex_lut");
+ GLint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, "proctex_lut");
if (uniform_proctex_lut != -1) {
- glUniform1i(uniform_proctex_lut, 13);
+ glUniform1i(uniform_proctex_lut, TextureUnits::ProcTexLUT.id);
}
- GLuint uniform_proctex_diff_lut =
+ GLint uniform_proctex_diff_lut =
glGetUniformLocation(shader->shader.handle, "proctex_diff_lut");
if (uniform_proctex_diff_lut != -1) {
- glUniform1i(uniform_proctex_diff_lut, 14);
+ glUniform1i(uniform_proctex_diff_lut, TextureUnits::ProcTexDiffLUT.id);
}
current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
@@ -1387,7 +1363,7 @@ void RasterizerOpenGL::SyncFogLUT() {
if (new_data != fog_lut_data) {
fog_lut_data = new_data;
- glActiveTexture(GL_TEXTURE9);
+ glActiveTexture(TextureUnits::FogLUT.Enum());
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT,
fog_lut_data.data());
}
@@ -1426,17 +1402,18 @@ static void SyncProcTexValueLUT(const std::array<Pica::State::ProcTex::ValueEntr
}
void RasterizerOpenGL::SyncProcTexNoiseLUT() {
- SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, GL_TEXTURE10);
+ SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data,
+ TextureUnits::ProcTexNoiseLUT.Enum());
}
void RasterizerOpenGL::SyncProcTexColorMap() {
SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data,
- GL_TEXTURE11);
+ TextureUnits::ProcTexColorMap.Enum());
}
void RasterizerOpenGL::SyncProcTexAlphaMap() {
SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data,
- GL_TEXTURE12);
+ TextureUnits::ProcTexAlphaMap.Enum());
}
void RasterizerOpenGL::SyncProcTexLUT() {
@@ -1451,7 +1428,7 @@ void RasterizerOpenGL::SyncProcTexLUT() {
if (new_data != proctex_lut_data) {
proctex_lut_data = new_data;
- glActiveTexture(GL_TEXTURE13);
+ glActiveTexture(TextureUnits::ProcTexLUT.Enum());
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_lut_data.data());
}
}
@@ -1468,7 +1445,7 @@ void RasterizerOpenGL::SyncProcTexDiffLUT() {
if (new_data != proctex_diff_lut_data) {
proctex_diff_lut_data = new_data;
- glActiveTexture(GL_TEXTURE14);
+ glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_diff_lut_data.data());
}
}
@@ -1571,20 +1548,17 @@ void RasterizerOpenGL::SyncGlobalAmbient() {
}
void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) {
- std::array<GLvec4, 256> new_data;
-
- for (unsigned offset = 0; offset < new_data.size(); ++offset) {
- new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat();
- new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat();
- new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat();
- new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat();
- }
+ std::array<GLvec2, 256> new_data;
+ const auto& source_lut = Pica::g_state.lighting.luts[lut_index];
+ std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), [](const auto& entry) {
+ return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
+ });
if (new_data != lighting_lut_data[lut_index]) {
lighting_lut_data[lut_index] = new_data;
- glActiveTexture(GL_TEXTURE3 + lut_index);
- glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT,
- lighting_lut_data[lut_index].data());
+ glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle);
+ glBufferSubData(GL_TEXTURE_BUFFER, lut_index * new_data.size() * sizeof(GLvec2),
+ new_data.size() * sizeof(GLvec2), new_data.data());
}
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d9a3e9d1c..79acd4230 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -263,7 +263,7 @@ private:
struct {
UniformData data;
- bool lut_dirty[6];
+ std::array<bool, Pica::LightingRegs::NumLightingSampler> lut_dirty;
bool fog_lut_dirty;
bool proctex_noise_lut_dirty;
bool proctex_color_map_dirty;
@@ -279,8 +279,9 @@ private:
OGLBuffer uniform_buffer;
OGLFramebuffer framebuffer;
- std::array<OGLTexture, 6> lighting_luts;
- std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{};
+ OGLBuffer lighting_lut_buffer;
+ OGLTexture lighting_lut;
+ std::array<std::array<GLvec2, 256>, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{};
OGLTexture fog_lut;
std::array<GLuint, 128> fog_lut_data{};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index db53710aa..0c7c4dd5c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -73,6 +73,8 @@ PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) {
state.lighting.light[light_index].num = num;
state.lighting.light[light_index].directional = light.config.directional != 0;
state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0;
+ state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0;
+ state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0;
state.lighting.light[light_index].dist_atten_enable =
!regs.lighting.IsDistAttenDisabled(num);
state.lighting.light[light_index].spot_atten_enable =
@@ -518,14 +520,16 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
"vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
"vec3 light_vector = vec3(0.0);\n"
"vec3 refl_value = vec3(0.0);\n"
- "vec3 spot_dir = vec3(0.0);\n;";
+ "vec3 spot_dir = vec3(0.0);\n"
+ "vec3 half_vector = vec3(0.0);\n"
+ "float geo_factor = 1.0;\n";
- // Compute fragment normals
+ // Compute fragment normals and tangents
+ const std::string pertubation =
+ "2.0 * (" + SampleTexture(config, lighting.bump_selector) + ").rgb - 1.0";
if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
- // Bump mapping is enabled using a normal map, read perturbation vector from the selected
- // texture
- out += "vec3 surface_normal = 2.0 * (" + SampleTexture(config, lighting.bump_selector) +
- ").rgb - 1.0;\n";
+ // Bump mapping is enabled using a normal map
+ out += "vec3 surface_normal = " + pertubation + ";\n";
// Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher
// precision result
@@ -534,31 +538,41 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
"(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
}
+
+ // The tangent vector is not perturbed by the normal map and is just a unit vector.
+ out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n";
} else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) {
// Bump mapping is enabled using a tangent map
- LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)");
- UNIMPLEMENTED();
+ out += "vec3 surface_tangent = " + pertubation + ";\n";
+ // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant
+ // computation below, which is also confirmed on 3DS. So we don't bother recomputing here
+ // even if 'renorm' is enabled.
+
+ // The normal vector is not perturbed by the tangent map and is just a unit vector.
+ out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n";
} else {
- // No bump mapping - surface local normal is just a unit normal
+ // No bump mapping - surface local normal and tangent are just unit vectors
out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n";
+ out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n";
}
// Rotate the surface-local normal by the interpolated normal quaternion to convert it to
// eyespace.
- out += "vec3 normal = quaternion_rotate(normalize(normquat), surface_normal);\n";
+ out += "vec4 normalized_normquat = normalize(normquat);\n";
+ out += "vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n";
+ out += "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n";
- // Gets the index into the specified lookup table for specular lighting
- auto GetLutIndex = [&lighting](unsigned light_num, LightingRegs::LightingLutInput input,
- bool abs) {
- const std::string half_angle = "normalize(normalize(view) + light_vector)";
+ // Samples the specified lookup table for specular lighting
+ auto GetLutValue = [&lighting](LightingRegs::LightingSampler sampler, unsigned light_num,
+ LightingRegs::LightingLutInput input, bool abs) {
std::string index;
switch (input) {
case LightingRegs::LightingLutInput::NH:
- index = "dot(normal, " + half_angle + ")";
+ index = "dot(normal, normalize(half_vector))";
break;
case LightingRegs::LightingLutInput::VH:
- index = std::string("dot(normalize(view), " + half_angle + ")");
+ index = std::string("dot(normalize(view), normalize(half_vector))");
break;
case LightingRegs::LightingLutInput::NV:
@@ -573,6 +587,22 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
index = std::string("dot(light_vector, spot_dir)");
break;
+ case LightingRegs::LightingLutInput::CP:
+ // CP input is only available with configuration 7
+ if (lighting.config == LightingRegs::LightingConfig::Config7) {
+ // Note: even if the normal vector is modified by normal map, which is not the
+ // normal of the tangent plane anymore, the half angle vector is still projected
+ // using the modified normal vector.
+ std::string half_angle_proj = "normalize(half_vector) - normal / dot(normal, "
+ "normal) * dot(normal, normalize(half_vector))";
+ // Note: the half angle vector projection is confirmed not normalized before the dot
+ // product. The result is in fact not cos(phi) as the name suggested.
+ index = "dot(" + half_angle_proj + ", tangent)";
+ } else {
+ index = "0.0";
+ }
+ break;
+
default:
LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input);
UNIMPLEMENTED();
@@ -580,22 +610,18 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
break;
}
+ std::string sampler_string = std::to_string(static_cast<unsigned>(sampler));
+
if (abs) {
// LUT index is in the range of (0.0, 1.0)
index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")"
: "max(" + index + ", 0.0)";
+ return "LookupLightingLUTUnsigned(" + sampler_string + ", " + index + ")";
} else {
// LUT index is in the range of (-1.0, 1.0)
- index = "((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0";
+ return "LookupLightingLUTSigned(" + sampler_string + ", " + index + ")";
}
- return "(OFFSET_256 + SCALE_256 * clamp(" + index + ", 0.0, 1.0))";
- };
-
- // Gets the lighting lookup table value given the specified sampler and index
- auto GetLutValue = [](LightingRegs::LightingSampler sampler, std::string lut_index) {
- return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " +
- lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]");
};
// Write the code to emulate each enabled light
@@ -610,6 +636,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
out += "light_vector = normalize(" + light_src + ".position + view);\n";
out += "spot_dir = " + light_src + ".spot_direction;\n";
+ out += "half_vector = normalize(view) + light_vector;\n";
// Compute dot product of light_vector and normal, adjust if lighting is one-sided or
// two-sided
@@ -622,48 +649,57 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
if (light_config.spot_atten_enable &&
LightingRegs::IsLightingSamplerSupported(
lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) {
- std::string index =
- GetLutIndex(light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input);
- auto sampler = LightingRegs::SpotlightAttenuationSampler(light_config.num);
- spot_atten = "(" + std::to_string(lighting.lut_sp.scale) + " * " +
- GetLutValue(sampler, index) + ")";
+ std::string value =
+ GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num),
+ light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input);
+ spot_atten = "(" + std::to_string(lighting.lut_sp.scale) + " * " + value + ")";
}
// If enabled, compute distance attenuation value
std::string dist_atten = "1.0";
if (light_config.dist_atten_enable) {
- std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " +
- light_src + ".position) + " + light_src + ".dist_atten_bias)";
- index = "(OFFSET_256 + SCALE_256 * clamp(" + index + ", 0.0, 1.0))";
+ std::string index = "clamp(" + light_src + ".dist_atten_scale * length(-view - " +
+ light_src + ".position) + " + light_src +
+ ".dist_atten_bias, 0.0, 1.0)";
auto sampler = LightingRegs::DistanceAttenuationSampler(light_config.num);
- dist_atten = GetLutValue(sampler, index);
+ dist_atten = "LookupLightingLUTUnsigned(" +
+ std::to_string(static_cast<unsigned>(sampler)) + "," + index + ")";
}
// If enabled, clamp specular component if lighting result is negative
std::string clamp_highlights =
lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
+ if (light_config.geometric_factor_0 || light_config.geometric_factor_1) {
+ out += "geo_factor = dot(half_vector, half_vector);\n"
+ "geo_factor = geo_factor == 0.0 ? 0.0 : min(" +
+ dot_product + " / geo_factor, 1.0);\n";
+ }
+
// Specular 0 component
std::string d0_lut_value = "1.0";
if (lighting.lut_d0.enable &&
LightingRegs::IsLightingSamplerSupported(
lighting.config, LightingRegs::LightingSampler::Distribution0)) {
// Lookup specular "distribution 0" LUT value
- std::string index =
- GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input);
- d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " +
- GetLutValue(LightingRegs::LightingSampler::Distribution0, index) + ")";
+ std::string value =
+ GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num,
+ lighting.lut_d0.type, lighting.lut_d0.abs_input);
+ d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + value + ")";
}
std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
+ if (light_config.geometric_factor_0) {
+ specular_0 = "(" + specular_0 + " * geo_factor)";
+ }
// If enabled, lookup ReflectRed value, otherwise, 1.0 is used
if (lighting.lut_rr.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::ReflectRed)) {
- std::string index =
- GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input);
- std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " +
- GetLutValue(LightingRegs::LightingSampler::ReflectRed, index) + ")";
+ std::string value =
+ GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num,
+ lighting.lut_rr.type, lighting.lut_rr.abs_input);
+ value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + value + ")";
out += "refl_value.r = " + value + ";\n";
} else {
out += "refl_value.r = 1.0;\n";
@@ -673,11 +709,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
if (lighting.lut_rg.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::ReflectGreen)) {
- std::string index =
- GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input);
- std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " +
- GetLutValue(LightingRegs::LightingSampler::ReflectGreen, index) +
- ")";
+ std::string value =
+ GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num,
+ lighting.lut_rg.type, lighting.lut_rg.abs_input);
+ value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + value + ")";
out += "refl_value.g = " + value + ";\n";
} else {
out += "refl_value.g = refl_value.r;\n";
@@ -687,11 +722,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
if (lighting.lut_rb.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::ReflectBlue)) {
- std::string index =
- GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input);
- std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " +
- GetLutValue(LightingRegs::LightingSampler::ReflectBlue, index) +
- ")";
+ std::string value =
+ GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num,
+ lighting.lut_rb.type, lighting.lut_rb.abs_input);
+ value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + value + ")";
out += "refl_value.b = " + value + ";\n";
} else {
out += "refl_value.b = refl_value.r;\n";
@@ -703,23 +737,26 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
LightingRegs::IsLightingSamplerSupported(
lighting.config, LightingRegs::LightingSampler::Distribution1)) {
// Lookup specular "distribution 1" LUT value
- std::string index =
- GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input);
- d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " +
- GetLutValue(LightingRegs::LightingSampler::Distribution1, index) + ")";
+ std::string value =
+ GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num,
+ lighting.lut_d1.type, lighting.lut_d1.abs_input);
+ d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + value + ")";
}
std::string specular_1 =
"(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
+ if (light_config.geometric_factor_1) {
+ specular_1 = "(" + specular_1 + " * geo_factor)";
+ }
// Fresnel
if (lighting.lut_fr.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::Fresnel)) {
// Lookup fresnel LUT value
- std::string index =
- GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input);
- std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " +
- GetLutValue(LightingRegs::LightingSampler::Fresnel, index) + ")";
+ std::string value =
+ GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num,
+ lighting.lut_fr.type, lighting.lut_fr.abs_input);
+ value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + value + ")";
// Enabled for difffuse lighting alpha component
if (lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha ||
@@ -973,10 +1010,6 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) {
#define NUM_TEV_STAGES 6
#define NUM_LIGHTS 8
-// Texture coordinate offsets and scales
-#define OFFSET_256 (0.5 / 256.0)
-#define SCALE_256 (255.0 / 256.0)
-
in vec4 primary_color;
in vec2 texcoord[3];
in float texcoord0_w;
@@ -1018,7 +1051,7 @@ layout (std140) uniform shader_data {
};
uniform sampler2D tex[3];
-uniform sampler1D lut[6];
+uniform samplerBuffer lighting_lut;
uniform usampler1D fog_lut;
uniform sampler1D proctex_noise_lut;
uniform sampler1D proctex_color_map;
@@ -1031,6 +1064,24 @@ vec3 quaternion_rotate(vec4 q, vec3 v) {
return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v);
}
+float LookupLightingLUT(int lut_index, int index, float delta) {
+ vec2 entry = texelFetch(lighting_lut, lut_index * 256 + index).rg;
+ return entry.r + entry.g * delta;
+}
+
+float LookupLightingLUTUnsigned(int lut_index, float pos) {
+ int index = clamp(int(pos * 256.0), 0, 255);
+ float delta = pos * 256.0 - index;
+ return LookupLightingLUT(lut_index, index, delta);
+}
+
+float LookupLightingLUTSigned(int lut_index, float pos) {
+ int index = clamp(int(floor(pos * 128.0)), -128, 127); // floor: int() alone truncates toward 0
+ float delta = pos * 128.0 - index;
+ if (index < 0) index += 256;
+ return LookupLightingLUT(lut_index, index, delta);
+}
+
)";
if (config.state.proctex.enable)
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 9c90eadf9..2302ae453 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -94,6 +94,8 @@ union PicaShaderConfig {
bool two_sided_diffuse;
bool dist_atten_enable;
bool spot_atten_enable;
+ bool geometric_factor_0;
+ bool geometric_factor_1;
} light[8];
bool enable;
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index bf837a7fb..14e63115c 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -52,9 +52,7 @@ OpenGLState::OpenGLState() {
texture_unit.sampler = 0;
}
- for (auto& lut : lighting_luts) {
- lut.texture_1d = 0;
- }
+ lighting_lut.texture_buffer = 0;
fog_lut.texture_1d = 0;
@@ -185,7 +183,7 @@ void OpenGLState::Apply() const {
// Textures
for (unsigned i = 0; i < ARRAY_SIZE(texture_units); ++i) {
if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) {
- glActiveTexture(GL_TEXTURE0 + i);
+ glActiveTexture(TextureUnits::PicaTexture(i).Enum());
glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d);
}
if (texture_units[i].sampler != cur_state.texture_units[i].sampler) {
@@ -194,46 +192,44 @@ void OpenGLState::Apply() const {
}
// Lighting LUTs
- for (unsigned i = 0; i < ARRAY_SIZE(lighting_luts); ++i) {
- if (lighting_luts[i].texture_1d != cur_state.lighting_luts[i].texture_1d) {
- glActiveTexture(GL_TEXTURE3 + i);
- glBindTexture(GL_TEXTURE_1D, lighting_luts[i].texture_1d);
- }
+ if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) {
+ glActiveTexture(TextureUnits::LightingLUT.Enum());
+ // Bind the buffer texture requested by this state object; cur_state still holds the
+ // previously applied binding, so binding its value would leave the old texture in place.
+ glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer);
}
// Fog LUT
if (fog_lut.texture_1d != cur_state.fog_lut.texture_1d) {
- glActiveTexture(GL_TEXTURE9);
+ glActiveTexture(TextureUnits::FogLUT.Enum());
glBindTexture(GL_TEXTURE_1D, fog_lut.texture_1d);
}
// ProcTex Noise LUT
if (proctex_noise_lut.texture_1d != cur_state.proctex_noise_lut.texture_1d) {
- glActiveTexture(GL_TEXTURE10);
+ glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum());
glBindTexture(GL_TEXTURE_1D, proctex_noise_lut.texture_1d);
}
// ProcTex Color Map
if (proctex_color_map.texture_1d != cur_state.proctex_color_map.texture_1d) {
- glActiveTexture(GL_TEXTURE11);
+ glActiveTexture(TextureUnits::ProcTexColorMap.Enum());
glBindTexture(GL_TEXTURE_1D, proctex_color_map.texture_1d);
}
// ProcTex Alpha Map
if (proctex_alpha_map.texture_1d != cur_state.proctex_alpha_map.texture_1d) {
- glActiveTexture(GL_TEXTURE12);
+ glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum());
glBindTexture(GL_TEXTURE_1D, proctex_alpha_map.texture_1d);
}
// ProcTex LUT
if (proctex_lut.texture_1d != cur_state.proctex_lut.texture_1d) {
- glActiveTexture(GL_TEXTURE13);
+ glActiveTexture(TextureUnits::ProcTexLUT.Enum());
glBindTexture(GL_TEXTURE_1D, proctex_lut.texture_1d);
}
// ProcTex Diff LUT
if (proctex_diff_lut.texture_1d != cur_state.proctex_diff_lut.texture_1d) {
- glActiveTexture(GL_TEXTURE14);
+ glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
glBindTexture(GL_TEXTURE_1D, proctex_diff_lut.texture_1d);
}
@@ -274,6 +270,20 @@ void OpenGLState::ResetTexture(GLuint handle) {
unit.texture_2d = 0;
}
}
+ if (cur_state.lighting_lut.texture_buffer == handle)
+ cur_state.lighting_lut.texture_buffer = 0;
+ if (cur_state.fog_lut.texture_1d == handle)
+ cur_state.fog_lut.texture_1d = 0;
+ if (cur_state.proctex_noise_lut.texture_1d == handle)
+ cur_state.proctex_noise_lut.texture_1d = 0;
+ if (cur_state.proctex_color_map.texture_1d == handle)
+ cur_state.proctex_color_map.texture_1d = 0;
+ if (cur_state.proctex_alpha_map.texture_1d == handle)
+ cur_state.proctex_alpha_map.texture_1d = 0;
+ if (cur_state.proctex_lut.texture_1d == handle)
+ cur_state.proctex_lut.texture_1d = 0;
+ if (cur_state.proctex_diff_lut.texture_1d == handle)
+ cur_state.proctex_diff_lut.texture_1d = 0;
}
void OpenGLState::ResetSampler(GLuint handle) {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 7dcc03bd5..bb0218708 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -6,6 +6,29 @@
#include <glad/glad.h>
+// Named texture unit assignments used by OpenGLState when binding textures.
+namespace TextureUnits {
+
+// A texture unit identified by its zero-based index.
+struct TextureUnit {
+ GLint id;
+ // Returns the enumerant to pass to glActiveTexture for this unit (GL_TEXTURE0 + id).
+ constexpr GLenum Enum() const {
+ return static_cast<GLenum>(GL_TEXTURE0 + id);
+ }
+};
+
+// Unit for one of the regular 2D textures; `unit` is used directly as the unit index.
+constexpr TextureUnit PicaTexture(int unit) {
+ return TextureUnit{unit};
+}
+
+// Units for the lookup-table textures; they follow the three units (0-2) that
+// OpenGLState::texture_units occupies.
+constexpr TextureUnit LightingLUT{3};
+constexpr TextureUnit FogLUT{4};
+constexpr TextureUnit ProcTexNoiseLUT{5};
+constexpr TextureUnit ProcTexColorMap{6};
+constexpr TextureUnit ProcTexAlphaMap{7};
+constexpr TextureUnit ProcTexLUT{8};
+constexpr TextureUnit ProcTexDiffLUT{9};
+
+} // namespace TextureUnits
+
class OpenGLState {
public:
struct {
@@ -64,8 +87,8 @@ public:
} texture_units[3];
struct {
- GLuint texture_1d; // GL_TEXTURE_BINDING_1D
- } lighting_luts[6];
+ GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
+ } lighting_lut;
struct {
GLuint texture_1d; // GL_TEXTURE_BINDING_1D
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index 93d7b0b71..70298e211 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -55,6 +55,12 @@ inline GLenum WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder
GL_REPEAT, // WrapMode::Repeat
GL_MIRRORED_REPEAT, // WrapMode::MirroredRepeat
+ // TODO(wwylele): ClampToEdge2 and ClampToBorder2 are not properly implemented here. See the
+ // comments in enum WrapMode.
+ GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge2
+ GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder2
+ GL_REPEAT, // WrapMode::Repeat2
+ GL_REPEAT, // WrapMode::Repeat3
};
// Range check table for input
@@ -65,6 +71,13 @@ inline GLenum WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
return GL_CLAMP_TO_EDGE;
}
+ if (static_cast<u32>(mode) > 3) {
+ // It is still unclear whether mode 4-7 are valid, so log it if a game uses them.
+ // TODO(wwylele): telemetry should be added here so we can collect more info about which
+ // game uses this.
+ LOG_WARNING(Render_OpenGL, "Using texture wrap mode %u", static_cast<u32>(mode));
+ }
+
GLenum gl_mode = wrap_mode_table[mode];
// Check for dummy values indicating an unknown mode
diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp
index 5d9b6448c..42a57aab1 100644
--- a/src/video_core/shader/shader_jit_x64_compiler.cpp
+++ b/src/video_core/shader/shader_jit_x64_compiler.cpp
@@ -321,27 +321,27 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
case Instruction::FlowControlType::Or:
mov(eax, COND0);
mov(ebx, COND1);
- xor(eax, (instr.flow_control.refx.Value() ^ 1));
- xor(ebx, (instr.flow_control.refy.Value() ^ 1));
- or (eax, ebx);
+ xor_(eax, (instr.flow_control.refx.Value() ^ 1));
+ xor_(ebx, (instr.flow_control.refy.Value() ^ 1));
+ or_(eax, ebx);
break;
case Instruction::FlowControlType::And:
mov(eax, COND0);
mov(ebx, COND1);
- xor(eax, (instr.flow_control.refx.Value() ^ 1));
- xor(ebx, (instr.flow_control.refy.Value() ^ 1));
- and(eax, ebx);
+ xor_(eax, (instr.flow_control.refx.Value() ^ 1));
+ xor_(ebx, (instr.flow_control.refy.Value() ^ 1));
+ and_(eax, ebx);
break;
case Instruction::FlowControlType::JustX:
mov(eax, COND0);
- xor(eax, (instr.flow_control.refx.Value() ^ 1));
+ xor_(eax, (instr.flow_control.refx.Value() ^ 1));
break;
case Instruction::FlowControlType::JustY:
mov(eax, COND1);
- xor(eax, (instr.flow_control.refy.Value() ^ 1));
+ xor_(eax, (instr.flow_control.refy.Value() ^ 1));
break;
}
}
@@ -734,10 +734,10 @@ void JitShader::Compile_LOOP(Instruction instr) {
mov(LOOPCOUNT, dword[SETUP + offset]);
mov(LOOPCOUNT_REG, LOOPCOUNT);
shr(LOOPCOUNT_REG, 4);
- and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
+ and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
mov(LOOPINC, LOOPCOUNT);
shr(LOOPINC, 12);
- and(LOOPINC, 0xFF0); // Z-component is the incrementer
+ and_(LOOPINC, 0xFF0); // Z-component is the incrementer
movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count
add(LOOPCOUNT, 1); // Iteration count is X-component + 1
@@ -858,9 +858,9 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
mov(STATE, ABI_PARAM2);
// Zero address/loop registers
- xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32());
- xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32());
- xor(LOOPCOUNT_REG, LOOPCOUNT_REG);
+ xor_(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32());
+ xor_(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32());
+ xor_(LOOPCOUNT_REG, LOOPCOUNT_REG);
// Used to set a register to one
static const __m128 one = {1.f, 1.f, 1.f, 1.f};
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
index 8b7b1defb..cd7b6c39d 100644
--- a/src/video_core/swrasterizer/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -357,10 +357,22 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height)))
.ToFloat32();
- if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder &&
- (s < 0 || static_cast<u32>(s) >= texture.config.width)) ||
- (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder &&
- (t < 0 || static_cast<u32>(t) >= texture.config.height))) {
+ bool use_border_s = false;
+ bool use_border_t = false;
+
+ if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder) {
+ use_border_s = s < 0 || s >= static_cast<int>(texture.config.width);
+ } else if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder2) {
+ use_border_s = s >= static_cast<int>(texture.config.width);
+ }
+
+ if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder) {
+ use_border_t = t < 0 || t >= static_cast<int>(texture.config.height);
+ } else if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder2) {
+ use_border_t = t >= static_cast<int>(texture.config.height);
+ }
+
+ if (use_border_s || use_border_t) {
auto border_color = texture.config.border_color;
texture_color[i] = {border_color.r, border_color.g, border_color.b,
border_color.a};
diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp
index aeb6aeb8c..4f02b93f2 100644
--- a/src/video_core/swrasterizer/texturing.cpp
+++ b/src/video_core/swrasterizer/texturing.cpp
@@ -18,22 +18,33 @@ using TevStageConfig = TexturingRegs::TevStageConfig;
int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) {
switch (mode) {
+ case TexturingRegs::TextureConfig::ClampToEdge2:
+ // For negative coordinate, ClampToEdge2 behaves the same as Repeat
+ if (val < 0) {
+ return static_cast<int>(static_cast<unsigned>(val) % size);
+ }
+ // [[fallthrough]]
case TexturingRegs::TextureConfig::ClampToEdge:
val = std::max(val, 0);
- val = std::min(val, (int)size - 1);
+ val = std::min(val, static_cast<int>(size) - 1);
return val;
case TexturingRegs::TextureConfig::ClampToBorder:
return val;
+ case TexturingRegs::TextureConfig::ClampToBorder2:
+ // For ClampToBorder2, the case of positive coordinate beyond the texture size is already
+ // handled outside. Here we only handle the negative coordinate in the same way as Repeat.
+ case TexturingRegs::TextureConfig::Repeat2:
+ case TexturingRegs::TextureConfig::Repeat3:
case TexturingRegs::TextureConfig::Repeat:
- return (int)((unsigned)val % size);
+ return static_cast<int>(static_cast<unsigned>(val) % size);
case TexturingRegs::TextureConfig::MirroredRepeat: {
- unsigned int coord = ((unsigned)val % (2 * size));
+ unsigned int coord = (static_cast<unsigned>(val) % (2 * size));
if (coord >= size)
coord = 2 * size - 1 - coord;
- return (int)coord;
+ return static_cast<int>(coord);
}
default: