summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/threadsafe_queue.h11
-rw-r--r--src/video_core/engines/shader_bytecode.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp29
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp224
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h22
-rw-r--r--src/video_core/shader/decode/texture.cpp133
-rw-r--r--src/video_core/shader/shader_ir.h6
7 files changed, 248 insertions, 180 deletions
diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h
index e714ba5b3..8268bbd5c 100644
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -46,9 +46,16 @@ public:
ElementPtr* new_ptr = new ElementPtr();
write_ptr->next.store(new_ptr, std::memory_order_release);
write_ptr = new_ptr;
- cv.notify_one();
- ++size;
+ const size_t previous_size{size++};
+
+ // Acquire the mutex and then immediately release it as a fence.
+ // TODO(bunnei): This can be replaced with C++20 waitable atomics when properly supported.
+ // See discussion on https://github.com/yuzu-emu/yuzu/pull/3173 for details.
+ if (previous_size == 0) {
+ std::lock_guard lock{cv_mutex};
+ }
+ cv.notify_one();
}
void Pop() {
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 290d929df..d6a2cc8b8 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1292,6 +1292,7 @@ union Instruction {
BitField<50, 1, u64> dc_flag;
BitField<51, 1, u64> aoffi_flag;
BitField<52, 2, u64> component;
+ BitField<55, 1, u64> fp16_flag;
bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
@@ -1972,7 +1973,7 @@ private:
INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
- INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
+ INST("11011111--00----", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index fa7049bbe..d1ae4be6d 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1076,7 +1076,7 @@ private:
}
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
- const std::vector<TextureIR>& extras) {
+ const std::vector<TextureIR>& extras, bool sepparate_dc = false) {
constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -1091,7 +1091,8 @@ private:
expr += "Offset";
}
expr += '(' + GetSampler(meta->sampler) + ", ";
- expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
+ expr += coord_constructors.at(count + (has_array ? 1 : 0) +
+ (has_shadow && !sepparate_dc ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
expr += Visit(operation[i]).AsFloat();
@@ -1104,9 +1105,14 @@ private:
expr += ", float(" + Visit(meta->array).AsInt() + ')';
}
if (has_shadow) {
- expr += ", " + Visit(meta->depth_compare).AsFloat();
+ if (sepparate_dc) {
+ expr += "), " + Visit(meta->depth_compare).AsFloat();
+ } else {
+ expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
+ }
+ } else {
+ expr += ')';
}
- expr += ')';
for (const auto& variant : extras) {
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
@@ -1706,10 +1712,17 @@ private:
ASSERT(meta);
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
- return {GenerateTexture(operation, "Gather",
- {TextureAoffi{}, TextureArgument{type, meta->component}}) +
- GetSwizzle(meta->element),
- Type::Float};
+ if (meta->sampler.IsShadow()) {
+ return {GenerateTexture(operation, "Gather", {TextureAoffi{}}, true) +
+ GetSwizzle(meta->element),
+ Type::Float};
+ } else {
+ return {GenerateTexture(operation, "Gather",
+ {TextureAoffi{}, TextureArgument{type, meta->component}},
+ false) +
+ GetSwizzle(meta->element),
+ Type::Float};
+ }
}
Expression TextureQueryDimensions(Operation operation) {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a57a564f7..bba16afaf 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -24,19 +24,21 @@
namespace OpenGL {
-static const char vertex_shader[] = R"(
-#version 150 core
+namespace {
-in vec2 vert_position;
-in vec2 vert_tex_coord;
-out vec2 frag_tex_coord;
+constexpr char vertex_shader[] = R"(
+#version 430 core
+
+layout (location = 0) in vec2 vert_position;
+layout (location = 1) in vec2 vert_tex_coord;
+layout (location = 0) out vec2 frag_tex_coord;
// This is a truncated 3x3 matrix for 2D transformations:
// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
// The third column performs translation.
// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
// implicitly be [0, 0, 1]
-uniform mat3x2 modelview_matrix;
+layout (location = 0) uniform mat3x2 modelview_matrix;
void main() {
// Multiply input position by the rotscale part of the matrix and then manually translate by
@@ -47,34 +49,29 @@ void main() {
}
)";
-static const char fragment_shader[] = R"(
-#version 150 core
+constexpr char fragment_shader[] = R"(
+#version 430 core
-in vec2 frag_tex_coord;
-out vec4 color;
+layout (location = 0) in vec2 frag_tex_coord;
+layout (location = 0) out vec4 color;
-uniform sampler2D color_texture;
+layout (binding = 0) uniform sampler2D color_texture;
void main() {
- // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
- // support more framebuffer pixel formats.
color = texture(color_texture, frag_tex_coord);
}
)";
-/**
- * Vertex structure that the drawn screen rectangles are composed of.
- */
+constexpr GLint PositionLocation = 0;
+constexpr GLint TexCoordLocation = 1;
+constexpr GLint ModelViewMatrixLocation = 0;
+
struct ScreenRectVertex {
- ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) {
- position[0] = x;
- position[1] = y;
- tex_coord[0] = u;
- tex_coord[1] = v;
- }
+ constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v)
+ : position{{x, y}}, tex_coord{{u, v}} {}
- GLfloat position[2];
- GLfloat tex_coord[2];
+ std::array<GLfloat, 2> position;
+ std::array<GLfloat, 2> tex_coord;
};
/**
@@ -84,18 +81,82 @@ struct ScreenRectVertex {
* The projection part of the matrix is trivial, hence these operations are represented
* by a 3x2 matrix.
*/
-static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) {
+std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(float width, float height) {
std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order
// clang-format off
- matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
- matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
+ matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
+ matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
// Last matrix row is implicitly assumed to be [0, 0, 1].
// clang-format on
return matrix;
}
+const char* GetSource(GLenum source) {
+ switch (source) {
+ case GL_DEBUG_SOURCE_API:
+ return "API";
+ case GL_DEBUG_SOURCE_WINDOW_SYSTEM:
+ return "WINDOW_SYSTEM";
+ case GL_DEBUG_SOURCE_SHADER_COMPILER:
+ return "SHADER_COMPILER";
+ case GL_DEBUG_SOURCE_THIRD_PARTY:
+ return "THIRD_PARTY";
+ case GL_DEBUG_SOURCE_APPLICATION:
+ return "APPLICATION";
+ case GL_DEBUG_SOURCE_OTHER:
+ return "OTHER";
+ default:
+ UNREACHABLE();
+ return "Unknown source";
+ }
+}
+
+const char* GetType(GLenum type) {
+ switch (type) {
+ case GL_DEBUG_TYPE_ERROR:
+ return "ERROR";
+ case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR:
+ return "DEPRECATED_BEHAVIOR";
+ case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:
+ return "UNDEFINED_BEHAVIOR";
+ case GL_DEBUG_TYPE_PORTABILITY:
+ return "PORTABILITY";
+ case GL_DEBUG_TYPE_PERFORMANCE:
+ return "PERFORMANCE";
+ case GL_DEBUG_TYPE_OTHER:
+ return "OTHER";
+ case GL_DEBUG_TYPE_MARKER:
+ return "MARKER";
+ default:
+ UNREACHABLE();
+ return "Unknown type";
+ }
+}
+
+void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
+ const GLchar* message, const void* user_param) {
+ const char format[] = "{} {} {}: {}";
+ const char* const str_source = GetSource(source);
+ const char* const str_type = GetType(type);
+
+ switch (severity) {
+ case GL_DEBUG_SEVERITY_HIGH:
+ LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
+ break;
+ case GL_DEBUG_SEVERITY_MEDIUM:
+ LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
+ break;
+ case GL_DEBUG_SEVERITY_NOTIFICATION:
+ case GL_DEBUG_SEVERITY_LOW:
+ LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
+ break;
+ }
+}
+
+} // Anonymous namespace
+
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
: VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {}
@@ -138,9 +199,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
prev_state.Apply();
}
-/**
- * Loads framebuffer from emulated memory into the active OpenGL texture.
- */
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
// Framebuffer orientation handling
framebuffer_transform_flags = framebuffer.transform_flags;
@@ -181,19 +239,12 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
-/**
- * Fills active OpenGL texture with the given RGB color. Since the color is solid, the texture can
- * be 1x1 but will stretch across whatever it's rendered on.
- */
void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture) {
const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
}
-/**
- * Initializes the OpenGL state and creates persistent objects.
- */
void RendererOpenGL::InitOpenGLObjects() {
glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
0.0f);
@@ -203,10 +254,6 @@ void RendererOpenGL::InitOpenGLObjects() {
state.draw.shader_program = shader.handle;
state.AllDirty();
state.Apply();
- uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
- uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
- attrib_position = glGetAttribLocation(shader.handle, "vert_position");
- attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -217,14 +264,14 @@ void RendererOpenGL::InitOpenGLObjects() {
// Attach vertex data to VAO
glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
- glVertexArrayAttribFormat(vertex_array.handle, attrib_position, 2, GL_FLOAT, GL_FALSE,
+ glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, position));
- glVertexArrayAttribFormat(vertex_array.handle, attrib_tex_coord, 2, GL_FLOAT, GL_FALSE,
+ glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
offsetof(ScreenRectVertex, tex_coord));
- glVertexArrayAttribBinding(vertex_array.handle, attrib_position, 0);
- glVertexArrayAttribBinding(vertex_array.handle, attrib_tex_coord, 0);
- glEnableVertexArrayAttrib(vertex_array.handle, attrib_position);
- glEnableVertexArrayAttrib(vertex_array.handle, attrib_tex_coord);
+ glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0);
+ glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0);
+ glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation);
+ glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation);
glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0,
sizeof(ScreenRectVertex));
@@ -331,18 +378,18 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
static_cast<f32>(screen_info.texture.height);
}
- std::array<ScreenRectVertex, 4> vertices = {{
+ const std::array vertices = {
ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v),
ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v),
ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v),
ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
- }};
+ };
state.textures[0] = screen_info.display_texture;
state.framebuffer_srgb.enabled = screen_info.display_srgb;
state.AllDirty();
state.Apply();
- glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
+ glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Restore default state
state.framebuffer_srgb.enabled = false;
@@ -351,9 +398,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
state.Apply();
}
-/**
- * Draws the emulated screens to the emulator window.
- */
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
if (renderer_settings.set_background_color) {
// Update background color before drawing
@@ -367,21 +411,17 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glClear(GL_COLOR_BUFFER_BIT);
// Set projection matrix
- std::array<GLfloat, 3 * 2> ortho_matrix =
- MakeOrthographicMatrix((float)layout.width, (float)layout.height);
- glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data());
-
- // Bind texture in Texture Unit 0
- glActiveTexture(GL_TEXTURE0);
- glUniform1i(uniform_color_texture, 0);
+ const std::array ortho_matrix =
+ MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
+ glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());
- DrawScreenTriangles(screen_info, (float)screen.left, (float)screen.top,
- (float)screen.GetWidth(), (float)screen.GetHeight());
+ DrawScreenTriangles(screen_info, static_cast<float>(screen.left),
+ static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()),
+ static_cast<float>(screen.GetHeight()));
m_current_frame++;
}
-/// Updates the framerate
void RendererOpenGL::UpdateFramerate() {}
void RendererOpenGL::CaptureScreenshot() {
@@ -418,63 +458,6 @@ void RendererOpenGL::CaptureScreenshot() {
renderer_settings.screenshot_requested = false;
}
-static const char* GetSource(GLenum source) {
-#define RET(s) \
- case GL_DEBUG_SOURCE_##s: \
- return #s
- switch (source) {
- RET(API);
- RET(WINDOW_SYSTEM);
- RET(SHADER_COMPILER);
- RET(THIRD_PARTY);
- RET(APPLICATION);
- RET(OTHER);
- default:
- UNREACHABLE();
- return "Unknown source";
- }
-#undef RET
-}
-
-static const char* GetType(GLenum type) {
-#define RET(t) \
- case GL_DEBUG_TYPE_##t: \
- return #t
- switch (type) {
- RET(ERROR);
- RET(DEPRECATED_BEHAVIOR);
- RET(UNDEFINED_BEHAVIOR);
- RET(PORTABILITY);
- RET(PERFORMANCE);
- RET(OTHER);
- RET(MARKER);
- default:
- UNREACHABLE();
- return "Unknown type";
- }
-#undef RET
-}
-
-static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
- GLsizei length, const GLchar* message, const void* user_param) {
- const char format[] = "{} {} {}: {}";
- const char* const str_source = GetSource(source);
- const char* const str_type = GetType(type);
-
- switch (severity) {
- case GL_DEBUG_SEVERITY_HIGH:
- LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
- break;
- case GL_DEBUG_SEVERITY_MEDIUM:
- LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
- break;
- case GL_DEBUG_SEVERITY_NOTIFICATION:
- case GL_DEBUG_SEVERITY_LOW:
- LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
- break;
- }
-}
-
bool RendererOpenGL::Init() {
Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
@@ -495,7 +478,6 @@ bool RendererOpenGL::Init() {
return true;
}
-/// Shutdown the renderer
void RendererOpenGL::ShutDown() {}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cf26628ca..b56328a7f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -59,21 +59,31 @@ public:
void ShutDown() override;
private:
+ /// Initializes the OpenGL state and creates persistent objects.
void InitOpenGLObjects();
+
void AddTelemetryFields();
+
void CreateRasterizer();
void ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer);
+
+ /// Draws the emulated screens to the emulator window.
void DrawScreen(const Layout::FramebufferLayout& layout);
+
void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h);
+
+ /// Updates the framerate.
void UpdateFramerate();
void CaptureScreenshot();
- // Loads framebuffer from emulated memory into the display information structure
+ /// Loads framebuffer from emulated memory into the active OpenGL texture.
void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
- // Fills active OpenGL texture with the given RGBA color.
+
+ /// Fills active OpenGL texture with the given RGB color.Since the color is solid, the texture
+ /// can be 1x1 but will stretch across whatever it's rendered on.
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
@@ -94,14 +104,6 @@ private:
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
- // Shader uniform location indices
- GLuint uniform_modelview_matrix;
- GLuint uniform_color_texture;
-
- // Shader attribute input indices
- GLuint attrib_position;
- GLuint attrib_tex_coord;
-
/// Used for transforming the framebuffer orientation
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
Common::Rectangle<int> framebuffer_crop_rect;
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index da8e886df..994c05611 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -107,8 +107,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::TLD4S: {
- UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
+ const bool uses_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
+ UNIMPLEMENTED_IF_MSG(uses_aoffi, "AOFFI is not implemented");
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
@@ -116,29 +116,40 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
std::vector<Node> coords;
+ Node dc_reg;
if (depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
coords.push_back(op_a);
coords.push_back(op_y);
- coords.push_back(op_b);
+ dc_reg = uses_aoffi ? GetRegister(instr.gpr20.Value() + 1) : op_b;
} else {
coords.push_back(op_a);
- coords.push_back(op_b);
+ if (uses_aoffi) {
+ const Node op_y = GetRegister(instr.gpr8.Value() + 1);
+ coords.push_back(op_y);
+ } else {
+ coords.push_back(op_b);
+ }
+ dc_reg = {};
}
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
const SamplerInfo info{TextureType::Texture2D, false, depth_compare};
- const auto& sampler = GetSampler(instr.sampler, info);
+ const Sampler& sampler = *GetSampler(instr.sampler, info);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, component, element};
+ MetaTexture meta{sampler, {}, dc_reg, {}, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
- WriteTexsInstructionFloat(bb, instr, values, true);
+ if (instr.tld4s.fp16_flag) {
+ WriteTexsInstructionHalfFloat(bb, instr, values, true);
+ } else {
+ WriteTexsInstructionFloat(bb, instr, values, true);
+ }
break;
}
case OpCode::Id::TXD_B:
@@ -154,9 +165,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const auto texture_type = instr.txd.texture_type.Value();
const auto coord_count = GetCoordCount(texture_type);
- const auto& sampler = is_bindless
- ? GetBindlessSampler(base_reg, {{texture_type, false, false}})
- : GetSampler(instr.sampler, {{texture_type, false, false}});
+ const Sampler* sampler = is_bindless
+ ? GetBindlessSampler(base_reg, {{texture_type, false, false}})
+ : GetSampler(instr.sampler, {{texture_type, false, false}});
+ Node4 values;
+ if (sampler == nullptr) {
+ for (u32 element = 0; element < values.size(); ++element) {
+ values[element] = Immediate(0);
+ }
+ WriteTexInstructionFloat(bb, instr, values);
+ break;
+ }
if (is_bindless) {
base_reg++;
}
@@ -170,9 +189,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
derivates.push_back(GetRegister(derivate_reg + derivate + 1));
}
- Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
- MetaTexture meta{sampler, {}, {}, {}, derivates, {}, {}, {}, element};
+ MetaTexture meta{*sampler, {}, {}, {}, derivates, {}, {}, {}, element};
values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
}
@@ -187,9 +205,24 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
// TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
- const auto& sampler =
+ const Sampler* sampler =
is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler);
+ if (sampler == nullptr) {
+ u32 indexer = 0;
+ for (u32 element = 0; element < 4; ++element) {
+ if (!instr.txq.IsComponentEnabled(element)) {
+ continue;
+ }
+ const Node value = Immediate(0);
+ SetTemporary(bb, indexer++, value);
+ }
+ for (u32 i = 0; i < indexer; ++i) {
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+ }
+ break;
+ }
+
u32 indexer = 0;
switch (instr.txq.query_type) {
case Tegra::Shader::TextureQueryType::Dimension: {
@@ -197,7 +230,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta,
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -223,9 +256,24 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
- const auto& sampler =
+ const Sampler* sampler =
is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler);
+ if (sampler == nullptr) {
+ u32 indexer = 0;
+ for (u32 element = 0; element < 2; ++element) {
+ if (!instr.tmml.IsComponentEnabled(element)) {
+ continue;
+ }
+ const Node value = Immediate(0);
+ SetTemporary(bb, indexer++, value);
+ }
+ for (u32 i = 0; i < indexer; ++i) {
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+ }
+ break;
+ }
+
std::vector<Node> coords;
// TODO: Add coordinates for different samplers once other texture types are implemented.
@@ -251,7 +299,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
continue;
}
auto params = coords;
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporary(bb, indexer++, value);
}
@@ -307,7 +355,7 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample
sampler->is_buffer != 0};
}
-const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
+const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
std::optional<SamplerInfo> sampler_info) {
const auto offset = static_cast<u32>(sampler.index.Value());
const auto info = GetSamplerInfo(sampler_info, offset);
@@ -319,21 +367,24 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
if (it != used_samplers.end()) {
ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer);
- return *it;
+ return &(*it);
}
// Otherwise create a new mapping for this sampler
const auto next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
- info.is_buffer);
+ return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
+ info.is_buffer);
}
-const Sampler& ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
+const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
std::optional<SamplerInfo> sampler_info) {
const Node sampler_register = GetRegister(reg);
const auto [base_sampler, buffer, offset] =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
ASSERT(base_sampler != nullptr);
+ if (base_sampler == nullptr) {
+ return nullptr;
+ }
const auto info = GetSamplerInfo(sampler_info, offset, buffer);
@@ -346,13 +397,13 @@ const Sampler& ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
if (it != used_samplers.end()) {
ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
it->IsShadow() == info.is_shadow);
- return *it;
+ return &(*it);
}
// Otherwise create a new mapping for this sampler
const auto next_index = static_cast<u32>(used_samplers.size());
- return used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
- info.is_shadow, info.is_buffer);
+ return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
+ info.is_shadow, info.is_buffer);
}
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -395,14 +446,14 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const
}
void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
- const Node4& components) {
+ const Node4& components, bool ignore_mask) {
// TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
// float instruction).
Node4 values;
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
- if (!instr.texs.IsComponentEnabled(component))
+ if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
continue;
values[dest_elem++] = components[component];
}
@@ -438,8 +489,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
"This method is not supported.");
const SamplerInfo info{texture_type, is_array, is_shadow, false};
- const auto& sampler =
+ const Sampler* sampler =
is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info);
+ Node4 values;
+ if (sampler == nullptr) {
+ for (u32 element = 0; element < values.size(); ++element) {
+ values[element] = Immediate(0);
+ }
+ return values;
+ }
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
@@ -478,10 +536,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
}
}
- Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto copy_coords = coords;
- MetaTexture meta{sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element};
+ MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
@@ -594,8 +651,15 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
u64 parameter_register = instr.gpr20.Value();
const SamplerInfo info{texture_type, is_array, depth_compare, false};
- const auto& sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
- : GetSampler(instr.sampler, info);
+ const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
+ : GetSampler(instr.sampler, info);
+ Node4 values;
+ if (sampler == nullptr) {
+ for (u32 element = 0; element < values.size(); ++element) {
+ values[element] = Immediate(0);
+ }
+ return values;
+ }
std::vector<Node> aoffi;
if (is_aoffi) {
@@ -610,10 +674,9 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
: Immediate(static_cast<u32>(instr.tld4.component));
- Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component,
+ MetaTexture meta{*sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component,
element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -642,7 +705,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
// const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
// const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
- const auto& sampler = GetSampler(instr.sampler);
+ const auto& sampler = *GetSampler(instr.sampler);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@@ -655,7 +718,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
}
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
- const auto& sampler = GetSampler(instr.sampler);
+ const Sampler& sampler = *GetSampler(instr.sampler);
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 580f84fcb..04ae5f822 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -313,11 +313,11 @@ private:
std::optional<u32> buffer = std::nullopt);
/// Accesses a texture sampler
- const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
+ const Sampler* GetSampler(const Tegra::Shader::Sampler& sampler,
std::optional<SamplerInfo> sampler_info = std::nullopt);
/// Accesses a texture sampler for a bindless texture.
- const Sampler& GetBindlessSampler(Tegra::Shader::Register reg,
+ const Sampler* GetBindlessSampler(Tegra::Shader::Register reg,
std::optional<SamplerInfo> sampler_info = std::nullopt);
/// Accesses an image.
@@ -338,7 +338,7 @@ private:
void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components, bool ignore_mask = false);
void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components);
+ const Node4& components, bool ignore_mask = false);
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,