summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp308
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h16
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp1555
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h426
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp460
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h8
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp83
-rw-r--r--src/video_core/renderer_opengl/gl_state.h43
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h89
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp17
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h2
15 files changed, 1214 insertions, 1809 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6f05f24a0..ea138d402 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -112,7 +112,7 @@ RasterizerOpenGL::RasterizerOpenGL() {
glEnable(GL_BLEND);
- NGLOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
+ LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
}
RasterizerOpenGL::~RasterizerOpenGL() {
@@ -146,7 +146,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
u64 size = end - start + 1;
// Copy vertex array data
- res_cache.FlushRegion(start, size, nullptr);
Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size);
// Bind the vertex array to the buffer at the current offset.
@@ -166,9 +165,9 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
// assume every shader uses them all.
for (unsigned index = 0; index < 16; ++index) {
auto& attrib = regs.vertex_attrib_format[index];
- NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
- index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
- attrib.offset.Value(), attrib.IsNormalized());
+ LOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
+ index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
+ attrib.offset.Value(), attrib.IsNormalized());
auto& buffer = regs.vertex_array[attrib.buffer];
ASSERT(buffer.IsEnabled());
@@ -197,8 +196,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
- // shaders.
- u32 current_constbuffer_bindpoint = 0;
+ // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
+ u32 current_constbuffer_bindpoint = uniform_buffers.size();
u32 current_texture_bindpoint = 0;
for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
@@ -252,8 +251,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
break;
}
default:
- NGLOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}",
- index, shader_config.enable.Value(), shader_config.offset);
+ LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
+ shader_config.enable.Value(), shader_config.offset);
UNREACHABLE();
}
@@ -298,17 +297,16 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
return true;
}
-void RasterizerOpenGL::DrawArrays() {
- if (accelerate_draw == AccelDraw::Disabled)
- return;
-
- MICROPROFILE_SCOPE(OpenGL_Drawing);
+std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb,
+ bool using_depth_fb) {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
- // TODO(bunnei): Implement these
+ // Sync the depth test state before configuring the framebuffer surfaces.
+ SyncDepthTestState();
+
+ // TODO(bunnei): Implement this
const bool has_stencil = false;
- const bool using_color_fb = true;
- const bool using_depth_fb = false;
+
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
const bool write_color_fb =
@@ -325,35 +323,21 @@ void RasterizerOpenGL::DrawArrays() {
std::tie(color_surface, depth_surface, surfaces_rect) =
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect);
- const u16 res_scale = color_surface != nullptr
- ? color_surface->res_scale
- : (depth_surface == nullptr ? 1u : depth_surface->res_scale);
-
MathUtil::Rectangle<u32> draw_rect{
+ static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
+ surfaces_rect.left, surfaces_rect.right)), // Left
+ static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
+ surfaces_rect.bottom, surfaces_rect.top)), // Top
+ static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right,
+ surfaces_rect.left, surfaces_rect.right)), // Right
static_cast<u32>(
- std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale,
- surfaces_rect.left, surfaces_rect.right)), // Left
- static_cast<u32>(
- std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale,
- surfaces_rect.bottom, surfaces_rect.top)), // Top
- static_cast<u32>(
- std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale,
- surfaces_rect.left, surfaces_rect.right)), // Right
- static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
- viewport_rect.bottom * res_scale,
- surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
+ std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom,
+ surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
// Bind the framebuffer surfaces
BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
- // Sync the viewport
- SyncViewport(surfaces_rect, res_scale);
-
- // Sync the blend state registers
- SyncBlendState();
-
- // TODO(bunnei): Sync framebuffer_scale uniform here
- // TODO(bunnei): Sync scissorbox uniform(s) here
+ SyncViewport(surfaces_rect);
// Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
// scissor test to prevent drawing outside of the framebuffer region
@@ -364,6 +348,66 @@ void RasterizerOpenGL::DrawArrays() {
state.scissor.height = draw_rect.GetHeight();
state.Apply();
+ // Only return the surface to be marked as dirty if writing to it is enabled.
+ return std::make_pair(write_color_fb ? color_surface : nullptr,
+ write_depth_fb ? depth_surface : nullptr);
+}
+
+void RasterizerOpenGL::Clear() {
+ const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+
+ bool use_color_fb = false;
+ bool use_depth_fb = false;
+
+ GLbitfield clear_mask = 0;
+ if (regs.clear_buffers.R && regs.clear_buffers.G && regs.clear_buffers.B &&
+ regs.clear_buffers.A) {
+ clear_mask |= GL_COLOR_BUFFER_BIT;
+ use_color_fb = true;
+ }
+ if (regs.clear_buffers.Z) {
+ clear_mask |= GL_DEPTH_BUFFER_BIT;
+ use_depth_fb = true;
+ }
+
+ if (clear_mask == 0)
+ return;
+
+ auto [dirty_color_surface, dirty_depth_surface] =
+ ConfigureFramebuffers(use_color_fb, use_depth_fb);
+
+ // TODO(Subv): Support clearing only partial colors.
+ glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2],
+ regs.clear_color[3]);
+ glClearDepth(regs.clear_depth);
+
+ glClear(clear_mask);
+
+ // Mark framebuffer surfaces as dirty
+ if (dirty_color_surface != nullptr) {
+ res_cache.MarkSurfaceAsDirty(dirty_color_surface);
+ }
+ if (dirty_depth_surface != nullptr) {
+ res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
+ }
+}
+
+void RasterizerOpenGL::DrawArrays() {
+ if (accelerate_draw == AccelDraw::Disabled)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_Drawing);
+ const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+
+ auto [dirty_color_surface, dirty_depth_surface] =
+ ConfigureFramebuffers(true, regs.zeta.Address() != 0);
+
+ SyncBlendState();
+ SyncCullMode();
+
+ // TODO(bunnei): Sync framebuffer_scale uniform here
+ // TODO(bunnei): Sync scissorbox uniform(s) here
+
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
@@ -420,14 +464,16 @@ void RasterizerOpenGL::DrawArrays() {
const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)};
if (is_indexed) {
- const GLint index_min{static_cast<GLint>(regs.index_array.first)};
- const GLint index_max{static_cast<GLint>(regs.index_array.first + regs.index_array.count)};
- glDrawRangeElementsBaseVertex(primitive_mode, index_min, index_max, regs.index_array.count,
- MaxwellToGL::IndexFormat(regs.index_array.format),
- reinterpret_cast<const void*>(index_buffer_offset),
- -index_min);
+ const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)};
+
+ // Adjust the index buffer offset so it points to the first desired index.
+ index_buffer_offset += regs.index_array.first * regs.index_array.FormatSizeInBytes();
+
+ glDrawElementsBaseVertex(primitive_mode, regs.index_array.count,
+ MaxwellToGL::IndexFormat(regs.index_array.format),
+ reinterpret_cast<const void*>(index_buffer_offset), base_vertex);
} else {
- glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count);
+ glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count);
}
// Disable scissor test
@@ -437,24 +483,16 @@ void RasterizerOpenGL::DrawArrays() {
// Unbind textures for potential future use as framebuffer attachments
for (auto& texture_unit : state.texture_units) {
- texture_unit.texture_2d = 0;
+ texture_unit.Unbind();
}
state.Apply();
// Mark framebuffer surfaces as dirty
- MathUtil::Rectangle<u32> draw_rect_unscaled{
- draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale,
- draw_rect.bottom / res_scale};
-
- if (color_surface != nullptr && write_color_fb) {
- auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled);
- res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
- color_surface);
+ if (dirty_color_surface != nullptr) {
+ res_cache.MarkSurfaceAsDirty(dirty_color_surface);
}
- if (depth_surface != nullptr && write_depth_fb) {
- auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled);
- res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
- depth_surface);
+ if (dirty_depth_surface != nullptr) {
+ res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
}
}
@@ -462,7 +500,7 @@ void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
void RasterizerOpenGL::FlushAll() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- res_cache.FlushAll();
+ res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
}
void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
@@ -472,13 +510,13 @@ void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- res_cache.InvalidateRegion(addr, size, nullptr);
+ res_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushRegion(addr, size);
- res_cache.InvalidateRegion(addr, size, nullptr);
+ res_cache.InvalidateRegion(addr, size);
}
bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) {
@@ -497,45 +535,28 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) {
return true;
}
-bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer,
+bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride,
ScreenInfo& screen_info) {
- if (framebuffer_addr == 0) {
- return false;
+ if (!framebuffer_addr) {
+ return {};
}
+
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
- SurfaceParams src_params;
- src_params.cpu_addr = framebuffer_addr;
- src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0);
- src_params.width = std::min(framebuffer.width, pixel_stride);
- src_params.height = framebuffer.height;
- src_params.stride = pixel_stride;
- src_params.is_tiled = true;
- src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
- src_params.pixel_format =
- SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
- src_params.component_type =
- SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format);
- src_params.UpdateParams();
-
- MathUtil::Rectangle<u32> src_rect;
- Surface src_surface;
- std::tie(src_surface, src_rect) =
- res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
-
- if (src_surface == nullptr) {
- return false;
+ const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
+ if (!surface) {
+ return {};
}
- u32 scaled_width = src_surface->GetScaledWidth();
- u32 scaled_height = src_surface->GetScaledHeight();
+ // Verify that the cached surface is the same size and format as the requested framebuffer
+ const auto& params{surface->GetSurfaceParams()};
+ const auto& pixel_format{SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)};
+ ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
+ ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
+ ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different");
- screen_info.display_texcoords = MathUtil::Rectangle<float>(
- (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
- (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
-
- screen_info.display_texture = src_surface->texture.handle;
+ screen_info.display_texture = surface->Texture().handle;
return true;
}
@@ -608,32 +629,44 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address);
- std::vector<u8> data;
+ size_t size = 0;
+
if (used_buffer.IsIndirect()) {
// Buffer is accessed indirectly, so upload the entire thing
- data.resize(buffer.size * sizeof(float));
+ size = buffer.size * sizeof(float);
+
+ if (size > MaxConstbufferSize) {
+ LOG_ERROR(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size,
+ MaxConstbufferSize);
+ size = MaxConstbufferSize;
+ }
} else {
// Buffer is accessed directly, upload just what we use
- data.resize(used_buffer.GetSize() * sizeof(float));
+ size = used_buffer.GetSize() * sizeof(float);
}
+ // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
+ // UBO alignment requirements.
+ size = Common::AlignUp(size, sizeof(GLvec4));
+ ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
+
+ std::vector<u8> data(size);
Memory::ReadBlock(*addr, data.data(), data.size());
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo);
- glBufferData(GL_SHADER_STORAGE_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW);
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
+ glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo);
+ glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW);
+ glBindBuffer(GL_UNIFORM_BUFFER, 0);
// Now configure the bindpoint of the buffer inside the shader
std::string buffer_name = used_buffer.GetName();
- GLuint index =
- glGetProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, buffer_name.c_str());
+ GLuint index = glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str());
if (index != -1)
- glShaderStorageBlockBinding(program, index, buffer_draw_state.bindpoint);
+ glUniformBlockBinding(program, index, buffer_draw_state.bindpoint);
}
state.Apply();
- return current_bindpoint + entries.size();
+ return current_bindpoint + static_cast<u32>(entries.size());
}
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit,
@@ -653,16 +686,23 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
// Bind the uniform to the sampler.
GLint uniform = glGetUniformLocation(program, entry.GetName().c_str());
- ASSERT(uniform != -1);
+ if (uniform == -1) {
+ continue;
+ }
+
glProgramUniform1i(program, uniform, current_bindpoint);
const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
- ASSERT(texture.enabled);
+
+ if (!texture.enabled) {
+ state.texture_units[current_bindpoint].texture_2d = 0;
+ continue;
+ }
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
Surface surface = res_cache.GetTextureSurface(texture);
if (surface != nullptr) {
- state.texture_units[current_bindpoint].texture_2d = surface->texture.handle;
+ state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle;
state.texture_units[current_bindpoint].swizzle.r =
MaxwellToGL::SwizzleSource(texture.tic.x_source);
state.texture_units[current_bindpoint].swizzle.g =
@@ -679,7 +719,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
state.Apply();
- return current_unit + entries.size();
+ return current_unit + static_cast<u32>(entries.size());
}
void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
@@ -688,16 +728,16 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
state.Apply();
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
- color_surface != nullptr ? color_surface->texture.handle : 0, 0);
+ color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
if (depth_surface != nullptr) {
if (has_stencil) {
// attach both depth and stencil
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- depth_surface->texture.handle, 0);
+ depth_surface->Texture().handle, 0);
} else {
// attach depth
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- depth_surface->texture.handle, 0);
+ depth_surface->Texture().handle, 0);
// clear stencil attachment
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
}
@@ -708,14 +748,14 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
}
}
-void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) {
+void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
- state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale;
- state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale;
- state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth() * res_scale);
- state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight() * res_scale);
+ state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left;
+ state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom;
+ state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
+ state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
}
void RasterizerOpenGL::SyncClipEnabled() {
@@ -727,7 +767,27 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- UNREACHABLE();
+ const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+
+ state.cull.enabled = regs.cull.enabled != 0;
+
+ if (state.cull.enabled) {
+ state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
+ state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
+
+ const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 ||
+ regs.viewport_transform[0].scale_y < 0.0f};
+
+ // If the GPU is configured to flip the rasterized triangles, then we need to flip the
+ // notion of front and back. Note: We flip the triangles when the value of the register is 0
+ // because OpenGL already does it for us.
+ if (flip_triangles) {
+ if (state.cull.front_face == GL_CCW)
+ state.cull.front_face = GL_CW;
+ else if (state.cull.front_face == GL_CW)
+ state.cull.front_face = GL_CCW;
+ }
+ }
}
void RasterizerOpenGL::SyncDepthScale() {
@@ -738,9 +798,20 @@ void RasterizerOpenGL::SyncDepthOffset() {
UNREACHABLE();
}
+void RasterizerOpenGL::SyncDepthTestState() {
+ const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
+
+ state.depth.test_enabled = regs.depth_test_enable != 0;
+ state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
+
+ if (!state.depth.test_enabled)
+ return;
+
+ state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
+}
+
void RasterizerOpenGL::SyncBlendState() {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
- ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
// TODO(Subv): Support more than just render target 0.
state.blend.enabled = regs.blend.enable[0] != 0;
@@ -748,6 +819,7 @@ void RasterizerOpenGL::SyncBlendState() {
if (!state.blend.enabled)
return;
+ ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented");
state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb);
state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index b7c8cf843..c406142e4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -7,6 +7,7 @@
#include <array>
#include <cstddef>
#include <memory>
+#include <utility>
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
@@ -28,6 +29,7 @@ public:
~RasterizerOpenGL() override;
void DrawArrays() override;
+ void Clear() override;
void NotifyMaxwellRegisterChanged(u32 method) override;
void FlushAll() override;
void FlushRegion(Tegra::GPUVAddr addr, u64 size) override;
@@ -54,6 +56,11 @@ public:
OGLShader shader;
};
+ /// Maximum supported size that a constbuffer can have in bytes.
+ static constexpr size_t MaxConstbufferSize = 0x10000;
+ static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
+ "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
+
private:
class SamplerInfo {
public:
@@ -76,6 +83,10 @@ private:
u32 border_color_a;
};
+ /// Configures the color and depth framebuffer states and returns the dirty <Color, Depth>
+ /// surfaces if writing was enabled.
+ std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb);
+
/// Binds the framebuffer color and depth surface
void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
bool has_stencil);
@@ -104,7 +115,7 @@ private:
u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries);
/// Syncs the viewport to match the guest state
- void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale);
+ void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect);
/// Syncs the clip enabled status to match the guest state
void SyncClipEnabled();
@@ -121,6 +132,9 @@ private:
/// Syncs the depth offset to match the guest state
void SyncDepthOffset();
+ /// Syncs the depth test state to match the guest state
+ void SyncDepthTestState();
+
/// Syncs the blend state to match the guest state
void SyncBlendState();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index ff48a2669..323ff7408 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -1,36 +1,23 @@
-// Copyright 2015 Citra Emulator Project
+// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
-#include <atomic>
-#include <cstring>
-#include <iterator>
-#include <memory>
-#include <utility>
-#include <vector>
-#include <boost/optional.hpp>
-#include <boost/range/iterator_range.hpp>
#include <glad/glad.h>
+
#include "common/alignment.h"
-#include "common/bit_field.h"
-#include "common/color.h"
-#include "common/logging/log.h"
-#include "common/math_util.h"
+#include "common/assert.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "core/core.h"
-#include "core/frontend/emu_window.h"
#include "core/hle/kernel/process.h"
-#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
-#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/textures/astc.h"
#include "video_core/textures/decoders.h"
#include "video_core/utils.h"
-#include "video_core/video_core.h"
using SurfaceType = SurfaceParams::SurfaceType;
using PixelFormat = SurfaceParams::PixelFormat;
@@ -40,89 +27,178 @@ struct FormatTuple {
GLint internal_format;
GLenum format;
GLenum type;
+ ComponentType component_type;
bool compressed;
};
+/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
+ const Tegra::Texture::FullTextureInfo& config) {
+
+ SurfaceParams params{};
+ params.addr = config.tic.Address();
+ params.is_tiled = config.tic.IsTiled();
+ params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
+ params.pixel_format = PixelFormatFromTextureFormat(config.tic.format);
+ params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
+ params.type = GetFormatType(params.pixel_format);
+ params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
+ params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
+ params.unaligned_height = config.tic.Height();
+ params.size_in_bytes = params.SizeInBytes();
+ return params;
+}
+
+/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(
+ const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) {
+
+ SurfaceParams params{};
+ params.addr = config.Address();
+ params.is_tiled = true;
+ params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
+ params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+ params.component_type = ComponentTypeFromRenderTarget(config.format);
+ params.type = GetFormatType(params.pixel_format);
+ params.width = config.width;
+ params.height = config.height;
+ params.unaligned_height = config.height;
+ params.size_in_bytes = params.SizeInBytes();
+ return params;
+}
+
+/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
+ const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, Tegra::GPUVAddr zeta_address,
+ Tegra::DepthFormat format) {
+
+ SurfaceParams params{};
+ params.addr = zeta_address;
+ params.is_tiled = true;
+ params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
+ params.pixel_format = PixelFormatFromDepthFormat(format);
+ params.component_type = ComponentTypeFromDepthFormat(format);
+ params.type = GetFormatType(params.pixel_format);
+ params.size_in_bytes = params.SizeInBytes();
+ params.width = config.width;
+ params.height = config.height;
+ params.unaligned_height = config.height;
+ params.size_in_bytes = params.SizeInBytes();
+ return params;
+}
+
static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8
- {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, false}, // B5G6R5
- {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10
- {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5
- {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8
- {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
- {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
- {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1
- {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
- {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
- {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8
+ {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5
+ {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
+ false}, // A2B10G10R10
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8
+ {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
+ {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
+ false}, // R11FG11FB10F
+ {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
+ {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+ true}, // DXT1
+ {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+ true}, // DXT23
+ {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+ true}, // DXT45
+ {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
+ {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+ true}, // BC7U
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
+
+ // DepthStencil formats
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
+ false}, // Z24S8
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
+ false}, // S8Z24
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
}};
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
- const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
- if (type == SurfaceType::ColorTexture) {
- ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
- // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which are
- // type FLOAT
- ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F ||
- pixel_format == PixelFormat::R11FG11FB10F);
- return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
- } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
- // TODO(Subv): Implement depth formats
- ASSERT_MSG(false, "Unimplemented");
- }
+ ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
+ auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];
+ ASSERT(component_type == format.component_type);
- UNREACHABLE();
- return {};
+ return format;
}
-template <typename Map, typename Interval>
-constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
- return boost::make_iterator_range(map.equal_range(interval));
+VAddr SurfaceParams::GetCpuAddr() const {
+ const auto& gpu = Core::System::GetInstance().GPU();
+ return *gpu.memory_manager->GpuToCpuAddress(addr);
}
-static u16 GetResolutionScaleFactor() {
- return static_cast<u16>(!Settings::values.resolution_factor
- ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio()
- : Settings::values.resolution_factor);
+static bool IsPixelFormatASTC(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::ASTC_2D_4X4:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::ASTC_2D_4X4:
+ return {4, 4};
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
+ UNREACHABLE();
+ }
+}
+
+MathUtil::Rectangle<u32> SurfaceParams::GetRect() const {
+ u32 actual_height{unaligned_height};
+ if (IsPixelFormatASTC(pixel_format)) {
+ // ASTC formats must stop at the ATSC block size boundary
+ actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second);
+ }
+ return {0, actual_height, width, 0};
}
template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base,
- Tegra::GPUVAddr start, Tegra::GPUVAddr end) {
+void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
const auto& gpu = Core::System::GetInstance().GPU();
if (morton_to_gl) {
- auto data = Tegra::Texture::UnswizzleTexture(
- *gpu.memory_manager->GpuToCpuAddress(base),
- SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height);
- std::memcpy(gl_buffer, data.data(), data.size());
+ if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) {
+ auto data = Tegra::Texture::UnswizzleTexture(
+ *gpu.memory_manager->GpuToCpuAddress(addr),
+ SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height);
+ std::memcpy(gl_buffer, data.data(), data.size());
+ } else {
+ auto data = Tegra::Texture::UnswizzleDepthTexture(
+ *gpu.memory_manager->GpuToCpuAddress(addr),
+ SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height);
+ std::memcpy(gl_buffer, data.data(), data.size());
+ }
} else {
- // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
- // the configuration for this and perform more generic un/swizzle
- NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
+ // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
+ // check the configuration for this and perform more generic un/swizzle
+ LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(
stride, height, bytes_per_pixel, gl_bytes_per_pixel,
- Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(base)), gl_buffer,
+ Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer,
morton_to_gl);
}
}
-static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
- Tegra::GPUVAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
- MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>,
- MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
- MortonCopy<true, PixelFormat::DXN1>,
+ MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>,
+ MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
+ MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
+ MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
+ MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>,
+ MortonCopy<true, PixelFormat::Z32F>,
};
-static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
- Tegra::GPUVAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
SurfaceParams::MaxPixelFormat>
gl_to_morton_fns = {
MortonCopy<false, PixelFormat::ABGR8>,
@@ -132,11 +208,17 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
MortonCopy<false, PixelFormat::R8>,
MortonCopy<false, PixelFormat::RGBA16F>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,
- // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
+ MortonCopy<false, PixelFormat::RGBA32UI>,
+ // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1/BC7U formats is not yet supported
+ nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
+ MortonCopy<false, PixelFormat::ABGR8>,
+ MortonCopy<false, PixelFormat::Z24S8>,
+ MortonCopy<false, PixelFormat::S8Z24>,
+ MortonCopy<false, PixelFormat::Z32F>,
};
// Allocate an uninitialized texture of appropriate size and format for the surface
@@ -166,374 +248,144 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
cur_state.Apply();
}
-static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
- const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
- GLuint read_fb_handle, GLuint draw_fb_handle) {
-
- glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex,
- GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(),
- src_rect.GetHeight(), 0);
- return true;
-}
-
-static bool FillSurface(const Surface& surface, const u8* fill_data,
- const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) {
- UNREACHABLE();
- return {};
-}
-
-SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const {
- SurfaceParams params = *this;
- const u32 tiled_size = is_tiled ? 8 : 1;
- const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size);
- Tegra::GPUVAddr aligned_start =
- addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes);
- Tegra::GPUVAddr aligned_end =
- addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes);
-
- if (aligned_end - aligned_start > stride_tiled_bytes) {
- params.addr = aligned_start;
- params.height = static_cast<u32>((aligned_end - aligned_start) / BytesInPixels(stride));
- } else {
- // 1 row
- ASSERT(aligned_end - aligned_start == stride_tiled_bytes);
- const u64 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1);
- aligned_start =
- addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment);
- aligned_end =
- addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment);
- params.addr = aligned_start;
- params.width = static_cast<u32>(PixelsInBytes(aligned_end - aligned_start) / tiled_size);
- params.stride = params.width;
- params.height = tiled_size;
- }
- params.UpdateParams();
-
- return params;
-}
-
-SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const {
- if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) {
- return {};
- }
-
- if (is_tiled) {
- unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8;
- unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8;
- unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8;
- unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8;
- }
-
- const u32 stride_tiled = !is_tiled ? stride : stride * 8;
-
- const u32 pixel_offset =
- stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) +
- unscaled_rect.left;
-
- const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth();
-
- return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)};
-}
-
-MathUtil::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const {
- const u32 begin_pixel_index = static_cast<u32>(PixelsInBytes(sub_surface.addr - addr));
-
- if (is_tiled) {
- const int x0 = (begin_pixel_index % (stride * 8)) / 8;
- const int y0 = (begin_pixel_index / (stride * 8)) * 8;
- // Top to bottom
- return MathUtil::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width,
- height - (y0 + sub_surface.height));
- }
-
- const int x0 = begin_pixel_index % stride;
- const int y0 = begin_pixel_index / stride;
- // Bottom to top
- return MathUtil::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0);
-}
-
-MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const {
- auto rect = GetSubRect(sub_surface);
- rect.left = rect.left * res_scale;
- rect.right = rect.right * res_scale;
- rect.top = rect.top * res_scale;
- rect.bottom = rect.bottom * res_scale;
- return rect;
-}
-
-bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
- return std::tie(other_surface.addr, other_surface.width, other_surface.height,
- other_surface.stride, other_surface.block_height, other_surface.pixel_format,
- other_surface.component_type,
- other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height,
- pixel_format, component_type, is_tiled) &&
- pixel_format != PixelFormat::Invalid;
-}
-
-bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
- return sub_surface.addr >= addr && sub_surface.end <= end &&
- sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
- sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height &&
- sub_surface.component_type == component_type &&
- (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
- (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) &&
- GetSubRect(sub_surface).left + sub_surface.width <= stride;
-}
-
-bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
- return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
- addr <= expanded_surface.end && expanded_surface.addr <= end &&
- is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height &&
- component_type == expanded_surface.component_type && stride == expanded_surface.stride &&
- (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) %
- BytesInPixels(stride * (is_tiled ? 8 : 1)) ==
- 0;
-}
-
-bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
- if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr ||
- end < texcopy_params.end) {
- return false;
- }
- if (texcopy_params.block_height != block_height ||
- texcopy_params.component_type != component_type)
- return false;
-
- if (texcopy_params.width != texcopy_params.stride) {
- const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1)));
- return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
- texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
- (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) &&
- ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride;
- }
- return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval();
-}
-
-VAddr SurfaceParams::GetCpuAddr() const {
- // When this function is used, only cpu_addr or (GPU) addr should be set, not both
- ASSERT(!(cpu_addr && addr));
- const auto& gpu = Core::System::GetInstance().GPU();
- return cpu_addr.get_value_or(*gpu.memory_manager->GpuToCpuAddress(addr));
-}
-
-bool CachedSurface::CanFill(const SurfaceParams& dest_surface,
- SurfaceInterval fill_interval) const {
- if (type == SurfaceType::Fill && IsRegionValid(fill_interval) &&
- boost::icl::first(fill_interval) >= addr &&
- boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range
- dest_surface.FromInterval(fill_interval).GetInterval() ==
- fill_interval) { // make sure interval is a rectangle in dest surface
- if (fill_size * CHAR_BIT != dest_surface.GetFormatBpp()) {
- // Check if bits repeat for our fill_size
- const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / CHAR_BIT, 1u);
- std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel);
-
- for (u32 i = 0; i < dest_bytes_per_pixel; ++i)
- std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size);
-
- for (u32 i = 0; i < fill_size; ++i)
- if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0],
- dest_bytes_per_pixel) != 0)
- return false;
-
- if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4))
- return false;
+CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
+ texture.Create();
+ const auto& rect{params.GetRect()};
+ AllocateSurfaceTexture(texture.handle,
+ GetFormatTuple(params.pixel_format, params.component_type),
+ rect.GetWidth(), rect.GetHeight());
+}
+
+static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
+ union S8Z24 {
+ BitField<0, 24, u32> z24;
+ BitField<24, 8, u32> s8;
+ };
+ static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
+
+ union Z24S8 {
+ BitField<0, 8, u32> s8;
+ BitField<8, 24, u32> z24;
+ };
+ static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
+
+ S8Z24 input_pixel{};
+ Z24S8 output_pixel{};
+ for (size_t y = 0; y < height; ++y) {
+ for (size_t x = 0; x < width; ++x) {
+ const size_t offset{y * width + x};
+ std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
+ output_pixel.s8.Assign(input_pixel.s8);
+ output_pixel.z24.Assign(input_pixel.z24);
+ std::memcpy(&data[offset], &output_pixel, sizeof(Z24S8));
}
- return true;
}
- return false;
-}
-
-bool CachedSurface::CanCopy(const SurfaceParams& dest_surface,
- SurfaceInterval copy_interval) const {
- SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
- ASSERT(subrect_params.GetInterval() == copy_interval);
- if (CanSubRect(subrect_params))
- return true;
-
- if (CanFill(dest_surface, copy_interval))
- return true;
-
- return false;
}
-
-SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const {
- SurfaceInterval result{};
- const auto valid_regions =
- SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions;
- for (auto& valid_interval : valid_regions) {
- const SurfaceInterval aligned_interval{
- addr + Common::AlignUp(boost::icl::first(valid_interval) - addr,
- BytesInPixels(is_tiled ? 8 * 8 : 1)),
- addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr,
- BytesInPixels(is_tiled ? 8 * 8 : 1))};
-
- if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) ||
- boost::icl::length(aligned_interval) == 0) {
- continue;
- }
-
- // Get the rectangle within aligned_interval
- const u32 stride_bytes = static_cast<u32>(BytesInPixels(stride)) * (is_tiled ? 8 : 1);
- SurfaceInterval rect_interval{
- addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes),
- addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes),
- };
- if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) {
- // 1 row
- rect_interval = aligned_interval;
- } else if (boost::icl::length(rect_interval) == 0) {
- // 2 rows that do not make a rectangle, return the larger one
- const SurfaceInterval row1{boost::icl::first(aligned_interval),
- boost::icl::first(rect_interval)};
- const SurfaceInterval row2{boost::icl::first(rect_interval),
- boost::icl::last_next(aligned_interval)};
- rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2;
- }
-
- if (boost::icl::length(rect_interval) > boost::icl::length(result)) {
- result = rect_interval;
- }
+/**
+ * Helper function to perform software conversion (as needed) when loading a buffer from Switch
+ * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
+ * typical desktop GPUs.
+ */
+static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
+ u32 width, u32 height) {
+ switch (pixel_format) {
+ case PixelFormat::ASTC_2D_4X4: {
+ // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
+ u32 block_width{};
+ u32 block_height{};
+ std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
+ data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
+ break;
+ }
+ case PixelFormat::S8Z24:
+ // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
+ ConvertS8Z24ToZ24S8(data, width, height);
+ break;
+ }
+}
+
+/**
+ * Helper function to perform software conversion (as needed) when flushing a buffer to Switch
+ * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
+ * typical desktop GPUs.
+ */
+static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& /*data*/, PixelFormat pixel_format,
+ u32 /*width*/, u32 /*height*/) {
+ switch (pixel_format) {
+ case PixelFormat::ASTC_2D_4X4:
+ case PixelFormat::S8Z24:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented pixel_format={}",
+ static_cast<u32>(pixel_format));
+ UNREACHABLE();
+ break;
}
- return result;
-}
-
-void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
- SurfaceInterval copy_interval) {
- SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval);
- ASSERT(subrect_params.GetInterval() == copy_interval);
-
- ASSERT(src_surface != dst_surface);
-
- // This is only called when CanCopy is true, no need to run checks here
- if (src_surface->type == SurfaceType::Fill) {
- // FillSurface needs a 4 bytes buffer
- const u64 fill_offset =
- (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size;
- std::array<u8, 4> fill_buffer;
-
- u64 fill_buff_pos = fill_offset;
- for (int i : {0, 1, 2, 3})
- fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size];
-
- FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params),
- draw_framebuffer.handle);
- return;
- }
- if (src_surface->CanSubRect(subrect_params)) {
- BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params),
- dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params),
- src_surface->type, read_framebuffer.handle, draw_framebuffer.handle);
- return;
- }
- UNREACHABLE();
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
-void CachedSurface::LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end) {
- ASSERT(type != SurfaceType::Fill);
+void CachedSurface::LoadGLBuffer() {
+ ASSERT(params.type != SurfaceType::Fill);
- u8* const texture_src_data = Memory::GetPointer(GetCpuAddr());
- if (texture_src_data == nullptr)
- return;
+ u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr());
- if (gl_buffer == nullptr) {
- gl_buffer_size = GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format);
- gl_buffer.reset(new u8[gl_buffer_size]);
- }
+ ASSERT(texture_src_data);
- MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
+ gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
- ASSERT(load_start >= addr && load_end <= end);
- const u64 start_offset = load_start - addr;
+ MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
- if (!is_tiled) {
- const u32 bytes_per_pixel{GetFormatBpp() >> 3};
+ if (!params.is_tiled) {
+ const u32 bytes_per_pixel{params.GetFormatBpp() >> 3};
- std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
- bytes_per_pixel * width * height);
+ std::memcpy(gl_buffer.data(), texture_src_data,
+ bytes_per_pixel * params.width * params.height);
} else {
- morton_to_gl_fns[static_cast<size_t>(pixel_format)](GetActualWidth(), block_height,
- GetActualHeight(), &gl_buffer[0], addr,
- load_start, load_end);
+ morton_to_gl_fns[static_cast<size_t>(params.pixel_format)](
+ params.width, params.block_height, params.height, gl_buffer.data(), params.addr);
}
+
+ ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height);
}
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
-void CachedSurface::FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end) {
- u8* const dst_buffer = Memory::GetPointer(GetCpuAddr());
- if (dst_buffer == nullptr)
- return;
-
- ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format));
+void CachedSurface::FlushGLBuffer() {
+ u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr());
- // TODO: Should probably be done in ::Memory:: and check for other regions too
- // same as loadglbuffer()
- if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END)
- flush_end = Memory::VRAM_VADDR_END;
-
- if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR)
- flush_start = Memory::VRAM_VADDR;
+ ASSERT(dst_buffer);
+ ASSERT(gl_buffer.size() ==
+ params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
- ASSERT(flush_start >= addr && flush_end <= end);
- const u64 start_offset = flush_start - addr;
- const u64 end_offset = flush_end - addr;
-
- if (type == SurfaceType::Fill) {
- const u64 coarse_start_offset = start_offset - (start_offset % fill_size);
- const u64 backup_bytes = start_offset % fill_size;
- std::array<u8, 4> backup_data;
- if (backup_bytes)
- std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes);
-
- for (u64 offset = coarse_start_offset; offset < end_offset; offset += fill_size) {
- std::memcpy(&dst_buffer[offset], &fill_data[0],
- std::min(fill_size, end_offset - offset));
- }
+ ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
+ params.height);
- if (backup_bytes)
- std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
- } else if (!is_tiled) {
- std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start);
+ if (!params.is_tiled) {
+ std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes);
} else {
- gl_to_morton_fns[static_cast<size_t>(pixel_format)](
- stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end);
+ gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
+ params.width, params.block_height, params.height, gl_buffer.data(), params.addr);
}
}
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
-void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
- GLuint draw_fb_handle) {
- if (type == SurfaceType::Fill)
+void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
+ if (params.type == SurfaceType::Fill)
return;
MICROPROFILE_SCOPE(OpenGL_TextureUL);
- ASSERT(gl_buffer_size ==
- GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format));
+ ASSERT(gl_buffer.size() ==
+ params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
+
+ const auto& rect{params.GetRect()};
// Load data from memory to the surface
GLint x0 = static_cast<GLint>(rect.left);
GLint y0 = static_cast<GLint>(rect.bottom);
- size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format);
+ size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format);
- const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type);
+ const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
GLuint target_tex = texture.handle;
-
- // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
- // surface
- OGLTexture unscaled_tex;
- if (res_scale != 1) {
- x0 = 0;
- y0 = 0;
-
- unscaled_tex.Create();
- AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight());
- target_tex = unscaled_tex.handle;
- }
-
OpenGLState cur_state = OpenGLState::GetCurState();
GLuint old_tex = cur_state.texture_units[0].texture_2d;
@@ -541,15 +393,15 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
cur_state.Apply();
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
- ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
- glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride));
+ ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0);
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width));
glActiveTexture(GL_TEXTURE0);
if (tuple.compressed) {
- glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format,
- static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()),
- static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0,
- size, &gl_buffer[buffer_offset]);
+ glCompressedTexImage2D(
+ GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
+ static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes),
+ &gl_buffer[buffer_offset]);
} else {
glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
@@ -560,845 +412,250 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
cur_state.texture_units[0].texture_2d = old_tex;
cur_state.Apply();
-
- if (res_scale != 1) {
- auto scaled_rect = rect;
- scaled_rect.left *= res_scale;
- scaled_rect.top *= res_scale;
- scaled_rect.right *= res_scale;
- scaled_rect.bottom *= res_scale;
-
- BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle,
- scaled_rect, type, read_fb_handle, draw_fb_handle);
- }
}
MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
-void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
- GLuint draw_fb_handle) {
- if (type == SurfaceType::Fill)
+void CachedSurface::DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
+ if (params.type == SurfaceType::Fill)
return;
MICROPROFILE_SCOPE(OpenGL_TextureDL);
- if (gl_buffer == nullptr) {
- gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format);
- gl_buffer.reset(new u8[gl_buffer_size]);
- }
+ gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
OpenGLState state = OpenGLState::GetCurState();
OpenGLState prev_state = state;
SCOPE_EXIT({ prev_state.Apply(); });
- const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type);
+ const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
// Ensure no bad interactions with GL_PACK_ALIGNMENT
- ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
- glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
- size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format);
-
- // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
- if (res_scale != 1) {
- auto scaled_rect = rect;
- scaled_rect.left *= res_scale;
- scaled_rect.top *= res_scale;
- scaled_rect.right *= res_scale;
- scaled_rect.bottom *= res_scale;
-
- OGLTexture unscaled_tex;
- unscaled_tex.Create();
-
- MathUtil::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0};
- AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight());
- BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type,
- read_fb_handle, draw_fb_handle);
-
- state.texture_units[0].texture_2d = unscaled_tex.handle;
- state.Apply();
-
- glActiveTexture(GL_TEXTURE0);
- glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]);
- } else {
- state.ResetTexture(texture.handle);
- state.draw.read_framebuffer = read_fb_handle;
- state.Apply();
-
- if (type == SurfaceType::ColorTexture) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
- texture.handle, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- 0, 0);
- } else if (type == SurfaceType::Depth) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- texture.handle, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
- } else {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- texture.handle, 0);
- }
- glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
- static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
- tuple.format, tuple.type, &gl_buffer[buffer_offset]);
- }
+ ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0);
+ glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
- glPixelStorei(GL_PACK_ROW_LENGTH, 0);
-}
-
-enum class MatchFlags {
- None = 0,
- Invalid = 1, // Flag that can be applied to other match types, invalid matches require
- // validation before they can be used
- Exact = 1 << 1, // Surfaces perfectly match
- SubRect = 1 << 2, // Surface encompasses params
- Copy = 1 << 3, // Surface we can copy from
- Expand = 1 << 4, // Surface that can expand params
- TexCopy = 1 << 5 // Surface that will match a display transfer "texture copy" parameters
-};
-
-constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) {
- return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs));
-}
+ const auto& rect{params.GetRect()};
+ size_t buffer_offset =
+ (rect.bottom * params.width + rect.left) * GetGLBytesPerPixel(params.pixel_format);
-constexpr MatchFlags operator&(MatchFlags lhs, MatchFlags rhs) {
- return static_cast<MatchFlags>(static_cast<int>(lhs) & static_cast<int>(rhs));
-}
+ state.UnbindTexture(texture.handle);
+ state.draw.read_framebuffer = read_fb_handle;
+ state.Apply();
-/// Get the best surface match (and its match type) for the given flags
-template <MatchFlags find_flags>
-Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params,
- ScaleMatch match_scale_type,
- boost::optional<SurfaceInterval> validate_interval = boost::none) {
- Surface match_surface = nullptr;
- bool match_valid = false;
- u32 match_scale = 0;
- SurfaceInterval match_interval{};
-
- for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) {
- for (auto& surface : pair.second) {
- bool res_scale_matched = match_scale_type == ScaleMatch::Exact
- ? (params.res_scale == surface->res_scale)
- : (params.res_scale <= surface->res_scale);
- // validity will be checked in GetCopyableInterval
- bool is_valid =
- (find_flags & MatchFlags::Copy) != MatchFlags::None
- ? true
- : surface->IsRegionValid(validate_interval.value_or(params.GetInterval()));
-
- if ((find_flags & MatchFlags::Invalid) == MatchFlags::None && !is_valid)
- continue;
-
- auto IsMatch_Helper = [&](auto check_type, auto match_fn) {
- if ((find_flags & check_type) == MatchFlags::None)
- return;
-
- bool matched;
- SurfaceInterval surface_interval;
- std::tie(matched, surface_interval) = match_fn();
- if (!matched)
- return;
-
- if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore &&
- surface->type != SurfaceType::Fill)
- return;
-
- // Found a match, update only if this is better than the previous one
- auto UpdateMatch = [&] {
- match_surface = surface;
- match_valid = is_valid;
- match_scale = surface->res_scale;
- match_interval = surface_interval;
- };
-
- if (surface->res_scale > match_scale) {
- UpdateMatch();
- return;
- } else if (surface->res_scale < match_scale) {
- return;
- }
-
- if (is_valid && !match_valid) {
- UpdateMatch();
- return;
- } else if (is_valid != match_valid) {
- return;
- }
-
- if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) {
- UpdateMatch();
- }
- };
- IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] {
- return std::make_pair(surface->ExactMatch(params), surface->GetInterval());
- });
- IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] {
- return std::make_pair(surface->CanSubRect(params), surface->GetInterval());
- });
- IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] {
- auto copy_interval =
- params.FromInterval(*validate_interval).GetCopyableInterval(surface);
- bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 &&
- surface->CanCopy(params, copy_interval);
- return std::make_pair(matched, copy_interval);
- });
- IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] {
- return std::make_pair(surface->CanExpand(params), surface->GetInterval());
- });
- IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] {
- return std::make_pair(surface->CanTexCopy(params), surface->GetInterval());
- });
- }
+ if (params.type == SurfaceType::ColorTexture) {
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+ texture.handle, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+ 0);
+ } else if (params.type == SurfaceType::Depth) {
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+ texture.handle, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+ } else {
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ texture.handle, 0);
}
- return match_surface;
+ glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
+ static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
+ tuple.format, tuple.type, &gl_buffer[buffer_offset]);
+
+ glPixelStorei(GL_PACK_ROW_LENGTH, 0);
}
RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
read_framebuffer.Create();
draw_framebuffer.Create();
-
- attributeless_vao.Create();
-
- d24s8_abgr_buffer.Create();
- d24s8_abgr_buffer_size = 0;
-
- const char* vs_source = R"(
-#version 330 core
-const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
-void main() {
- gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
-}
-)";
- const char* fs_source = R"(
-#version 330 core
-
-uniform samplerBuffer tbo;
-uniform vec2 tbo_size;
-uniform vec4 viewport;
-
-out vec4 color;
-
-void main() {
- vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw;
- int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x);
- color = texelFetch(tbo, tbo_offset).rabg;
-}
-)";
- d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source);
-
- OpenGLState state = OpenGLState::GetCurState();
- GLuint old_program = state.draw.shader_program;
- state.draw.shader_program = d24s8_abgr_shader.handle;
- state.Apply();
-
- GLint tbo_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo");
- ASSERT(tbo_u_id != -1);
- glUniform1i(tbo_u_id, 0);
-
- state.draw.shader_program = old_program;
- state.Apply();
-
- d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size");
- ASSERT(d24s8_abgr_tbo_size_u_id != -1);
- d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport");
- ASSERT(d24s8_abgr_viewport_u_id != -1);
}
RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
- FlushAll();
- while (!surface_cache.empty())
- UnregisterSurface(*surface_cache.begin()->second.begin());
-}
-
-bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface,
- const MathUtil::Rectangle<u32>& src_rect,
- const Surface& dst_surface,
- const MathUtil::Rectangle<u32>& dst_rect) {
- if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format))
- return false;
-
- return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle,
- dst_rect, src_surface->type, read_framebuffer.handle,
- draw_framebuffer.handle);
-}
-
-void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex,
- const MathUtil::Rectangle<u32>& src_rect,
- GLuint dst_tex,
- const MathUtil::Rectangle<u32>& dst_rect) {
- OpenGLState prev_state = OpenGLState::GetCurState();
- SCOPE_EXIT({ prev_state.Apply(); });
-
- OpenGLState state;
- state.draw.read_framebuffer = read_framebuffer.handle;
- state.draw.draw_framebuffer = draw_framebuffer.handle;
- state.Apply();
-
- glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle);
-
- GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4;
- if (target_pbo_size > d24s8_abgr_buffer_size) {
- d24s8_abgr_buffer_size = target_pbo_size * 2;
- glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY);
- }
-
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex,
- 0);
- glReadPixels(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.bottom),
- static_cast<GLsizei>(src_rect.GetWidth()),
- static_cast<GLsizei>(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
- 0);
-
- glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
-
- // PBO now contains src_tex in RABG format
- state.draw.shader_program = d24s8_abgr_shader.handle;
- state.draw.vertex_array = attributeless_vao.handle;
- state.viewport.x = static_cast<GLint>(dst_rect.left);
- state.viewport.y = static_cast<GLint>(dst_rect.bottom);
- state.viewport.width = static_cast<GLsizei>(dst_rect.GetWidth());
- state.viewport.height = static_cast<GLsizei>(dst_rect.GetHeight());
- state.Apply();
-
- OGLTexture tbo;
- tbo.Create();
- glActiveTexture(GL_TEXTURE0);
- glBindTexture(GL_TEXTURE_BUFFER, tbo.handle);
- glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle);
-
- glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast<GLfloat>(src_rect.GetWidth()),
- static_cast<GLfloat>(src_rect.GetHeight()));
- glUniform4f(d24s8_abgr_viewport_u_id, static_cast<GLfloat>(state.viewport.x),
- static_cast<GLfloat>(state.viewport.y), static_cast<GLfloat>(state.viewport.width),
- static_cast<GLfloat>(state.viewport.height));
-
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
- glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
-
- glBindTexture(GL_TEXTURE_BUFFER, 0);
-}
-
-Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
- bool load_if_create) {
- if (params.addr == 0 || params.height * params.width == 0) {
- return nullptr;
- }
- // Use GetSurfaceSubRect instead
- ASSERT(params.width == params.stride);
-
- ASSERT(!params.is_tiled ||
- (params.GetActualWidth() % 8 == 0 && params.GetActualHeight() % 8 == 0));
-
- // Check for an exact match in existing surfaces
- Surface surface =
- FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale);
-
- if (surface == nullptr) {
- u16 target_res_scale = params.res_scale;
- if (match_res_scale != ScaleMatch::Exact) {
- // This surface may have a subrect of another surface with a higher res_scale, find it
- // to adjust our params
- SurfaceParams find_params = params;
- Surface expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(
- surface_cache, find_params, match_res_scale);
- if (expandable != nullptr && expandable->res_scale > target_res_scale) {
- target_res_scale = expandable->res_scale;
- }
- }
- SurfaceParams new_params = params;
- new_params.res_scale = target_res_scale;
- surface = CreateSurface(new_params);
- RegisterSurface(surface);
- }
-
- if (load_if_create) {
- ValidateSurface(surface, params.addr, params.size);
- }
-
- return surface;
-}
-
-boost::optional<Tegra::GPUVAddr> RasterizerCacheOpenGL::TryFindFramebufferGpuAddress(
- VAddr cpu_addr) const {
- // Tries to find the GPU address of a framebuffer based on the CPU address. This is because
- // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU
- // addresses. We iterate through all cached framebuffers, and compare their starting CPU address
- // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps
- // surfaces.
-
- std::vector<Tegra::GPUVAddr> gpu_addresses;
- for (const auto& pair : surface_cache) {
- for (const auto& surface : pair.second) {
- const VAddr surface_cpu_addr = surface->GetCpuAddr();
- if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + surface->size)) {
- ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported");
- gpu_addresses.push_back(surface->addr);
- }
- }
+ while (!surface_cache.empty()) {
+ UnregisterSurface(surface_cache.begin()->second);
}
-
- if (gpu_addresses.empty()) {
- return {};
- }
-
- ASSERT_MSG(gpu_addresses.size() == 1, ">1 surface is unsupported");
- return gpu_addresses[0];
-}
-
-SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params,
- ScaleMatch match_res_scale,
- bool load_if_create) {
- if (params.addr == 0 || params.height * params.width == 0) {
- return std::make_tuple(nullptr, MathUtil::Rectangle<u32>{});
- }
-
- // Attempt to find encompassing surface
- Surface surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params,
- match_res_scale);
-
- // Check if FindMatch failed because of res scaling
- // If that's the case create a new surface with
- // the dimensions of the lower res_scale surface
- // to suggest it should not be used again
- if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) {
- surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params,
- ScaleMatch::Ignore);
- if (surface != nullptr) {
- ASSERT(surface->res_scale < params.res_scale);
- SurfaceParams new_params = *surface;
- new_params.res_scale = params.res_scale;
-
- surface = CreateSurface(new_params);
- RegisterSurface(surface);
- }
- }
-
- SurfaceParams aligned_params = params;
- if (params.is_tiled) {
- aligned_params.height = Common::AlignUp(params.height, 8);
- aligned_params.width = Common::AlignUp(params.width, 8);
- aligned_params.stride = Common::AlignUp(params.stride, 8);
- aligned_params.UpdateParams();
- }
-
- // Check for a surface we can expand before creating a new one
- if (surface == nullptr) {
- surface = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(surface_cache, aligned_params,
- match_res_scale);
- if (surface != nullptr) {
- aligned_params.width = aligned_params.stride;
- aligned_params.UpdateParams();
-
- SurfaceParams new_params = *surface;
- new_params.addr = std::min(aligned_params.addr, surface->addr);
- new_params.end = std::max(aligned_params.end, surface->end);
- new_params.size = new_params.end - new_params.addr;
- new_params.height = static_cast<u32>(
- new_params.size / aligned_params.BytesInPixels(aligned_params.stride));
- ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0);
-
- Surface new_surface = CreateSurface(new_params);
- DuplicateSurface(surface, new_surface);
-
- // Delete the expanded surface, this can't be done safely yet
- // because it may still be in use
- remove_surfaces.emplace(surface);
-
- surface = new_surface;
- RegisterSurface(new_surface);
- }
- }
-
- // No subrect found - create and return a new surface
- if (surface == nullptr) {
- SurfaceParams new_params = aligned_params;
- // Can't have gaps in a surface
- new_params.width = aligned_params.stride;
- new_params.UpdateParams();
- // GetSurface will create the new surface and possibly adjust res_scale if necessary
- surface = GetSurface(new_params, match_res_scale, load_if_create);
- } else if (load_if_create) {
- ValidateSurface(surface, aligned_params.addr, aligned_params.size);
- }
-
- return std::make_tuple(surface, surface->GetScaledSubRect(params));
}
Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
- auto& gpu = Core::System::GetInstance().GPU();
-
- SurfaceParams params;
- params.addr = config.tic.Address();
- params.is_tiled = config.tic.IsTiled();
- params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
-
- params.width = Common::AlignUp(config.tic.Width(), params.GetCompresssionFactor()) /
- params.GetCompresssionFactor();
- params.height = Common::AlignUp(config.tic.Height(), params.GetCompresssionFactor()) /
- params.GetCompresssionFactor();
-
- // TODO(Subv): Different types per component are not supported.
- ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() &&
- config.tic.r_type.Value() == config.tic.b_type.Value() &&
- config.tic.r_type.Value() == config.tic.a_type.Value());
-
- params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value());
-
- if (config.tic.IsTiled()) {
- params.block_height = config.tic.BlockHeight();
- params.width = Common::AlignUp(params.width, params.block_height);
- params.height = Common::AlignUp(params.height, params.block_height);
- } else {
- // Use the texture-provided stride value if the texture isn't tiled.
- params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch()));
- }
-
- params.UpdateParams();
-
- if (params.GetActualWidth() % 8 != 0 || params.GetActualHeight() % 8 != 0 ||
- params.stride != params.width) {
- Surface src_surface;
- MathUtil::Rectangle<u32> rect;
- std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
-
- rect = rect.Scale(params.GetCompresssionFactor());
-
- params.res_scale = src_surface->res_scale;
- Surface tmp_surface = CreateSurface(params);
-
- auto dst_rect = tmp_surface->GetScaledRect().Scale(params.GetCompresssionFactor());
- BlitTextures(src_surface->texture.handle, rect, tmp_surface->texture.handle, dst_rect,
- SurfaceParams::GetFormatType(params.pixel_format), read_framebuffer.handle,
- draw_framebuffer.handle);
-
- remove_surfaces.emplace(tmp_surface);
- return tmp_surface;
- }
-
- return GetSurface(params, ScaleMatch::Ignore, true);
+ return GetSurface(SurfaceParams::CreateForTexture(config));
}
SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
- const auto& config = regs.rt[0];
// TODO(bunnei): This is hard corded to use just the first render buffer
- NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!");
-
- // update resolution_scale_factor and reset cache if changed
- // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We
- // need to fix this before making the renderer multi-threaded.
- static u16 resolution_scale_factor = GetResolutionScaleFactor();
- if (resolution_scale_factor != GetResolutionScaleFactor()) {
- resolution_scale_factor = GetResolutionScaleFactor();
- FlushAll();
- while (!surface_cache.empty())
- UnregisterSurface(*surface_cache.begin()->second.begin());
- }
-
- MathUtil::Rectangle<u32> viewport_clamped{
- static_cast<u32>(std::clamp(viewport.left, 0, static_cast<s32>(config.width))),
- static_cast<u32>(std::clamp(viewport.top, 0, static_cast<s32>(config.height))),
- static_cast<u32>(std::clamp(viewport.right, 0, static_cast<s32>(config.width))),
- static_cast<u32>(std::clamp(viewport.bottom, 0, static_cast<s32>(config.height)))};
+ LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!");
// get color and depth surfaces
- SurfaceParams color_params;
- color_params.is_tiled = true;
- color_params.res_scale = resolution_scale_factor;
- color_params.width = config.width;
- color_params.height = config.height;
- // TODO(Subv): Can framebuffers use a different block height?
- color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
- SurfaceParams depth_params = color_params;
-
- color_params.addr = config.Address();
- color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format);
- color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format);
- color_params.UpdateParams();
-
- ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented");
- // depth_params.addr = config.GetDepthBufferPhysicalAddress();
- // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format);
- // depth_params.UpdateParams();
-
- auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped);
- auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped);
-
- // Make sure that framebuffers don't overlap if both color and depth are being used
- if (using_color_fb && using_depth_fb &&
- boost::icl::length(color_vp_interval & depth_vp_interval)) {
- NGLOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
- "overlapping framebuffers not supported!");
- using_depth_fb = false;
+ SurfaceParams color_params{};
+ SurfaceParams depth_params{};
+
+ if (using_color_fb) {
+ color_params = SurfaceParams::CreateForFramebuffer(regs.rt[0]);
+ }
+
+ if (using_depth_fb) {
+ depth_params =
+ SurfaceParams::CreateForDepthBuffer(regs.rt[0], regs.zeta.Address(), regs.zeta.format);
}
MathUtil::Rectangle<u32> color_rect{};
- Surface color_surface = nullptr;
- if (using_color_fb)
- std::tie(color_surface, color_rect) =
- GetSurfaceSubRect(color_params, ScaleMatch::Exact, false);
+ Surface color_surface;
+ if (using_color_fb) {
+ color_surface = GetSurface(color_params);
+ if (color_surface) {
+ color_rect = color_surface->GetSurfaceParams().GetRect();
+ }
+ }
MathUtil::Rectangle<u32> depth_rect{};
- Surface depth_surface = nullptr;
- if (using_depth_fb)
- std::tie(depth_surface, depth_rect) =
- GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false);
+ Surface depth_surface;
+ if (using_depth_fb) {
+ depth_surface = GetSurface(depth_params);
+ if (depth_surface) {
+ depth_rect = depth_surface->GetSurfaceParams().GetRect();
+ }
+ }
MathUtil::Rectangle<u32> fb_rect{};
- if (color_surface != nullptr && depth_surface != nullptr) {
+ if (color_surface && depth_surface) {
fb_rect = color_rect;
// Color and Depth surfaces must have the same dimensions and offsets
if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top ||
color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) {
- color_surface = GetSurface(color_params, ScaleMatch::Exact, false);
- depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false);
- fb_rect = color_surface->GetScaledRect();
+ color_surface = GetSurface(color_params);
+ depth_surface = GetSurface(depth_params);
+ fb_rect = color_surface->GetSurfaceParams().GetRect();
}
- } else if (color_surface != nullptr) {
+ } else if (color_surface) {
fb_rect = color_rect;
- } else if (depth_surface != nullptr) {
+ } else if (depth_surface) {
fb_rect = depth_rect;
}
- if (color_surface != nullptr) {
- ValidateSurface(color_surface, boost::icl::first(color_vp_interval),
- boost::icl::length(color_vp_interval));
- }
- if (depth_surface != nullptr) {
- ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval),
- boost::icl::length(depth_vp_interval));
- }
-
return std::make_tuple(color_surface, depth_surface, fb_rect);
}
-Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) {
- UNREACHABLE();
- return {};
+void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
+ surface->LoadGLBuffer();
+ surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
}
-SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) {
- MathUtil::Rectangle<u32> rect{};
+void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
+ if (Settings::values.use_accurate_framebuffers) {
+ // If enabled, always flush dirty surfaces
+ surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
+ surface->FlushGLBuffer();
+ } else {
+ // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads
+ // and flushes are very slow and do not seem to improve accuracy
+ const auto& params{surface->GetSurfaceParams()};
+ Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false);
+ }
+}
- Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>(
- surface_cache, params, ScaleMatch::Ignore);
+Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
+ if (params.addr == 0 || params.height * params.width == 0) {
+ return {};
+ }
- if (match_surface != nullptr) {
- ValidateSurface(match_surface, params.addr, params.size);
+ const auto& gpu = Core::System::GetInstance().GPU();
+ // Don't try to create any entries in the cache if the address of the texture is invalid.
+ if (gpu.memory_manager->GpuToCpuAddress(params.addr) == boost::none)
+ return {};
- SurfaceParams match_subrect;
- if (params.width != params.stride) {
- const u32 tiled_size = match_surface->is_tiled ? 8 : 1;
- match_subrect = params;
- match_subrect.width =
- static_cast<u32>(match_surface->PixelsInBytes(params.width) / tiled_size);
- match_subrect.stride =
- static_cast<u32>(match_surface->PixelsInBytes(params.stride) / tiled_size);
- match_subrect.height *= tiled_size;
- } else {
- match_subrect = match_surface->FromInterval(params.GetInterval());
- ASSERT(match_subrect.GetInterval() == params.GetInterval());
+ // Check for an exact match in existing surfaces
+ const auto& surface_key{SurfaceKey::Create(params)};
+ const auto& search{surface_cache.find(surface_key)};
+ Surface surface;
+ if (search != surface_cache.end()) {
+ surface = search->second;
+ if (Settings::values.use_accurate_framebuffers) {
+ // Reload the surface from Switch memory
+ LoadSurface(surface);
}
-
- rect = match_surface->GetScaledSubRect(match_subrect);
+ } else {
+ surface = std::make_shared<CachedSurface>(params);
+ RegisterSurface(surface);
+ LoadSurface(surface);
}
- return std::make_tuple(match_surface, rect);
+ return surface;
}
-void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface,
- const Surface& dest_surface) {
- ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end);
-
- BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface,
- dest_surface->GetScaledSubRect(*src_surface));
-
- dest_surface->invalid_regions -= src_surface->GetInterval();
- dest_surface->invalid_regions += src_surface->invalid_regions;
-
- SurfaceRegions regions;
- for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) {
- if (pair.second == src_surface) {
- regions += pair.first;
+Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
+ // Tries to find the cached surface of a framebuffer based on the CPU address. This is because
+ // final output framebuffers are specified by CPU address, but internally our GPU cache uses
+ // GPU addresses. We iterate through all cached surfaces, and compare their starting CPU
+ // address to the one provided. This is obviously not great, and won't work if the
+ // framebuffer overlaps surfaces.
+
+ std::vector<Surface> surfaces;
+ for (const auto& surface : surface_cache) {
+ const auto& params = surface.second->GetSurfaceParams();
+ const VAddr surface_cpu_addr = params.GetCpuAddr();
+ if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) {
+ ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported");
+ surfaces.push_back(surface.second);
}
}
- for (auto& interval : regions) {
- dirty_regions.set({interval, dest_surface});
- }
-}
-void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr,
- u64 size) {
- if (size == 0)
- return;
-
- const SurfaceInterval validate_interval(addr, addr + size);
-
- if (surface->type == SurfaceType::Fill) {
- // Sanity check, fill surfaces will always be valid when used
- ASSERT(surface->IsRegionValid(validate_interval));
- return;
+ if (surfaces.empty()) {
+ return {};
}
- while (true) {
- const auto it = surface->invalid_regions.find(validate_interval);
- if (it == surface->invalid_regions.end())
- break;
-
- const auto interval = *it & validate_interval;
- // Look for a valid surface to copy from
- SurfaceParams params = surface->FromInterval(interval);
-
- Surface copy_surface =
- FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
- if (copy_surface != nullptr) {
- SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface);
- CopySurface(copy_surface, surface, copy_interval);
- surface->invalid_regions.erase(copy_interval);
- continue;
- }
+ ASSERT_MSG(surfaces.size() == 1, ">1 surface is unsupported");
- // Load data from Switch memory
- FlushRegion(params.addr, params.size);
- surface->LoadGLBuffer(params.addr, params.end);
- surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle,
- draw_framebuffer.handle);
- surface->invalid_regions.erase(params.GetInterval());
- }
+ return surfaces[0];
}
-void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface) {
- if (size == 0)
- return;
-
- const SurfaceInterval flush_interval(addr, addr + size);
- SurfaceRegions flushed_intervals;
-
- for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) {
- // small sizes imply that this most likely comes from the cpu, flush the entire region
- // the point is to avoid thousands of small writes every frame if the cpu decides to access
- // that region, anything higher than 8 you're guaranteed it comes from a service
- const auto interval = size <= 8 ? pair.first : pair.first & flush_interval;
- auto& surface = pair.second;
-
- if (flush_surface != nullptr && surface != flush_surface)
- continue;
+void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) {
+ // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should
+ // probably implement this in the future, but for now, the `use_accurate_framebuffers` setting
+ // can be used to always flush.
+}
- // Sanity check, this surface is the last one that marked this region dirty
- ASSERT(surface->IsRegionValid(interval));
+void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) {
+ for (const auto& pair : surface_cache) {
+ const auto& surface{pair.second};
+ const auto& params{surface->GetSurfaceParams()};
- if (surface->type != SurfaceType::Fill) {
- SurfaceParams params = surface->FromInterval(interval);
- surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle,
- draw_framebuffer.handle);
+ if (params.IsOverlappingRegion(addr, size)) {
+ UnregisterSurface(surface);
}
- surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval));
- flushed_intervals += interval;
}
- // Reset dirty regions
- dirty_regions -= flushed_intervals;
}
-void RasterizerCacheOpenGL::FlushAll() {
- FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
-}
+void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
+ const auto& params{surface->GetSurfaceParams()};
+ const auto& surface_key{SurfaceKey::Create(params)};
+ const auto& search{surface_cache.find(surface_key)};
-void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size,
- const Surface& region_owner) {
- if (size == 0)
+ if (search != surface_cache.end()) {
+ // Registered already
return;
-
- const SurfaceInterval invalid_interval(addr, addr + size);
-
- if (region_owner != nullptr) {
- ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end);
- // Surfaces can't have a gap
- ASSERT(region_owner->width == region_owner->stride);
- region_owner->invalid_regions.erase(invalid_interval);
- }
-
- for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) {
- for (auto& cached_surface : pair.second) {
- if (cached_surface == region_owner)
- continue;
-
- // If cpu is invalidating this region we want to remove it
- // to (likely) mark the memory pages as uncached
- if (region_owner == nullptr && size <= 8) {
- FlushRegion(cached_surface->addr, cached_surface->size, cached_surface);
- remove_surfaces.emplace(cached_surface);
- continue;
- }
-
- const auto interval = cached_surface->GetInterval() & invalid_interval;
- cached_surface->invalid_regions.insert(interval);
-
- // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures
- if (cached_surface->type == SurfaceType::Fill &&
- cached_surface->IsSurfaceFullyInvalid()) {
- remove_surfaces.emplace(cached_surface);
- }
- }
}
- if (region_owner != nullptr)
- dirty_regions.set({invalid_interval, region_owner});
- else
- dirty_regions.erase(invalid_interval);
-
- for (auto& remove_surface : remove_surfaces) {
- if (remove_surface == region_owner) {
- Surface expanded_surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(
- surface_cache, *region_owner, ScaleMatch::Ignore);
- ASSERT(expanded_surface);
-
- if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) {
- DuplicateSurface(region_owner, expanded_surface);
- } else {
- continue;
- }
- }
- UnregisterSurface(remove_surface);
- }
-
- remove_surfaces.clear();
+ surface_cache[surface_key] = surface;
+ UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
}
-Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {
- Surface surface = std::make_shared<CachedSurface>();
- static_cast<SurfaceParams&>(*surface) = params;
-
- surface->texture.Create();
-
- surface->gl_buffer_size = 0;
- surface->invalid_regions.insert(surface->GetInterval());
- AllocateSurfaceTexture(surface->texture.handle,
- GetFormatTuple(surface->pixel_format, surface->component_type),
- surface->GetScaledWidth(), surface->GetScaledHeight());
-
- return surface;
-}
+void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
+ const auto& params{surface->GetSurfaceParams()};
+ const auto& surface_key{SurfaceKey::Create(params)};
+ const auto& search{surface_cache.find(surface_key)};
-void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
- if (surface->registered) {
+ if (search == surface_cache.end()) {
+ // Unregistered already
return;
}
- surface->registered = true;
- surface_cache.add({surface->GetInterval(), SurfaceSet{surface}});
- UpdatePagesCachedCount(surface->addr, surface->size, 1);
+
+ UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1);
+ surface_cache.erase(search);
}
-void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
- if (!surface->registered) {
- return;
- }
- surface->registered = false;
- UpdatePagesCachedCount(surface->addr, surface->size, -1);
- surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}});
+template <typename Map, typename Interval>
+constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
+ return boost::make_iterator_range(map.equal_range(interval));
}
void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 0f43e863d..1bedae992 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -1,57 +1,26 @@
-// Copyright 2015 Citra Emulator Project
+// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
+#include <map>
#include <memory>
-#include <set>
-#include <tuple>
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
-#endif
+#include <vector>
#include <boost/icl/interval_map.hpp>
-#include <boost/icl/interval_set.hpp>
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
-#include <boost/optional.hpp>
-#include <glad/glad.h>
-#include "common/assert.h"
-#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "common/hash.h"
#include "common/math_util.h"
-#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/textures/texture.h"
-struct CachedSurface;
+class CachedSurface;
using Surface = std::shared_ptr<CachedSurface>;
-using SurfaceSet = std::set<Surface>;
-
-using SurfaceRegions = boost::icl::interval_set<Tegra::GPUVAddr>;
-using SurfaceMap = boost::icl::interval_map<Tegra::GPUVAddr, Surface>;
-using SurfaceCache = boost::icl::interval_map<Tegra::GPUVAddr, SurfaceSet>;
-
-using SurfaceInterval = SurfaceCache::interval_type;
-static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() &&
- std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(),
- "incorrect interval types");
-
-using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
-
using PageMap = boost::icl::interval_map<u64, int>;
-enum class ScaleMatch {
- Exact, // only accept same res scale
- Upscale, // only allow higher scale than params
- Ignore // accept every scaled res
-};
-
struct SurfaceParams {
enum class PixelFormat {
ABGR8 = 0,
@@ -61,12 +30,24 @@ struct SurfaceParams {
R8 = 4,
RGBA16F = 5,
R11FG11FB10F = 6,
- DXT1 = 7,
- DXT23 = 8,
- DXT45 = 9,
- DXN1 = 10, // This is also known as BC4
+ RGBA32UI = 7,
+ DXT1 = 8,
+ DXT23 = 9,
+ DXT45 = 10,
+ DXN1 = 11, // This is also known as BC4
+ BC7U = 12,
+ ASTC_2D_4X4 = 13,
- Max,
+ MaxColorFormat,
+
+ // DepthStencil formats
+ Z24S8 = 14,
+ S8Z24 = 15,
+ Z32F = 16,
+
+ MaxDepthStencilFormat,
+
+ Max = MaxDepthStencilFormat,
Invalid = 255,
};
@@ -92,10 +73,10 @@ struct SurfaceParams {
/**
* Gets the compression factor for the specified PixelFormat. This applies to just the
* "compressed width" and "compressed height", not the overall compression factor of a
- * compressed image. This is used for maintaining proper surface sizes for compressed texture
- * formats.
+ * compressed image. This is used for maintaining proper surface sizes for compressed
+ * texture formats.
*/
- static constexpr u32 GetCompresssionFactor(PixelFormat format) {
+ static constexpr u32 GetCompressionFactor(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0;
@@ -107,18 +88,21 @@ struct SurfaceParams {
1, // R8
1, // RGBA16F
1, // R11FG11FB10F
+ 1, // RGBA32UI
4, // DXT1
4, // DXT23
4, // DXT45
4, // DXN1
+ 4, // BC7U
+ 4, // ASTC_2D_4X4
+ 1, // Z24S8
+ 1, // S8Z24
+ 1, // Z32F
}};
ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
return compression_factor_table[static_cast<size_t>(format)];
}
- u32 GetCompresssionFactor() const {
- return GetCompresssionFactor(pixel_format);
- }
static constexpr u32 GetFormatBpp(PixelFormat format) {
if (format == PixelFormat::Invalid)
@@ -132,10 +116,16 @@ struct SurfaceParams {
8, // R8
64, // RGBA16F
32, // R11FG11FB10F
+ 128, // RGBA32UI
64, // DXT1
128, // DXT23
128, // DXT45
64, // DXN1
+ 128, // BC7U
+ 32, // ASTC_2D_4X4
+ 32, // Z24S8
+ 32, // S8Z24
+ 32, // Z32F
}};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -145,6 +135,20 @@ struct SurfaceParams {
return GetFormatBpp(pixel_format);
}
+ static PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
+ switch (format) {
+ case Tegra::DepthFormat::S8_Z24_UNORM:
+ return PixelFormat::S8Z24;
+ case Tegra::DepthFormat::Z24_S8_UNORM:
+ return PixelFormat::Z24S8;
+ case Tegra::DepthFormat::Z32_FLOAT:
+ return PixelFormat::Z32F;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ UNREACHABLE();
+ }
+ }
+
static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
switch (format) {
case Tegra::RenderTargetFormat::RGBA8_UNORM:
@@ -156,18 +160,10 @@ struct SurfaceParams {
return PixelFormat::RGBA16F;
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
return PixelFormat::R11FG11FB10F;
+ case Tegra::RenderTargetFormat::RGBA32_UINT:
+ return PixelFormat::RGBA32UI;
default:
- NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- }
- }
-
- static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
- switch (format) {
- case Tegra::FramebufferConfig::PixelFormat::ABGR8:
- return PixelFormat::ABGR8;
- default:
- NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
@@ -189,6 +185,8 @@ struct SurfaceParams {
return PixelFormat::RGBA16F;
case Tegra::Texture::TextureFormat::BF10GF11RF11:
return PixelFormat::R11FG11FB10F;
+ case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
+ return PixelFormat::RGBA32UI;
case Tegra::Texture::TextureFormat::DXT1:
return PixelFormat::DXT1;
case Tegra::Texture::TextureFormat::DXT23:
@@ -197,8 +195,12 @@ struct SurfaceParams {
return PixelFormat::DXT45;
case Tegra::Texture::TextureFormat::DXN1:
return PixelFormat::DXN1;
+ case Tegra::Texture::TextureFormat::BC7U:
+ return PixelFormat::BC7U;
+ case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
+ return PixelFormat::ASTC_2D_4X4;
default:
- NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
@@ -220,6 +222,8 @@ struct SurfaceParams {
return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
case PixelFormat::R11FG11FB10F:
return Tegra::Texture::TextureFormat::BF10GF11RF11;
+ case PixelFormat::RGBA32UI:
+ return Tegra::Texture::TextureFormat::R32_G32_B32_A32;
case PixelFormat::DXT1:
return Tegra::Texture::TextureFormat::DXT1;
case PixelFormat::DXT23:
@@ -228,6 +232,23 @@ struct SurfaceParams {
return Tegra::Texture::TextureFormat::DXT45;
case PixelFormat::DXN1:
return Tegra::Texture::TextureFormat::DXN1;
+ case PixelFormat::BC7U:
+ return Tegra::Texture::TextureFormat::BC7U;
+ case PixelFormat::ASTC_2D_4X4:
+ return Tegra::Texture::TextureFormat::ASTC_2D_4X4;
+ default:
+ UNREACHABLE();
+ }
+ }
+
+ static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::S8Z24:
+ return Tegra::DepthFormat::S8_Z24_UNORM;
+ case PixelFormat::Z24S8:
+ return Tegra::DepthFormat::Z24_S8_UNORM;
+ case PixelFormat::Z32F:
+ return Tegra::DepthFormat::Z32_FLOAT;
default:
UNREACHABLE();
}
@@ -239,7 +260,7 @@ struct SurfaceParams {
case Tegra::Texture::ComponentType::UNORM:
return ComponentType::UNorm;
default:
- NGLOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
+ LOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
UNREACHABLE();
}
}
@@ -254,215 +275,153 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
return ComponentType::Float;
+ case Tegra::RenderTargetFormat::RGBA32_UINT:
+ return ComponentType::UInt;
default:
- NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
- static ComponentType ComponentTypeFromGPUPixelFormat(
- Tegra::FramebufferConfig::PixelFormat format) {
+ static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
- return ComponentType::UNorm;
+ return PixelFormat::ABGR8;
default:
- NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
- static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
- SurfaceType a_type = GetFormatType(pixel_format_a);
- SurfaceType b_type = GetFormatType(pixel_format_b);
-
- if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) {
- return true;
- }
-
- if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
- return true;
- }
-
- if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
- return true;
+ static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
+ switch (format) {
+ case Tegra::DepthFormat::S8_Z24_UNORM:
+ case Tegra::DepthFormat::Z24_S8_UNORM:
+ return ComponentType::UNorm;
+ case Tegra::DepthFormat::Z32_FLOAT:
+ return ComponentType::Float;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ UNREACHABLE();
}
-
- return false;
}
static SurfaceType GetFormatType(PixelFormat pixel_format) {
- if (static_cast<size_t>(pixel_format) < MaxPixelFormat) {
+ if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) {
return SurfaceType::ColorTexture;
}
+ if (static_cast<size_t>(pixel_format) <
+ static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) {
+ return SurfaceType::DepthStencil;
+ }
+
// TODO(Subv): Implement the other formats
ASSERT(false);
return SurfaceType::Invalid;
}
- /// Update the params "size", "end" and "type" from the already set "addr", "width", "height"
- /// and "pixel_format"
- void UpdateParams() {
- if (stride == 0) {
- stride = width;
- }
- type = GetFormatType(pixel_format);
- size = !is_tiled ? BytesInPixels(stride * (height - 1) + width)
- : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8);
- end = addr + size;
- }
-
- SurfaceInterval GetInterval() const {
- return SurfaceInterval::right_open(addr, end);
- }
-
- // Returns the outer rectangle containing "interval"
- SurfaceParams FromInterval(SurfaceInterval interval) const;
-
- SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const;
-
- // Returns the region of the biggest valid rectange within interval
- SurfaceInterval GetCopyableInterval(const Surface& src_surface) const;
-
- /**
- * Gets the actual width (in pixels) of the surface. This is provided because `width` is used
- * for tracking the surface region in memory, which may be compressed for certain formats. In
- * this scenario, `width` is actually the compressed width.
- */
- u32 GetActualWidth() const {
- return width * GetCompresssionFactor();
- }
-
- /**
- * Gets the actual height (in pixels) of the surface. This is provided because `height` is used
- * for tracking the surface region in memory, which may be compressed for certain formats. In
- * this scenario, `height` is actually the compressed height.
- */
- u32 GetActualHeight() const {
- return height * GetCompresssionFactor();
- }
+ /// Returns the rectangle corresponding to this surface
+ MathUtil::Rectangle<u32> GetRect() const;
- u32 GetScaledWidth() const {
- return width * res_scale;
+ /// Returns the size of this surface in bytes, adjusted for compression
+ size_t SizeInBytes() const {
+ const u32 compression_factor{GetCompressionFactor(pixel_format)};
+ ASSERT(width % compression_factor == 0);
+ ASSERT(height % compression_factor == 0);
+ return (width / compression_factor) * (height / compression_factor) *
+ GetFormatBpp(pixel_format) / CHAR_BIT;
}
- u32 GetScaledHeight() const {
- return height * res_scale;
- }
+ /// Returns the CPU virtual address for this surface
+ VAddr GetCpuAddr() const;
- MathUtil::Rectangle<u32> GetRect() const {
- return {0, height, width, 0};
+ /// Returns true if the specified region overlaps with this surface's region in Switch memory
+ bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const {
+ return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);
}
- MathUtil::Rectangle<u32> GetScaledRect() const {
- return {0, GetScaledHeight(), GetScaledWidth(), 0};
- }
+ /// Creates SurfaceParams from a texture configuration
+ static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
+
+ /// Creates SurfaceParams from a framebuffer configuration
+ static SurfaceParams CreateForFramebuffer(
+ const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
+
+ /// Creates SurfaceParams for a depth buffer configuration
+ static SurfaceParams CreateForDepthBuffer(
+ const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config,
+ Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format);
+
+ Tegra::GPUVAddr addr;
+ bool is_tiled;
+ u32 block_height;
+ PixelFormat pixel_format;
+ ComponentType component_type;
+ SurfaceType type;
+ u32 width;
+ u32 height;
+ u32 unaligned_height;
+ size_t size_in_bytes;
+};
- u64 PixelsInBytes(u64 size) const {
- return size * CHAR_BIT / GetFormatBpp(pixel_format);
+/// Hashable variation of SurfaceParams, used for a key in the surface cache
+struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
+ static SurfaceKey Create(const SurfaceParams& params) {
+ SurfaceKey res;
+ res.state = params;
+ return res;
}
+};
- u64 BytesInPixels(u64 pixels) const {
- return pixels * GetFormatBpp(pixel_format) / CHAR_BIT;
+namespace std {
+template <>
+struct hash<SurfaceKey> {
+ size_t operator()(const SurfaceKey& k) const {
+ return k.Hash();
}
-
- VAddr GetCpuAddr() const;
-
- bool ExactMatch(const SurfaceParams& other_surface) const;
- bool CanSubRect(const SurfaceParams& sub_surface) const;
- bool CanExpand(const SurfaceParams& expanded_surface) const;
- bool CanTexCopy(const SurfaceParams& texcopy_params) const;
-
- MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const;
- MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const;
-
- Tegra::GPUVAddr addr = 0;
- Tegra::GPUVAddr end = 0;
- boost::optional<VAddr> cpu_addr;
- u64 size = 0;
-
- u32 width = 0;
- u32 height = 0;
- u32 stride = 0;
- u32 block_height = 0;
- u16 res_scale = 1;
-
- bool is_tiled = false;
- PixelFormat pixel_format = PixelFormat::Invalid;
- SurfaceType type = SurfaceType::Invalid;
- ComponentType component_type = ComponentType::Invalid;
};
+} // namespace std
-struct CachedSurface : SurfaceParams {
- bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const;
- bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const;
-
- bool IsRegionValid(SurfaceInterval interval) const {
- return (invalid_regions.find(interval) == invalid_regions.end());
- }
+class CachedSurface final {
+public:
+ CachedSurface(const SurfaceParams& params);
- bool IsSurfaceFullyInvalid() const {
- return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval());
+ const OGLTexture& Texture() const {
+ return texture;
}
- bool registered = false;
- SurfaceRegions invalid_regions;
-
- u64 fill_size = 0; /// Number of bytes to read from fill_data
- std::array<u8, 4> fill_data;
-
- OGLTexture texture;
-
- static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) {
- if (format == PixelFormat::Invalid)
+ static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) {
+ if (format == SurfaceParams::PixelFormat::Invalid)
return 0;
return SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
}
- std::unique_ptr<u8[]> gl_buffer;
- size_t gl_buffer_size = 0;
+ const SurfaceParams& GetSurfaceParams() const {
+ return params;
+ }
// Read/Write data in Switch memory to/from gl_buffer
- void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end);
- void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end);
+ void LoadGLBuffer();
+ void FlushGLBuffer();
// Upload/Download data in gl_buffer in/to this surface's texture
- void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
- GLuint draw_fb_handle);
- void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
- GLuint draw_fb_handle);
+ void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
+ void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
+
+private:
+ OGLTexture texture;
+ std::vector<u8> gl_buffer;
+ SurfaceParams params;
};
-class RasterizerCacheOpenGL : NonCopyable {
+class RasterizerCacheOpenGL final : NonCopyable {
public:
RasterizerCacheOpenGL();
~RasterizerCacheOpenGL();
- /// Blit one surface's texture to another
- bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect,
- const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect);
-
- void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect,
- GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect);
-
- /// Copy one surface's region to another
- void CopySurface(const Surface& src_surface, const Surface& dst_surface,
- SurfaceInterval copy_interval);
-
- /// Load a texture from Switch memory to OpenGL and cache it (if not already cached)
- Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
- bool load_if_create);
-
- /// Tries to find a framebuffer GPU address based on the provided CPU address
- boost::optional<Tegra::GPUVAddr> TryFindFramebufferGpuAddress(VAddr cpu_addr) const;
-
- /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
- /// Switch memory to OpenGL and caches it (if not already cached)
- SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
- bool load_if_create);
-
/// Get a surface based on the texture configuration
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
@@ -470,29 +429,21 @@ public:
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
const MathUtil::Rectangle<s32>& viewport);
- /// Get a surface that matches the fill config
- Surface GetFillSurface(const void* config);
+ /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
+ void MarkSurfaceAsDirty(const Surface& surface);
- /// Get a surface that matches a "texture copy" display transfer config
- SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
+ /// Tries to find a framebuffer surface based on the provided CPU address
+ Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
/// Write any cached resources overlapping the region back to memory (if dirty)
- void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr);
-
- /// Mark region as being invalidated by region_owner (nullptr if Switch memory)
- void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner);
+ void FlushRegion(Tegra::GPUVAddr addr, size_t size);
- /// Flush all cached resources tracked by this cache manager
- void FlushAll();
+ /// Mark the specified region as being invalidated
+ void InvalidateRegion(Tegra::GPUVAddr addr, size_t size);
private:
- void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface);
-
- /// Update surface's texture for given region when necessary
- void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size);
-
- /// Create a new surface
- Surface CreateSurface(const SurfaceParams& params);
+ void LoadSurface(const Surface& surface);
+ Surface GetSurface(const SurfaceParams& params);
/// Register surface into the cache
void RegisterSurface(const Surface& surface);
@@ -503,18 +454,9 @@ private:
/// Increase/decrease the number of surface in pages touching the specified region
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
- SurfaceCache surface_cache;
+ std::unordered_map<SurfaceKey, Surface> surface_cache;
PageMap cached_pages;
- SurfaceMap dirty_regions;
- SurfaceSet remove_surfaces;
OGLFramebuffer read_framebuffer;
OGLFramebuffer draw_framebuffer;
-
- OGLVertexArray attributeless_vao;
- OGLBuffer d24s8_abgr_buffer;
- GLsizeiptr d24s8_abgr_buffer_size;
- OGLProgram d24s8_abgr_shader;
- GLint d24s8_abgr_tbo_size_u_id;
- GLint d24s8_abgr_viewport_u_id;
};
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 93f9172e7..0fed93ca5 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -38,7 +38,7 @@ public:
if (handle == 0)
return;
glDeleteTextures(1, &handle);
- OpenGLState::GetCurState().ResetTexture(handle).Apply();
+ OpenGLState::GetCurState().UnbindTexture(handle).Apply();
handle = 0;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 67726e7c6..5914077e8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -9,6 +9,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
namespace GLShader {
@@ -16,6 +17,7 @@ namespace Decompiler {
using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction;
+using Tegra::Shader::LogicOperation;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
using Tegra::Shader::Sampler;
@@ -266,6 +268,27 @@ public:
}
/**
+ * Returns code that does an integer size conversion for the specified size.
+ * @param value Value to perform integer size conversion on.
+ * @param size Register size to use for conversion instructions.
+ * @returns GLSL string corresponding to the value converted to the specified size.
+ */
+ static std::string ConvertIntegerSize(const std::string& value, Register::Size size) {
+ switch (size) {
+ case Register::Size::Byte:
+ return "((" + value + " << 24) >> 24)";
+ case Register::Size::Short:
+ return "((" + value + " << 16) >> 16)";
+ case Register::Size::Word:
+ // Default - do nothing
+ return value;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented conversion size {}", static_cast<u32>(size));
+ UNREACHABLE();
+ }
+ }
+
+ /**
* Gets a register as an float.
* @param reg The register to get.
* @param elem The element to use for the operation.
@@ -281,15 +304,18 @@ public:
* @param reg The register to get.
* @param elem The element to use for the operation.
* @param is_signed Whether to get the register as a signed (or unsigned) integer.
+ * @param size Register size to use for conversion instructions.
* @returns GLSL string corresponding to the register as an integer.
*/
- std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0,
- bool is_signed = true) {
+ std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true,
+ Register::Size size = Register::Size::Word) {
const std::string func = GetGLSLConversionFunc(
GLSLRegister::Type::Float,
is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger);
- return func + '(' + GetRegister(reg, elem) + ')';
+ std::string value = func + '(' + GetRegister(reg, elem) + ')';
+
+ return ConvertIntegerSize(value, size);
}
/**
@@ -299,13 +325,15 @@ public:
* @param value The code representing the value to assign.
* @param dest_num_components Number of components in the destination.
* @param value_num_components Number of components in the value.
- * @param is_abs Optional, when True, applies absolute value to output.
+ * @param is_saturated Optional, when True, saturates the provided value.
* @param dest_elem Optional, the destination element to use for the operation.
*/
void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
- u64 dest_num_components, u64 value_num_components, bool is_abs = false,
- u64 dest_elem = 0) {
- SetRegister(reg, elem, value, dest_num_components, value_num_components, is_abs, dest_elem);
+ u64 dest_num_components, u64 value_num_components,
+ bool is_saturated = false, u64 dest_elem = 0) {
+
+ SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
+ dest_num_components, value_num_components, dest_elem);
}
/**
@@ -315,18 +343,22 @@ public:
* @param value The code representing the value to assign.
* @param dest_num_components Number of components in the destination.
* @param value_num_components Number of components in the value.
- * @param is_abs Optional, when True, applies absolute value to output.
+ * @param is_saturated Optional, when True, saturates the provided value.
* @param dest_elem Optional, the destination element to use for the operation.
+ * @param size Register size to use for conversion instructions.
*/
void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
const std::string& value, u64 dest_num_components,
- u64 value_num_components, bool is_abs = false, u64 dest_elem = 0) {
+ u64 value_num_components, bool is_saturated = false,
+ u64 dest_elem = 0, Register::Size size = Register::Size::Word) {
+ ASSERT_MSG(!is_saturated, "Unimplemented");
+
const std::string func = GetGLSLConversionFunc(
is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger,
GLSLRegister::Type::Float);
- SetRegister(reg, elem, func + '(' + value + ')', dest_num_components, value_num_components,
- is_abs, dest_elem);
+ SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
+ dest_num_components, value_num_components, dest_elem);
}
/**
@@ -366,7 +398,8 @@ public:
/// Generates code representing a uniform (C buffer) register, interpreted as the input type.
std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) {
declr_const_buffers[index].MarkAsUsed(index, offset, stage);
- std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset) + ']';
+ std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" +
+ std::to_string(offset % 4) + ']';
if (type == GLSLRegister::Type::Float) {
return value;
@@ -380,8 +413,12 @@ public:
std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg,
GLSLRegister::Type type) {
declr_const_buffers[index].MarkAsUsedIndirect(index, stage);
- std::string value = 'c' + std::to_string(index) + "[(floatBitsToInt(" +
- GetRegister(index_reg, 0) + ") + " + std::to_string(offset) + ") / 4]";
+
+ std::string final_offset = "((floatBitsToInt(" + GetRegister(index_reg, 0) + ") + " +
+ std::to_string(offset) + ") / 4)";
+
+ std::string value =
+ 'c' + std::to_string(index) + '[' + final_offset + " / 4][" + final_offset + " % 4]";
if (type == GLSLRegister::Type::Float) {
return value;
@@ -423,9 +460,10 @@ public:
unsigned const_buffer_layout = 0;
for (const auto& entry : GetConstBuffersDeclarations()) {
- declarations.AddLine("layout(std430) buffer " + entry.GetName());
+ declarations.AddLine("layout(std140) uniform " + entry.GetName());
declarations.AddLine('{');
- declarations.AddLine(" float c" + std::to_string(entry.GetIndex()) + "[];");
+ declarations.AddLine(" vec4 c" + std::to_string(entry.GetIndex()) +
+ "[MAX_CONSTBUFFER_ELEMENTS];");
declarations.AddLine("};");
declarations.AddNewLine();
++const_buffer_layout;
@@ -500,13 +538,11 @@ private:
* @param value The code representing the value to assign.
* @param dest_num_components Number of components in the destination.
* @param value_num_components Number of components in the value.
- * @param is_abs Optional, when True, applies absolute value to output.
* @param dest_elem Optional, the destination element to use for the operation.
*/
void SetRegister(const Register& reg, u64 elem, const std::string& value,
- u64 dest_num_components, u64 value_num_components, bool is_abs,
- u64 dest_elem) {
- std::string dest = GetRegister(reg, dest_elem);
+ u64 dest_num_components, u64 value_num_components, u64 dest_elem) {
+ std::string dest = GetRegister(reg, static_cast<u32>(dest_elem));
if (dest_num_components > 1) {
dest += GetSwizzle(elem);
}
@@ -516,8 +552,6 @@ private:
src += GetSwizzle(elem);
}
- src = is_abs ? "abs(" + src + ')' : src;
-
shader.AddLine(dest + " = " + src + ';');
}
@@ -547,7 +581,7 @@ private:
return "input_attribute_" + std::to_string(index);
}
- NGLOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
+ LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
UNREACHABLE();
}
}
@@ -565,7 +599,7 @@ private:
return "output_attribute_" + std::to_string(index);
}
- NGLOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index);
+ LOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index);
UNREACHABLE();
}
}
@@ -685,21 +719,31 @@ private:
/**
* Returns the comparison string to use to compare two values in the 'set' family of
* instructions.
- * @params condition The condition used in the 'set'-family instruction.
+ * @param condition The condition used in the 'set'-family instruction.
+ * @param op_a First operand to use for the comparison.
+ * @param op_b Second operand to use for the comparison.
* @returns String corresponding to the GLSL operator that matches the desired comparison.
*/
- std::string GetPredicateComparison(Tegra::Shader::PredCondition condition) const {
+ std::string GetPredicateComparison(Tegra::Shader::PredCondition condition,
+ const std::string& op_a, const std::string& op_b) const {
using Tegra::Shader::PredCondition;
static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
- {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
- {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
- {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
+ {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="},
+ {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
+ {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
+ {PredCondition::NotEqualWithNan, "!="},
};
- auto comparison = PredicateComparisonStrings.find(condition);
+ const auto& comparison{PredicateComparisonStrings.find(condition)};
ASSERT_MSG(comparison != PredicateComparisonStrings.end(),
"Unknown predicate comparison operation");
- return comparison->second;
+
+ std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
+ if (condition == PredCondition::NotEqualWithNan) {
+ predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
+ }
+
+ return predicate;
}
/**
@@ -733,6 +777,31 @@ private:
return (absolute_offset % SchedPeriod) == 0;
}
+ void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
+ const std::string& op_b) {
+ switch (logic_op) {
+ case LogicOperation::And: {
+ regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1);
+ break;
+ }
+ case LogicOperation::Or: {
+ regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1);
+ break;
+ }
+ case LogicOperation::Xor: {
+ regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1);
+ break;
+ }
+ case LogicOperation::PassB: {
+ regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1);
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op));
+ UNREACHABLE();
+ }
+ }
+
/**
* Compiles a single instruction from Tegra to GLSL.
* @param offset the offset of the Tegra shader instruction.
@@ -750,8 +819,9 @@ private:
// Decoding failure
if (!opcode) {
- NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {0:x}", instr.value);
+ LOG_CRITICAL(HW_GPU, "Unhandled instruction: {0:x}", instr.value);
UNREACHABLE();
+ return offset + 1;
}
shader.AddLine("// " + std::to_string(offset) + ": " + opcode->GetName());
@@ -770,22 +840,25 @@ private:
switch (opcode->GetType()) {
case OpCode::Type::Arithmetic: {
- std::string op_a = instr.alu.negate_a ? "-" : "";
- op_a += regs.GetRegisterAsFloat(instr.gpr8);
+ std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
if (instr.alu.abs_a) {
op_a = "abs(" + op_a + ')';
}
- std::string op_b = instr.alu.negate_b ? "-" : "";
+ if (instr.alu.negate_a) {
+ op_a = "-(" + op_a + ')';
+ }
+
+ std::string op_b;
if (instr.is_b_imm) {
- op_b += GetImmediate19(instr);
+ op_b = GetImmediate19(instr);
} else {
if (instr.is_b_gpr) {
- op_b += regs.GetRegisterAsFloat(instr.gpr20);
+ op_b = regs.GetRegisterAsFloat(instr.gpr20);
} else {
- op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
- GLSLRegister::Type::Float);
+ op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ GLSLRegister::Type::Float);
}
}
@@ -793,6 +866,10 @@ private:
op_b = "abs(" + op_b + ')';
}
+ if (instr.alu.negate_b) {
+ op_b = "-(" + op_b + ')';
+ }
+
switch (opcode->GetId()) {
case OpCode::Id::MOV_C:
case OpCode::Id::MOV_R: {
@@ -800,68 +877,53 @@ private:
break;
}
- case OpCode::Id::MOV32_IMM: {
- // mov32i doesn't have abs or neg bits.
- regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
- break;
- }
case OpCode::Id::FMUL_C:
case OpCode::Id::FMUL_R:
case OpCode::Id::FMUL_IMM: {
- ASSERT_MSG(!instr.saturate_a, "Unimplemented");
-
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
- break;
- }
- case OpCode::Id::FMUL32_IMM: {
- // fmul32i doesn't have abs or neg bits.
- regs.SetRegisterToFloat(
- instr.gpr0, 0,
- regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
+ instr.alu.saturate_d);
break;
}
case OpCode::Id::FADD_C:
case OpCode::Id::FADD_R:
case OpCode::Id::FADD_IMM: {
- ASSERT_MSG(!instr.saturate_a, "Unimplemented");
-
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, instr.alu.abs_d);
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
+ instr.alu.saturate_d);
break;
}
case OpCode::Id::MUFU: {
- ASSERT_MSG(!instr.saturate_a, "Unimplemented");
-
switch (instr.sub_op) {
case SubOp::Cos:
regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
- instr.alu.abs_d);
+ instr.alu.saturate_d);
break;
case SubOp::Sin:
regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
- instr.alu.abs_d);
+ instr.alu.saturate_d);
break;
case SubOp::Ex2:
regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
- instr.alu.abs_d);
+ instr.alu.saturate_d);
break;
case SubOp::Lg2:
regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
- instr.alu.abs_d);
+ instr.alu.saturate_d);
break;
case SubOp::Rcp:
- regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, instr.alu.abs_d);
+ regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
+ instr.alu.saturate_d);
break;
case SubOp::Rsq:
regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
- instr.alu.abs_d);
+ instr.alu.saturate_d);
break;
- case SubOp::Min:
- regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1,
- instr.alu.abs_d);
+ case SubOp::Sqrt:
+ regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
+ instr.alu.saturate_d);
break;
default:
- NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
- static_cast<unsigned>(instr.sub_op.Value()));
+ LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
+ static_cast<unsigned>(instr.sub_op.Value()));
UNREACHABLE();
}
break;
@@ -884,16 +946,31 @@ private:
// Currently RRO is only implemented as a register move.
// Usage of `abs_b` and `negate_b` here should also be correct.
regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
- NGLOG_WARNING(HW_GPU, "RRO instruction is incomplete");
+ LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
break;
}
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}", opcode->GetName());
UNREACHABLE();
}
}
break;
}
+ case OpCode::Type::ArithmeticImmediate: {
+ switch (opcode->GetId()) {
+ case OpCode::Id::MOV32_IMM: {
+ regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
+ break;
+ }
+ case OpCode::Id::FMUL32_IMM: {
+ regs.SetRegisterToFloat(
+ instr.gpr0, 0,
+ regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
+ break;
+ }
+ }
+ break;
+ }
case OpCode::Type::Bfe: {
ASSERT_MSG(!instr.bfe.negate_b, "Unimplemented");
@@ -912,56 +989,13 @@ private:
break;
}
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->GetName());
UNREACHABLE();
}
}
break;
}
- case OpCode::Type::Logic: {
- std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
-
- if (instr.alu.lop.invert_a)
- op_a = "~(" + op_a + ')';
-
- switch (opcode->GetId()) {
- case OpCode::Id::LOP32I: {
- u32 imm = static_cast<u32>(instr.alu.imm20_32.Value());
-
- if (instr.alu.lop.invert_b)
- imm = ~imm;
-
- switch (instr.alu.lop.operation) {
- case Tegra::Shader::LogicOperation::And: {
- regs.SetRegisterToInteger(instr.gpr0, true, 0,
- '(' + op_a + " & " + std::to_string(imm) + ')', 1, 1);
- break;
- }
- case Tegra::Shader::LogicOperation::Or: {
- regs.SetRegisterToInteger(instr.gpr0, true, 0,
- '(' + op_a + " | " + std::to_string(imm) + ')', 1, 1);
- break;
- }
- case Tegra::Shader::LogicOperation::Xor: {
- regs.SetRegisterToInteger(instr.gpr0, true, 0,
- '(' + op_a + " ^ " + std::to_string(imm) + ')', 1, 1);
- break;
- }
- default:
- NGLOG_CRITICAL(HW_GPU, "Unimplemented lop32i operation: {}",
- static_cast<u32>(instr.alu.lop.operation.Value()));
- UNREACHABLE();
- }
- break;
- }
- default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled logic instruction: {}", opcode->GetName());
- UNREACHABLE();
- }
- }
- break;
- }
case OpCode::Type::Shift: {
std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
@@ -998,21 +1032,46 @@ private:
regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1);
break;
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->GetName());
UNREACHABLE();
}
}
break;
}
- case OpCode::Type::ArithmeticInteger: {
+ case OpCode::Type::ArithmeticIntegerImmediate: {
std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
+ std::string op_b = std::to_string(instr.alu.imm20_32.Value());
- if (instr.alu_integer.negate_a)
- op_a = '-' + op_a;
+ switch (opcode->GetId()) {
+ case OpCode::Id::IADD32I:
+ if (instr.iadd32i.negate_a)
+ op_a = "-(" + op_a + ')';
- std::string op_b = instr.alu_integer.negate_b ? "-" : "";
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
+ instr.iadd32i.saturate != 0);
+ break;
+ case OpCode::Id::LOP32I: {
+ if (instr.alu.lop32i.invert_a)
+ op_a = "~(" + op_a + ')';
+ if (instr.alu.lop32i.invert_b)
+ op_b = "~(" + op_b + ')';
+
+ WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b);
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticIntegerImmediate instruction: {}",
+ opcode->GetName());
+ UNREACHABLE();
+ }
+ }
+ break;
+ }
+ case OpCode::Type::ArithmeticInteger: {
+ std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
+ std::string op_b;
if (instr.is_b_imm) {
op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
} else {
@@ -1028,22 +1087,63 @@ private:
case OpCode::Id::IADD_C:
case OpCode::Id::IADD_R:
case OpCode::Id::IADD_IMM: {
- ASSERT_MSG(!instr.saturate_a, "Unimplemented");
- regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1);
+ if (instr.alu_integer.negate_a)
+ op_a = "-(" + op_a + ')';
+
+ if (instr.alu_integer.negate_b)
+ op_b = "-(" + op_b + ')';
+
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
+ instr.alu.saturate_d);
break;
}
case OpCode::Id::ISCADD_C:
case OpCode::Id::ISCADD_R:
case OpCode::Id::ISCADD_IMM: {
+ if (instr.alu_integer.negate_a)
+ op_a = "-(" + op_a + ')';
+
+ if (instr.alu_integer.negate_b)
+ op_b = "-(" + op_b + ')';
+
std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
regs.SetRegisterToInteger(instr.gpr0, true, 0,
"((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
break;
}
+ case OpCode::Id::LOP_C:
+ case OpCode::Id::LOP_R:
+ case OpCode::Id::LOP_IMM: {
+ ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented");
+ ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented");
+
+ if (instr.alu.lop.invert_a)
+ op_a = "~(" + op_a + ')';
+
+ if (instr.alu.lop.invert_b)
+ op_b = "~(" + op_b + ')';
+
+ WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b);
+ break;
+ }
+ case OpCode::Id::IMNMX_C:
+ case OpCode::Id::IMNMX_R:
+ case OpCode::Id::IMNMX_IMM: {
+ ASSERT_MSG(instr.imnmx.exchange == Tegra::Shader::IMinMaxExchange::None,
+ "Unimplemented");
+ std::string condition =
+ GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
+ std::string parameters = op_a + ',' + op_b;
+ regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0,
+ '(' + condition + ") ? min(" + parameters + ") : max(" +
+ parameters + ')',
+ 1, 1);
+ break;
+ }
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
- opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
+ opcode->GetName());
UNREACHABLE();
}
}
@@ -1051,8 +1151,6 @@ private:
break;
}
case OpCode::Type::Ffma: {
- ASSERT_MSG(!instr.saturate_a, "Unimplemented");
-
std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
std::string op_b = instr.ffma.negate_b ? "-" : "";
std::string op_c = instr.ffma.negate_c ? "-" : "";
@@ -1081,38 +1179,38 @@ private:
break;
}
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->GetName());
UNREACHABLE();
}
}
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1);
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1,
+ instr.alu.saturate_d);
break;
}
case OpCode::Type::Conversion: {
- ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented");
ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
- ASSERT_MSG(!instr.saturate_a, "Unimplemented");
switch (opcode->GetId()) {
case OpCode::Id::I2I_R: {
ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
- std::string op_a =
- regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed);
+ std::string op_a = regs.GetRegisterAsInteger(
+ instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
if (instr.conversion.abs_a) {
op_a = "abs(" + op_a + ')';
}
regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
- 1);
+ 1, instr.alu.saturate_d, 0, instr.conversion.dest_size);
break;
}
case OpCode::Id::I2F_R: {
+ ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented");
ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
- std::string op_a =
- regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed);
+ std::string op_a = regs.GetRegisterAsInteger(
+ instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
if (instr.conversion.abs_a) {
op_a = "abs(" + op_a + ')';
@@ -1122,13 +1220,16 @@ private:
break;
}
case OpCode::Id::F2F_R: {
- ASSERT_MSG(!instr.saturate_a, "Unimplemented");
-
+ ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented");
+ ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented");
std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
switch (instr.conversion.f2f.rounding) {
case Tegra::Shader::F2fRoundingOp::None:
break;
+ case Tegra::Shader::F2fRoundingOp::Round:
+ op_a = "roundEven(" + op_a + ')';
+ break;
case Tegra::Shader::F2fRoundingOp::Floor:
op_a = "floor(" + op_a + ')';
break;
@@ -1139,8 +1240,8 @@ private:
op_a = "trunc(" + op_a + ')';
break;
default:
- NGLOG_CRITICAL(HW_GPU, "Unimplemented f2f rounding mode {}",
- static_cast<u32>(instr.conversion.f2f.rounding.Value()));
+ LOG_CRITICAL(HW_GPU, "Unimplemented f2f rounding mode {}",
+ static_cast<u32>(instr.conversion.f2f.rounding.Value()));
UNREACHABLE();
break;
}
@@ -1149,10 +1250,11 @@ private:
op_a = "abs(" + op_a + ')';
}
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d);
break;
}
case OpCode::Id::F2I_R: {
+ ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented");
std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
if (instr.conversion.abs_a) {
@@ -1172,8 +1274,8 @@ private:
op_a = "trunc(" + op_a + ')';
break;
default:
- NGLOG_CRITICAL(HW_GPU, "Unimplemented f2i rounding mode {}",
- static_cast<u32>(instr.conversion.f2i.rounding.Value()));
+ LOG_CRITICAL(HW_GPU, "Unimplemented f2i rounding mode {}",
+ static_cast<u32>(instr.conversion.f2i.rounding.Value()));
UNREACHABLE();
break;
}
@@ -1185,11 +1287,11 @@ private:
}
regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
- 1);
+ 1, false, 0, instr.conversion.dest_size);
break;
}
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName());
UNREACHABLE();
}
}
@@ -1224,8 +1326,8 @@ private:
break;
default:
- NGLOG_CRITICAL(HW_GPU, "Unhandled type: {}",
- static_cast<unsigned>(instr.ld_c.type.Value()));
+ LOG_CRITICAL(HW_GPU, "Unhandled type: {}",
+ static_cast<unsigned>(instr.ld_c.type.Value()));
UNREACHABLE();
}
break;
@@ -1298,7 +1400,7 @@ private:
break;
}
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
UNREACHABLE();
}
}
@@ -1340,10 +1442,9 @@ private:
std::string second_pred =
GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
- std::string comparator = GetPredicateComparison(instr.fsetp.cond);
std::string combiner = GetPredicateCombiner(instr.fsetp.op);
- std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')';
+ std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.fsetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1378,10 +1479,9 @@ private:
std::string second_pred =
GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
- std::string comparator = GetPredicateComparison(instr.isetp.cond);
std::string combiner = GetPredicateCombiner(instr.isetp.op);
- std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')';
+ std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.isetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1394,6 +1494,36 @@ private:
}
break;
}
+ case OpCode::Type::PredicateSetPredicate: {
+ std::string op_a =
+ GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
+ std::string op_b =
+ GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
+
+ using Tegra::Shader::Pred;
+ // We can't use the constant predicate as destination.
+ ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+ std::string second_pred =
+ GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
+
+ std::string combiner = GetPredicateCombiner(instr.psetp.op);
+
+ std::string predicate =
+ '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
+
+ // Set the primary predicate to the result of Predicate OP SecondPredicate
+ SetPredicate(instr.psetp.pred3,
+ '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+
+ if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+ // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+ // if enabled
+ SetPredicate(instr.psetp.pred0,
+ "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+ }
+ break;
+ }
case OpCode::Type::FloatSet: {
std::string op_a = instr.fset.neg_a ? "-" : "";
op_a += regs.GetRegisterAsFloat(instr.gpr8);
@@ -1428,11 +1558,10 @@ private:
std::string second_pred =
GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
- std::string comparator = GetPredicateComparison(instr.fset.cond);
std::string combiner = GetPredicateCombiner(instr.fset.op);
- std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
- combiner + " (" + second_pred + "))";
+ std::string predicate = "((" + GetPredicateComparison(instr.fset.cond, op_a, op_b) +
+ ") " + combiner + " (" + second_pred + "))";
if (instr.fset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -1463,11 +1592,10 @@ private:
std::string second_pred =
GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
- std::string comparator = GetPredicateComparison(instr.iset.cond);
std::string combiner = GetPredicateCombiner(instr.iset.op);
- std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
- combiner + " (" + second_pred + "))";
+ std::string predicate = "((" + GetPredicateComparison(instr.iset.cond, op_a, op_b) +
+ ") " + combiner + " (" + second_pred + "))";
if (instr.iset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -1518,8 +1646,15 @@ private:
// can ignore this when generating GLSL code.
break;
}
+ case OpCode::Id::DEPBAR:
+ case OpCode::Id::SYNC: {
+ // TODO(Subv): Find out if we actually have to care about these instructions or if
+ // the GLSL compiler takes care of that for us.
+ LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed");
+ break;
+ }
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
UNREACHABLE();
}
}
@@ -1646,7 +1781,10 @@ private:
}; // namespace Decompiler
std::string GetCommonDeclarations() {
- return "bool exec_shader();";
+ std::string declarations = "bool exec_shader();\n";
+ declarations += "#define MAX_CONSTBUFFER_ELEMENTS " +
+ std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4)));
+ return declarations;
}
boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
@@ -1656,7 +1794,7 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
GLSLGenerator generator(subroutines, program_code, main_offset, stage);
return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
} catch (const DecompileFail& exception) {
- NGLOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
+ LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
}
return boost::none;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index b88d592b7..c1e6fac9f 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -39,6 +39,10 @@ void main() {
// Viewport can be flipped, which is unsupported by glViewport
position.xy *= viewport_flip.xy;
gl_Position = position;
+
+ // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
+ // For now, this is here to bring order in lieu of proper emulation
+ position.w = 1.0;
}
)";
out += program.first;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 7c00beb33..d7167b298 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -38,8 +38,8 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
// TODO(bunnei): Support more than one viewport
- viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0 : 1.0;
- viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0 : 1.0;
+ viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
+ viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
}
} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 8568fface..3c087d638 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -27,7 +27,7 @@ GLuint LoadShader(const char* source, GLenum type) {
}
GLuint shader_id = glCreateShader(type);
glShaderSource(shader_id, 1, &source, nullptr);
- NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
+ LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
glCompileShader(shader_id);
GLint result = GL_FALSE;
@@ -39,9 +39,9 @@ GLuint LoadShader(const char* source, GLenum type) {
std::string shader_error(info_log_length, ' ');
glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]);
if (result == GL_TRUE) {
- NGLOG_DEBUG(Render_OpenGL, "{}", shader_error);
+ LOG_DEBUG(Render_OpenGL, "{}", shader_error);
} else {
- NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
+ LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
}
}
return shader_id;
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 2036a06a9..0e4d782e2 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -29,7 +29,7 @@ void LogShaderSource(T... shaders) {
std::string source(source_length, ' ');
glGetShaderSource(shader, source_length, nullptr, &source[0]);
- NGLOG_INFO(Render_OpenGL, "Shader source {}", source);
+ LOG_INFO(Render_OpenGL, "Shader source {}", source);
}
}
@@ -49,7 +49,7 @@ GLuint LoadShader(const char* source, GLenum type);
template <typename... T>
GLuint LoadProgram(bool separable_program, T... shaders) {
// Link the program
- NGLOG_DEBUG(Render_OpenGL, "Linking program...");
+ LOG_DEBUG(Render_OpenGL, "Linking program...");
GLuint program_id = glCreateProgram();
@@ -71,9 +71,9 @@ GLuint LoadProgram(bool separable_program, T... shaders) {
std::string program_error(info_log_length, ' ');
glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
if (result == GL_TRUE) {
- NGLOG_DEBUG(Render_OpenGL, "{}", program_error);
+ LOG_DEBUG(Render_OpenGL, "{}", program_error);
} else {
- NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
+ LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
}
}
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 44f0c8a01..2e8a422a8 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -48,24 +48,9 @@ OpenGLState::OpenGLState() {
logic_op = GL_COPY;
for (auto& texture_unit : texture_units) {
- texture_unit.texture_2d = 0;
- texture_unit.sampler = 0;
- texture_unit.swizzle.r = GL_RED;
- texture_unit.swizzle.g = GL_GREEN;
- texture_unit.swizzle.b = GL_BLUE;
- texture_unit.swizzle.a = GL_ALPHA;
+ texture_unit.Reset();
}
- lighting_lut.texture_buffer = 0;
-
- fog_lut.texture_buffer = 0;
-
- proctex_lut.texture_buffer = 0;
- proctex_diff_lut.texture_buffer = 0;
- proctex_color_map.texture_buffer = 0;
- proctex_alpha_map.texture_buffer = 0;
- proctex_noise_lut.texture_buffer = 0;
-
draw.read_framebuffer = 0;
draw.draw_framebuffer = 0;
draw.vertex_array = 0;
@@ -196,13 +181,13 @@ void OpenGLState::Apply() const {
}
// Textures
- for (size_t i = 0; i < std::size(texture_units); ++i) {
+ for (int i = 0; i < std::size(texture_units); ++i) {
if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) {
glActiveTexture(TextureUnits::MaxwellTexture(i).Enum());
glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d);
}
if (texture_units[i].sampler != cur_state.texture_units[i].sampler) {
- glBindSampler(i, texture_units[i].sampler);
+ glBindSampler(static_cast<GLuint>(i), texture_units[i].sampler);
}
// Update the texture swizzle
if (texture_units[i].swizzle.r != cur_state.texture_units[i].swizzle.r ||
@@ -223,54 +208,12 @@ void OpenGLState::Apply() const {
if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint ||
current.ssbo != new_state.ssbo) {
if (new_state.enabled) {
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, new_state.bindpoint, new_state.ssbo);
+ glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo);
}
}
}
}
- // Lighting LUTs
- if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) {
- glActiveTexture(TextureUnits::LightingLUT.Enum());
- glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer);
- }
-
- // Fog LUT
- if (fog_lut.texture_buffer != cur_state.fog_lut.texture_buffer) {
- glActiveTexture(TextureUnits::FogLUT.Enum());
- glBindTexture(GL_TEXTURE_BUFFER, fog_lut.texture_buffer);
- }
-
- // ProcTex Noise LUT
- if (proctex_noise_lut.texture_buffer != cur_state.proctex_noise_lut.texture_buffer) {
- glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum());
- glBindTexture(GL_TEXTURE_BUFFER, proctex_noise_lut.texture_buffer);
- }
-
- // ProcTex Color Map
- if (proctex_color_map.texture_buffer != cur_state.proctex_color_map.texture_buffer) {
- glActiveTexture(TextureUnits::ProcTexColorMap.Enum());
- glBindTexture(GL_TEXTURE_BUFFER, proctex_color_map.texture_buffer);
- }
-
- // ProcTex Alpha Map
- if (proctex_alpha_map.texture_buffer != cur_state.proctex_alpha_map.texture_buffer) {
- glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum());
- glBindTexture(GL_TEXTURE_BUFFER, proctex_alpha_map.texture_buffer);
- }
-
- // ProcTex LUT
- if (proctex_lut.texture_buffer != cur_state.proctex_lut.texture_buffer) {
- glActiveTexture(TextureUnits::ProcTexLUT.Enum());
- glBindTexture(GL_TEXTURE_BUFFER, proctex_lut.texture_buffer);
- }
-
- // ProcTex Diff LUT
- if (proctex_diff_lut.texture_buffer != cur_state.proctex_diff_lut.texture_buffer) {
- glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
- glBindTexture(GL_TEXTURE_BUFFER, proctex_diff_lut.texture_buffer);
- }
-
// Framebuffer
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
@@ -338,26 +281,12 @@ void OpenGLState::Apply() const {
cur_state = *this;
}
-OpenGLState& OpenGLState::ResetTexture(GLuint handle) {
+OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { // Unbinds every texture unit whose texture_2d equals handle; returns *this
for (auto& unit : texture_units) {
if (unit.texture_2d == handle) {
- unit.texture_2d = 0;
+ unit.Unbind(); // clears texture_2d and also sets the unit's swizzle (see Unbind() in gl_state.h)
}
}
- if (lighting_lut.texture_buffer == handle)
- lighting_lut.texture_buffer = 0;
- if (fog_lut.texture_buffer == handle)
- fog_lut.texture_buffer = 0;
- if (proctex_noise_lut.texture_buffer == handle)
- proctex_noise_lut.texture_buffer = 0;
- if (proctex_color_map.texture_buffer == handle)
- proctex_color_map.texture_buffer = 0;
- if (proctex_alpha_map.texture_buffer == handle)
- proctex_alpha_map.texture_buffer = 0;
- if (proctex_lut.texture_buffer == handle)
- proctex_lut.texture_buffer = 0;
- if (proctex_diff_lut.texture_buffer == handle)
- proctex_diff_lut.texture_buffer = 0;
return *this;
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 839e50e93..3398d7c04 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -91,35 +91,20 @@ public:
GLint b; // GL_TEXTURE_SWIZZLE_B
GLint a; // GL_TEXTURE_SWIZZLE_A
} swizzle;
- } texture_units[32];
-
- struct {
- GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
- } lighting_lut;
-
- struct {
- GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
- } fog_lut;
-
- struct {
- GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
- } proctex_noise_lut;
- struct {
- GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
- } proctex_color_map;
-
- struct {
- GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
- } proctex_alpha_map;
-
- struct {
- GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
- } proctex_lut;
-
- struct {
- GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
- } proctex_diff_lut;
+ void Unbind() { // Clears the bound 2D texture and sets the swizzle to R, G, B, A
+ texture_2d = 0;
+ swizzle.r = GL_RED;
+ swizzle.g = GL_GREEN;
+ swizzle.b = GL_BLUE;
+ swizzle.a = GL_ALPHA;
+ }
+
+ void Reset() { // Does everything Unbind() does and additionally sets sampler to 0
+ Unbind();
+ sampler = 0;
+ }
+ } texture_units[32];
struct {
GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
@@ -165,7 +150,7 @@ public:
void Apply() const;
/// Resets any references to the given resource
- OpenGLState& ResetTexture(GLuint handle);
+ OpenGLState& UnbindTexture(GLuint handle);
OpenGLState& ResetSampler(GLuint handle);
OpenGLState& ResetProgram(GLuint handle);
OpenGLState& ResetPipeline(GLuint handle);
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 2155fb019..e19c3b280 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -29,9 +29,13 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_UNSIGNED_BYTE;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return GL_UNSIGNED_SHORT;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return GL_UNSIGNED_INT_2_10_10_10_REV;
}
- NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
UNREACHABLE();
return {};
}
@@ -41,9 +45,13 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_BYTE;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return GL_SHORT;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return GL_INT_2_10_10_10_REV;
}
- NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
UNREACHABLE();
return {};
}
@@ -52,7 +60,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_FLOAT;
}
- NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
UNREACHABLE();
return {};
}
@@ -66,7 +74,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
case Maxwell::IndexFormat::UnsignedInt:
return GL_UNSIGNED_INT;
}
- NGLOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
UNREACHABLE();
return {};
}
@@ -78,7 +86,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
case Maxwell::PrimitiveTopology::TriangleStrip:
return GL_TRIANGLE_STRIP;
}
- NGLOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();
return {};
}
@@ -90,8 +98,8 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode) {
case Tegra::Texture::TextureFilter::Nearest:
return GL_NEAREST;
}
- NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}",
- static_cast<u32>(filter_mode));
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}",
+ static_cast<u32>(filter_mode));
UNREACHABLE();
return {};
}
@@ -110,8 +118,7 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
// manually mix them. However the shader part of this is not yet implemented.
return GL_CLAMP_TO_BORDER;
}
- NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}",
- static_cast<u32>(wrap_mode));
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
UNREACHABLE();
return {};
}
@@ -129,7 +136,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
case Maxwell::Blend::Equation::Max:
return GL_MAX;
}
- NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
UNREACHABLE();
return {};
}
@@ -175,7 +182,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
case Maxwell::Blend::Factor::OneMinusConstantAlpha:
return GL_ONE_MINUS_CONSTANT_ALPHA;
}
- NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
UNREACHABLE();
return {};
}
@@ -196,7 +203,65 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
case Tegra::Texture::SwizzleSource::OneFloat:
return GL_ONE;
}
- NGLOG_CRITICAL(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
+ UNREACHABLE();
+ return {};
+}
+
+inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { // Maps a Maxwell comparison op to the corresponding GL comparison enum
+ switch (comparison) {
+ case Maxwell::ComparisonOp::Never:
+ case Maxwell::ComparisonOp::NeverOld: // each ...Old enumerator shares the mapping of its non-Old counterpart
+ return GL_NEVER;
+ case Maxwell::ComparisonOp::Less:
+ case Maxwell::ComparisonOp::LessOld:
+ return GL_LESS;
+ case Maxwell::ComparisonOp::Equal:
+ case Maxwell::ComparisonOp::EqualOld:
+ return GL_EQUAL;
+ case Maxwell::ComparisonOp::LessEqual:
+ case Maxwell::ComparisonOp::LessEqualOld:
+ return GL_LEQUAL;
+ case Maxwell::ComparisonOp::Greater:
+ case Maxwell::ComparisonOp::GreaterOld:
+ return GL_GREATER;
+ case Maxwell::ComparisonOp::NotEqual:
+ case Maxwell::ComparisonOp::NotEqualOld:
+ return GL_NOTEQUAL;
+ case Maxwell::ComparisonOp::GreaterEqual:
+ case Maxwell::ComparisonOp::GreaterEqualOld:
+ return GL_GEQUAL;
+ case Maxwell::ComparisonOp::Always:
+ case Maxwell::ComparisonOp::AlwaysOld:
+ return GL_ALWAYS;
+ }
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison)); // reached only for values not listed above
+ UNREACHABLE();
+ return {}; // value-initialized GLenum, returned only if execution continues past UNREACHABLE()
+}
+
+inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) { // Maps a Maxwell front-face winding to GL_CW / GL_CCW
+ switch (front_face) {
+ case Maxwell::Cull::FrontFace::ClockWise:
+ return GL_CW;
+ case Maxwell::Cull::FrontFace::CounterClockWise:
+ return GL_CCW;
+ }
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); // reached only for values not listed above
+ UNREACHABLE();
+ return {}; // value-initialized GLenum, returned only if execution continues past UNREACHABLE()
+}
+
+inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) { // Maps a Maxwell cull-face selection to GL_FRONT / GL_BACK / GL_FRONT_AND_BACK
+ switch (cull_face) {
+ case Maxwell::Cull::CullFace::Front:
+ return GL_FRONT;
+ case Maxwell::Cull::CullFace::Back:
+ return GL_BACK;
+ case Maxwell::Cull::CullFace::FrontAndBack:
+ return GL_FRONT_AND_BACK;
+ }
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); // reached only for values not listed above
UNREACHABLE();
return {};
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index f33766bfd..00841e937 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -150,7 +150,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
screen_info)) {
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
- screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
Memory::FlushMode::Flush);
@@ -302,8 +301,8 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
right = texcoords.left;
} else {
// Other transformations are unsupported
- NGLOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}",
- static_cast<u32>(framebuffer_transform_flags));
+ LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}",
+ static_cast<u32>(framebuffer_transform_flags));
UNIMPLEMENTED();
}
}
@@ -405,14 +404,14 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
switch (severity) {
case GL_DEBUG_SEVERITY_HIGH:
- NGLOG_ERROR(Render_OpenGL, format, str_source, str_type, id, message);
+ LOG_ERROR(Render_OpenGL, format, str_source, str_type, id, message);
break;
case GL_DEBUG_SEVERITY_MEDIUM:
- NGLOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
+ LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
break;
case GL_DEBUG_SEVERITY_NOTIFICATION:
case GL_DEBUG_SEVERITY_LOW:
- NGLOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
+ LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
break;
}
}
@@ -430,9 +429,9 @@ bool RendererOpenGL::Init() {
const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
- NGLOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
- NGLOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
- NGLOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
+ LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
+ LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
+ LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 2cc6d9a00..21f0d298c 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -27,7 +27,7 @@ struct TextureInfo {
/// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo {
GLuint display_texture;
- MathUtil::Rectangle<float> display_texcoords;
+ const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; // Fixed at {0, 0, 1, 1}. NOTE(review): a const member deletes ScreenInfo's implicit copy/move assignment - confirm no caller assigns a ScreenInfo
TextureInfo texture;
};