summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp429
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h43
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp30
-rw-r--r--src/video_core/textures/texture.h2
5 files changed, 374 insertions, 137 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 1fcd13f04..587d9dffb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -738,7 +738,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
}
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
- Surface surface = res_cache.GetTextureSurface(texture);
+ Surface surface = res_cache.GetTextureSurface(texture, entry);
if (surface != nullptr) {
state.texture_units[current_bindpoint].texture = surface->Texture().handle;
state.texture_units[current_bindpoint].target = surface->Target();
@@ -909,7 +909,10 @@ void RasterizerOpenGL::SyncTransformFeedback() {
void RasterizerOpenGL::SyncPointState() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- state.point.size = regs.point_size;
+ // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
+ // register carrying a default value. For now, if the point size is zero, assume it's
+ // OpenGL's default (1).
+ state.point.size = regs.point_size == 0 ? 1 : regs.point_size;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 24a540258..ce967c4d6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -41,7 +41,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
}
/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
- const Tegra::Texture::FullTextureInfo& config) {
+ const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) {
SurfaceParams params{};
params.addr = TryGetCpuAddr(config.tic.Address());
params.is_tiled = config.tic.IsTiled();
@@ -60,9 +60,23 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
case SurfaceTarget::Texture2D:
params.depth = 1;
break;
+ case SurfaceTarget::TextureCubemap:
+ params.depth = config.tic.Depth() * 6;
+ break;
case SurfaceTarget::Texture3D:
+ params.depth = config.tic.Depth();
+ break;
case SurfaceTarget::Texture2DArray:
params.depth = config.tic.Depth();
+ if (!entry.IsArray()) {
+ // TODO(bunnei): We have seen games re-use a Texture2D as Texture2DArray with depth of
+ // one, but sample the texture in the shader as if it were not an array texture. This
+ // probably is valid on hardware, but we still need to write a test to confirm this. In
+ // emulation, the workaround here is to continue to treat this as a Texture2D. An
+ // example game that does this is Super Mario Odyssey (in Cloud Kingdom).
+ ASSERT(params.depth == 1);
+ params.target = SurfaceTarget::Texture2D;
+ }
break;
default:
LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
@@ -71,7 +85,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
break;
}
- params.size_in_bytes = params.SizeInBytes();
+ params.size_in_bytes_total = params.SizeInBytesTotal();
+ params.size_in_bytes_2d = params.SizeInBytes2D();
+ params.max_mip_level = config.tic.max_mip_level + 1;
+ params.rt = {};
+
return params;
}
@@ -89,7 +107,16 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
- params.size_in_bytes = params.SizeInBytes();
+ params.size_in_bytes_total = params.SizeInBytesTotal();
+ params.size_in_bytes_2d = params.SizeInBytes2D();
+ params.max_mip_level = 0;
+
+ // Render target specific parameters, not used for caching
+ params.rt.index = static_cast<u32>(index);
+ params.rt.array_mode = config.array_mode;
+ params.rt.layer_stride = config.layer_stride;
+ params.rt.base_layer = config.base_layer;
+
return params;
}
@@ -108,7 +135,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.unaligned_height = zeta_height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
- params.size_in_bytes = params.SizeInBytes();
+ params.size_in_bytes_total = params.SizeInBytesTotal();
+ params.size_in_bytes_2d = params.SizeInBytes2D();
+ params.max_mip_level = 0;
+ params.rt = {};
+
return params;
}
@@ -400,9 +431,13 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
// clang-format on
};
-static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
- const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
- GLuint read_fb_handle, GLuint draw_fb_handle) {
+static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
+ GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0,
+ GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
+
+ const auto& src_params{src_surface->GetSurfaceParams()};
+ const auto& dst_params{dst_surface->GetSurfaceParams()};
+
OpenGLState prev_state{OpenGLState::GetCurState()};
SCOPE_EXIT({ prev_state.Apply(); });
@@ -413,47 +448,203 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec
u32 buffers{};
- if (type == SurfaceType::ColorTexture) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
- 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
+ if (src_params.type == SurfaceType::ColorTexture) {
+ switch (src_params.target) {
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+ GL_TEXTURE_2D, src_surface->Texture().handle, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ 0, 0);
+ break;
+ case SurfaceParams::SurfaceTarget::TextureCubemap:
+ glFramebufferTexture2D(
+ GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+ static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
+ src_surface->Texture().handle, 0);
+ glFramebufferTexture2D(
+ GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+ static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+ src_surface->Texture().handle, 0, 0);
+ glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture3D:
+ glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+ SurfaceTargetToGL(src_params.target),
+ src_surface->Texture().handle, 0, 0);
+ glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+ SurfaceTargetToGL(src_params.target), 0, 0, 0);
+ break;
+ default:
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+ GL_TEXTURE_2D, src_surface->Texture().handle, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ 0, 0);
+ break;
+ }
+
+ switch (dst_params.target) {
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+ GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ 0, 0);
+ break;
+ case SurfaceParams::SurfaceTarget::TextureCubemap:
+ glFramebufferTexture2D(
+ GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+ static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
+ dst_surface->Texture().handle, 0);
+ glFramebufferTexture2D(
+ GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+ static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+ dst_surface->Texture().handle, 0, 0);
+ glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
+ break;
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
- 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
+ case SurfaceParams::SurfaceTarget::Texture3D:
+ glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+ SurfaceTargetToGL(dst_params.target),
+ dst_surface->Texture().handle, 0, 0);
+ glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+ SurfaceTargetToGL(dst_params.target), 0, 0, 0);
+ break;
+ default:
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+ GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ 0, 0);
+ break;
+ }
buffers = GL_COLOR_BUFFER_BIT;
- } else if (type == SurfaceType::Depth) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
+ } else if (src_params.type == SurfaceType::Depth) {
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+ GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+ src_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+ GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+ dst_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
buffers = GL_DEPTH_BUFFER_BIT;
- } else if (type == SurfaceType::DepthStencil) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ } else if (src_params.type == SurfaceType::DepthStencil) {
+ glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+ GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- src_tex, 0);
+ src_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+ GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- dst_tex, 0);
+ dst_surface->Texture().handle, 0);
buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
}
- glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
- dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
+ const auto& rect{src_params.GetRect()};
+ glBlitFramebuffer(rect.left, rect.bottom, rect.right, rect.top, rect.left, rect.bottom,
+ rect.right, rect.top, buffers,
buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
return true;
}
+static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
+ GLuint copy_pbo_handle, GLenum src_attachment = 0,
+ GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
+ ASSERT_MSG(dst_attachment == 0, "Unimplemented");
+
+ const auto& src_params{src_surface->GetSurfaceParams()};
+ const auto& dst_params{dst_surface->GetSurfaceParams()};
+
+ auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
+ auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
+
+ std::size_t buffer_size =
+ std::max(src_params.size_in_bytes_total, dst_params.size_in_bytes_total);
+
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
+ glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
+ if (source_format.compressed) {
+ glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
+ static_cast<GLsizei>(src_params.size_in_bytes_total), nullptr);
+ } else {
+ glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format,
+ source_format.type, static_cast<GLsizei>(src_params.size_in_bytes_total),
+ nullptr);
+ }
+ // If the new texture is bigger than the previous one, we need to fill in the rest with data
+ // from the CPU.
+ if (src_params.size_in_bytes_total < dst_params.size_in_bytes_total) {
+ // Upload the rest of the memory.
+ if (dst_params.is_tiled) {
+ // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
+ // of the data in this case. Games like Super Mario Odyssey seem to hit this case
+ // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
+ // but it doesn't clear it beforehand, the texture is already full of zeros.
+ LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
+ "reinterpretation but the texture is tiled.");
+ }
+ std::size_t remaining_size =
+ dst_params.size_in_bytes_total - src_params.size_in_bytes_total;
+ std::vector<u8> data(remaining_size);
+ Memory::ReadBlock(dst_params.addr + src_params.size_in_bytes_total, data.data(),
+ data.size());
+ glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes_total, remaining_size,
+ data.data());
+ }
+
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+
+ const GLsizei width{static_cast<GLsizei>(
+ std::min(src_params.GetRect().GetWidth(), dst_params.GetRect().GetWidth()))};
+ const GLsizei height{static_cast<GLsizei>(
+ std::min(src_params.GetRect().GetHeight(), dst_params.GetRect().GetHeight()))};
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
+ if (dest_format.compressed) {
+ LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
+ UNREACHABLE();
+ } else {
+ switch (dst_params.target) {
+ case SurfaceParams::SurfaceTarget::Texture1D:
+ glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format,
+ dest_format.type, nullptr);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height,
+ dest_format.format, dest_format.type, nullptr);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height,
+ static_cast<GLsizei>(dst_params.depth), dest_format.format,
+ dest_format.type, nullptr);
+ break;
+ case SurfaceParams::SurfaceTarget::TextureCubemap:
+ glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0,
+ static_cast<GLint>(cubemap_face), width, height, 1,
+ dest_format.format, dest_format.type, nullptr);
+ break;
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+ static_cast<u32>(dst_params.target));
+ UNREACHABLE();
+ }
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ }
+}
+
CachedSurface::CachedSurface(const SurfaceParams& params)
: params(params), gl_target(SurfaceTargetToGL(params.target)) {
texture.Create();
@@ -481,6 +672,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
rect.GetWidth());
break;
case SurfaceParams::SurfaceTarget::Texture2D:
+ case SurfaceParams::SurfaceTarget::TextureCubemap:
glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
rect.GetWidth(), rect.GetHeight());
break;
@@ -585,29 +777,39 @@ void CachedSurface::LoadGLBuffer() {
const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format);
const u32 copy_size = params.width * params.height * bytes_per_pixel;
+ const std::size_t total_size = copy_size * params.depth;
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
if (params.is_tiled) {
+ gl_buffer.resize(total_size);
+
// TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do
// this for 3D textures, etc.
switch (params.target) {
case SurfaceParams::SurfaceTarget::Texture2D:
// Pass impl. to the fallback code below
break;
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ case SurfaceParams::SurfaceTarget::TextureCubemap:
+ for (std::size_t index = 0; index < params.depth; ++index) {
+ const std::size_t offset{index * copy_size};
+ morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
+ params.width, params.block_height, params.height, gl_buffer.data() + offset,
+ copy_size, params.addr + offset);
+ }
+ break;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented tiled load for target={}",
static_cast<u32>(params.target));
UNREACHABLE();
}
- gl_buffer.resize(static_cast<std::size_t>(params.depth) * copy_size);
morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
params.width, params.block_height, params.height, gl_buffer.data(), copy_size,
params.addr);
} else {
- const u8* const texture_src_data_end{texture_src_data +
- (static_cast<std::size_t>(params.depth) * copy_size)};
+ const u8* const texture_src_data_end{texture_src_data + total_size};
gl_buffer.assign(texture_src_data, texture_src_data_end);
}
@@ -634,7 +836,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
// Load data from memory to the surface
const GLint x0 = static_cast<GLint>(rect.left);
const GLint y0 = static_cast<GLint>(rect.bottom);
- const std::size_t buffer_offset =
+ std::size_t buffer_offset =
static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
static_cast<std::size_t>(x0)) *
GetGLBytesPerPixel(params.pixel_format);
@@ -663,15 +865,25 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
glCompressedTexImage2D(
SurfaceTargetToGL(params.target), 0, tuple.internal_format,
static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0,
- static_cast<GLsizei>(params.size_in_bytes), &gl_buffer[buffer_offset]);
+ static_cast<GLsizei>(params.size_in_bytes_2d), &gl_buffer[buffer_offset]);
break;
case SurfaceParams::SurfaceTarget::Texture3D:
case SurfaceParams::SurfaceTarget::Texture2DArray:
glCompressedTexImage3D(
SurfaceTargetToGL(params.target), 0, tuple.internal_format,
static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height),
- static_cast<GLsizei>(params.depth), 0, static_cast<GLsizei>(params.size_in_bytes),
- &gl_buffer[buffer_offset]);
+ static_cast<GLsizei>(params.depth), 0,
+ static_cast<GLsizei>(params.size_in_bytes_total), &gl_buffer[buffer_offset]);
+ break;
+ case SurfaceParams::SurfaceTarget::TextureCubemap:
+ for (std::size_t face = 0; face < params.depth; ++face) {
+ glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
+ 0, tuple.internal_format, static_cast<GLsizei>(params.width),
+ static_cast<GLsizei>(params.height), 0,
+ static_cast<GLsizei>(params.size_in_bytes_2d),
+ &gl_buffer[buffer_offset]);
+ buffer_offset += params.size_in_bytes_2d;
+ }
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
@@ -679,8 +891,8 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
UNREACHABLE();
glCompressedTexImage2D(
GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
- static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes),
- &gl_buffer[buffer_offset]);
+ static_cast<GLsizei>(params.height), 0,
+ static_cast<GLsizei>(params.size_in_bytes_2d), &gl_buffer[buffer_offset]);
}
} else {
@@ -703,6 +915,15 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
tuple.type, &gl_buffer[buffer_offset]);
break;
+ case SurfaceParams::SurfaceTarget::TextureCubemap:
+ for (std::size_t face = 0; face < params.depth; ++face) {
+ glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 0, x0,
+ y0, static_cast<GLsizei>(rect.GetWidth()),
+ static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+ &gl_buffer[buffer_offset]);
+ buffer_offset += params.size_in_bytes_2d;
+ }
+ break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
@@ -722,8 +943,9 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
copy_pbo.Create();
}
-Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
- return GetSurface(SurfaceParams::CreateForTexture(config));
+Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
+ const GLShader::SamplerEntry& entry) {
+ return GetSurface(SurfaceParams::CreateForTexture(config, entry));
}
Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
@@ -811,98 +1033,69 @@ Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) {
return surface;
}
-Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
+Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
const SurfaceParams& new_params) {
// Verify surface is compatible for blitting
- const auto& params{surface->GetSurfaceParams()};
+ auto old_params{old_surface->GetSurfaceParams()};
// Get a new surface with the new parameters, and blit the previous surface to it
Surface new_surface{GetUncachedSurface(new_params)};
- if (params.pixel_format == new_params.pixel_format ||
- !Settings::values.use_accurate_framebuffers) {
- // If the format is the same, just do a framebuffer blit. This is significantly faster than
- // using PBOs. The is also likely less accurate, as textures will be converted rather than
- // reinterpreted.
-
- BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
- params.GetRect(), params.type, read_framebuffer.handle,
- draw_framebuffer.handle);
- } else {
- // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy,
- // where pixels are reinterpreted as a new format (without conversion). This code path uses
- // OpenGL PBOs and is quite slow.
-
- auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
- auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
-
- std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
+ // If the format is the same, just do a framebuffer blit. This is significantly faster than
+ // using PBOs. The is also likely less accurate, as textures will be converted rather than
+ // reinterpreted. When use_accurate_framebuffers setting is enabled, perform a more accurate
+ // surface copy, where pixels are reinterpreted as a new format (without conversion). This
+ // code path uses OpenGL PBOs and is quite slow.
+ const bool is_blit{old_params.pixel_format == new_params.pixel_format ||
+ !Settings::values.use_accurate_framebuffers};
- glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle);
- glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
- if (source_format.compressed) {
- glGetCompressedTextureImage(surface->Texture().handle, 0,
- static_cast<GLsizei>(params.SizeInBytes()), nullptr);
+ switch (new_params.target) {
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ if (is_blit) {
+ BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle);
} else {
- glGetTextureImage(surface->Texture().handle, 0, source_format.format,
- source_format.type, static_cast<GLsizei>(params.SizeInBytes()),
- nullptr);
+ CopySurface(old_surface, new_surface, copy_pbo.handle);
}
- // If the new texture is bigger than the previous one, we need to fill in the rest with data
- // from the CPU.
- if (params.SizeInBytes() < new_params.SizeInBytes()) {
- // Upload the rest of the memory.
- if (new_params.is_tiled) {
- // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
- // of the data in this case. Games like Super Mario Odyssey seem to hit this case
- // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
- // but it doesn't clear it beforehand, the texture is already full of zeros.
- LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
- "reinterpretation but the texture is tiled.");
- }
- std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
- std::vector<u8> data(remaining_size);
- Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size());
- glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size,
- data.data());
+ break;
+ case SurfaceParams::SurfaceTarget::TextureCubemap: {
+ if (old_params.rt.array_mode != 1) {
+ // TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this
+ // yet (array rendering used as a cubemap texture).
+ LOG_CRITICAL(HW_GPU, "Unhandled rendertarget array_mode {}", old_params.rt.array_mode);
+ UNREACHABLE();
+ return new_surface;
}
- glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
-
- const auto& dest_rect{new_params.GetRect()};
-
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo.handle);
- if (dest_format.compressed) {
- LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
- UNREACHABLE();
- } else {
- switch (new_params.target) {
- case SurfaceParams::SurfaceTarget::Texture1D:
- glTextureSubImage1D(new_surface->Texture().handle, 0, 0,
- static_cast<GLsizei>(dest_rect.GetWidth()), dest_format.format,
- dest_format.type, nullptr);
- break;
- case SurfaceParams::SurfaceTarget::Texture2D:
- glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0,
- static_cast<GLsizei>(dest_rect.GetWidth()),
- static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
- dest_format.type, nullptr);
- break;
- case SurfaceParams::SurfaceTarget::Texture3D:
- case SurfaceParams::SurfaceTarget::Texture2DArray:
- glTextureSubImage3D(new_surface->Texture().handle, 0, 0, 0, 0,
- static_cast<GLsizei>(dest_rect.GetWidth()),
- static_cast<GLsizei>(dest_rect.GetHeight()),
- static_cast<GLsizei>(new_params.depth), dest_format.format,
- dest_format.type, nullptr);
- break;
- default:
- LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
- static_cast<u32>(params.target));
- UNREACHABLE();
+ // This seems to be used for render-to-cubemap texture
+ ASSERT_MSG(old_params.target == SurfaceParams::SurfaceTarget::Texture2D, "Unexpected");
+ ASSERT_MSG(old_params.pixel_format == new_params.pixel_format, "Unexpected");
+ ASSERT_MSG(old_params.rt.base_layer == 0, "Unimplemented");
+
+ // TODO(bunnei): Verify the below - this stride seems to be in 32-bit words, not pixels.
+ // Tested with Splatoon 2, Super Mario Odyssey, and Breath of the Wild.
+ const std::size_t byte_stride{old_params.rt.layer_stride * sizeof(u32)};
+
+ for (std::size_t index = 0; index < new_params.depth; ++index) {
+ Surface face_surface{TryGetReservedSurface(old_params)};
+ ASSERT_MSG(face_surface, "Unexpected");
+
+ if (is_blit) {
+ BlitSurface(face_surface, new_surface, read_framebuffer.handle,
+ draw_framebuffer.handle, face_surface->GetSurfaceParams().rt.index,
+ new_params.rt.index, index);
+ } else {
+ CopySurface(face_surface, new_surface, copy_pbo.handle,
+ face_surface->GetSurfaceParams().rt.index, new_params.rt.index, index);
}
+
+ old_params.addr += byte_stride;
}
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ break;
+ }
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+ static_cast<u32>(new_params.target));
+ UNREACHABLE();
}
return new_surface;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 80c5f324b..49025a3fe 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -9,12 +9,14 @@
#include <memory>
#include <vector>
+#include "common/alignment.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/math_util.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/textures/texture.h"
namespace OpenGL {
@@ -126,6 +128,8 @@ struct SurfaceParams {
case Tegra::Texture::TextureType::Texture2D:
case Tegra::Texture::TextureType::Texture2DNoMipmap:
return SurfaceTarget::Texture2D;
+ case Tegra::Texture::TextureType::TextureCubemap:
+ return SurfaceTarget::TextureCubemap;
case Tegra::Texture::TextureType::Texture1DArray:
return SurfaceTarget::Texture1DArray;
case Tegra::Texture::TextureType::Texture2DArray:
@@ -689,17 +693,23 @@ struct SurfaceParams {
/// Returns the rectangle corresponding to this surface
MathUtil::Rectangle<u32> GetRect() const;
- /// Returns the size of this surface in bytes, adjusted for compression
- std::size_t SizeInBytes() const {
+ /// Returns the size of this surface as a 2D texture in bytes, adjusted for compression
+ std::size_t SizeInBytes2D() const {
const u32 compression_factor{GetCompressionFactor(pixel_format)};
ASSERT(width % compression_factor == 0);
ASSERT(height % compression_factor == 0);
return (width / compression_factor) * (height / compression_factor) *
- GetFormatBpp(pixel_format) * depth / CHAR_BIT;
+ GetFormatBpp(pixel_format) / CHAR_BIT;
+ }
+
+ /// Returns the total size of this surface in bytes, adjusted for compression
+ std::size_t SizeInBytesTotal() const {
+ return SizeInBytes2D() * depth;
}
/// Creates SurfaceParams from a texture configuration
- static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
+ static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
+ const GLShader::SamplerEntry& entry);
/// Creates SurfaceParams from a framebuffer configuration
static SurfaceParams CreateForFramebuffer(std::size_t index);
@@ -711,8 +721,9 @@ struct SurfaceParams {
/// Checks if surfaces are compatible for caching
bool IsCompatibleSurface(const SurfaceParams& other) const {
- return std::tie(pixel_format, type, width, height) ==
- std::tie(other.pixel_format, other.type, other.width, other.height);
+ return std::tie(pixel_format, type, width, height, target, depth) ==
+ std::tie(other.pixel_format, other.type, other.width, other.height, other.target,
+ other.depth);
}
VAddr addr;
@@ -725,8 +736,18 @@ struct SurfaceParams {
u32 height;
u32 depth;
u32 unaligned_height;
- std::size_t size_in_bytes;
+ std::size_t size_in_bytes_total;
+ std::size_t size_in_bytes_2d;
SurfaceTarget target;
+ u32 max_mip_level;
+
+ // Render target specific parameters, not used in caching
+ struct {
+ u32 index;
+ u32 array_mode;
+ u32 layer_stride;
+ u32 base_layer;
+ } rt;
};
}; // namespace OpenGL
@@ -736,6 +757,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
SurfaceReserveKey res;
res.state = params;
+ res.state.rt = {}; // Ignore rt config in caching
return res;
}
};
@@ -759,7 +781,7 @@ public:
}
std::size_t GetSizeInBytes() const {
- return params.size_in_bytes;
+ return params.size_in_bytes_total;
}
const OGLTexture& Texture() const {
@@ -800,7 +822,8 @@ public:
RasterizerCacheOpenGL();
/// Get a surface based on the texture configuration
- Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
+ Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
+ const GLShader::SamplerEntry& entry);
/// Get the depth surface based on the framebuffer configuration
Surface GetDepthBufferSurface(bool preserve_contents);
@@ -822,7 +845,7 @@ private:
Surface GetUncachedSurface(const SurfaceParams& params);
/// Recreates a surface with new parameters
- Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params);
+ Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params);
/// Reserves a unique surface that can be reused later
void ReserveSurface(const Surface& surface);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index b3e95187e..579a78702 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2000,6 +2000,14 @@ private:
}
break;
}
+ case Tegra::Shader::TextureType::TextureCube: {
+ ASSERT_MSG(!is_array, "Unimplemented");
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ std::string z = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
+ break;
+ }
default:
LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
static_cast<u32>(texture_type));
@@ -2018,9 +2026,12 @@ private:
break;
}
case OpCode::Id::TLDS: {
- ASSERT(instr.tlds.GetTextureType() == Tegra::Shader::TextureType::Texture2D);
- ASSERT(instr.tlds.IsArrayTexture() == false);
std::string coord;
+ const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
+ const bool is_array{instr.tlds.IsArrayTexture()};
+
+ ASSERT(texture_type == Tegra::Shader::TextureType::Texture2D);
+ ASSERT(is_array == false);
ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
@@ -2029,9 +2040,14 @@ private:
ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ),
"MZ is not implemented");
- switch (instr.tlds.GetTextureType()) {
+ switch (texture_type) {
+ case Tegra::Shader::TextureType::Texture1D: {
+ const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
+ coord = "int coords = " + x + ';';
+ break;
+ }
case Tegra::Shader::TextureType::Texture2D: {
- if (instr.tlds.IsArrayTexture()) {
+ if (is_array) {
LOG_CRITICAL(HW_GPU, "Unhandled 2d array texture");
UNREACHABLE();
} else {
@@ -2043,11 +2059,11 @@ private:
}
default:
LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
- static_cast<u32>(instr.tlds.GetTextureType()));
+ static_cast<u32>(texture_type));
UNREACHABLE();
}
- const std::string sampler = GetSampler(instr.sampler, instr.tlds.GetTextureType(),
- instr.tlds.IsArrayTexture());
+
+ const std::string sampler = GetSampler(instr.sampler, texture_type, is_array);
const std::string texture = "texelFetch(" + sampler + ", coords, 0)";
WriteTexsInstruction(instr, coord, texture);
break;
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c2fb824b2..14aea4838 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -165,6 +165,8 @@ struct TICEntry {
// High 16 bits of the pitch value
BitField<0, 16, u32> pitch_high;
+
+ BitField<28, 4, u32> max_mip_level;
};
union {
BitField<0, 16, u32> width_minus_1;