summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorameerj <52414509+ameerj@users.noreply.github.com>2021-03-04 20:14:19 +0100
committerameerj <52414509+ameerj@users.noreply.github.com>2021-03-04 20:14:19 +0100
commit0639244d856f403cc9bdec5ca927df7a61edd1d8 (patch)
treea87c6e3142665bfb632001507cf04c973a88e1a6
parentMerge pull request #6004 from german77/udprandom (diff)
downloadyuzu-0639244d856f403cc9bdec5ca927df7a61edd1d8.tar
yuzu-0639244d856f403cc9bdec5ca927df7a61edd1d8.tar.gz
yuzu-0639244d856f403cc9bdec5ca927df7a61edd1d8.tar.bz2
yuzu-0639244d856f403cc9bdec5ca927df7a61edd1d8.tar.lz
yuzu-0639244d856f403cc9bdec5ca927df7a61edd1d8.tar.xz
yuzu-0639244d856f403cc9bdec5ca927df7a61edd1d8.tar.zst
yuzu-0639244d856f403cc9bdec5ca927df7a61edd1d8.zip
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt1
-rw-r--r--src/video_core/host_shaders/opengl_copy_bgra.comp15
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp20
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp76
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h22
5 files changed, 132 insertions, 2 deletions
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 970120acc..3494318ca 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -5,6 +5,7 @@ set(SHADER_FILES
convert_float_to_depth.frag
full_screen_triangle.vert
opengl_copy_bc4.comp
+ opengl_copy_bgra.comp
opengl_present.frag
opengl_present.vert
pitch_unswizzle.comp
diff --git a/src/video_core/host_shaders/opengl_copy_bgra.comp b/src/video_core/host_shaders/opengl_copy_bgra.comp
new file mode 100644
index 000000000..2571a4abf
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_copy_bgra.comp
@@ -0,0 +1,15 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 430 core
+
+layout (local_size_x = 4, local_size_y = 4) in;
+
+layout(binding = 0, rgba8) readonly uniform image2DArray bgr_input;
+layout(binding = 1, rgba8) writeonly uniform image2DArray bgr_output;
+
+void main() {
+ vec4 color = imageLoad(bgr_input, ivec3(gl_GlobalInvocationID));
+ imageStore(bgr_output, ivec3(gl_GlobalInvocationID), color.bgra);
+}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 12434db67..e028677e9 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -96,7 +96,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
- {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
{GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
{GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
@@ -125,7 +125,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
- {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
@@ -396,6 +396,17 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
}
}
+[[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::B5G6R5_UNORM:
+ case PixelFormat::B8G8R8A8_UNORM:
+ case PixelFormat::B8G8R8A8_SRGB:
+ return true;
+ default:
+ return false;
+ }
+}
+
} // Anonymous namespace
ImageBufferMap::~ImageBufferMap() {
@@ -512,6 +523,9 @@ bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) {
if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
return false;
}
+ if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) {
+ return false;
+ }
return true;
}
@@ -520,6 +534,8 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
ASSERT(src.info.type == ImageType::e3D);
util_shaders.CopyBC4(dst, src, copies);
+ } else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) {
+ util_shaders.CopyBGR(dst, src, copies);
} else {
UNREACHABLE();
}
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 31ec68505..2fe4799bc 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -14,6 +14,7 @@
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
+#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -48,6 +49,11 @@ OGLProgram MakeProgram(std::string_view source) {
return program;
}
+size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
+ return static_cast<size_t>(copy.extent.width * copy.extent.height *
+ copy.src_subresource.num_layers);
+}
+
} // Anonymous namespace
UtilShaders::UtilShaders(ProgramManager& program_manager_)
@@ -55,6 +61,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
+ copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)),
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
swizzle_table_buffer.Create();
@@ -205,6 +212,43 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
program_manager.RestoreGuestCompute();
}
+void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
+ std::span<const VideoCommon::ImageCopy> copies) {
+ static constexpr GLuint BINDING_INPUT_IMAGE = 0;
+ static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
+ static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0};
+ const u32 bytes_per_block = BytesPerBlock(dst_image.info.format);
+ switch (bytes_per_block) {
+ case 2:
+ // BGR565 copy
+ for (const ImageCopy& copy : copies) {
+ ASSERT(copy.src_offset == zero_offset);
+ ASSERT(copy.dst_offset == zero_offset);
+ bgr_copy_pass.Execute(dst_image, src_image, copy);
+ }
+ break;
+ case 4: {
+ // BGRA8 copy
+ program_manager.BindHostCompute(copy_bgra_program.handle);
+ constexpr GLenum FORMAT = GL_RGBA8;
+ for (const ImageCopy& copy : copies) {
+ ASSERT(copy.src_offset == zero_offset);
+ ASSERT(copy.dst_offset == zero_offset);
+ glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
+ copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
+ copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT);
+ glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
+ }
+ program_manager.RestoreGuestCompute();
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
+ }
+}
+
GLenum StoreFormat(u32 bytes_per_block) {
switch (bytes_per_block) {
case 1:
@@ -222,4 +266,36 @@ GLenum StoreFormat(u32 bytes_per_block) {
return GL_R8UI;
}
+void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image,
+ const ImageCopy& copy) {
+ if (CopyBufferCreationNeeded(copy)) {
+ CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565);
+ }
+ // Copy from source to PBO
+ glPixelStorei(GL_PACK_ALIGNMENT, 1);
+ glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width);
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle);
+ glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
+ copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
+ static_cast<GLsizei>(bgr16_pbo_size), nullptr);
+
+ // Copy from PBO to destination in reverse order
+ glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle);
+ glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
+ copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV,
+ nullptr);
+}
+
+bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) {
+ return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16);
+}
+
+void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) {
+ bgr16_pbo.Create();
+ bgr16_pbo_size = NumPixelsInCopy(copy) * sizeof(u16);
+ glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY);
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 7b1d16b09..93b009743 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -19,6 +19,22 @@ class ProgramManager;
struct ImageBufferMap;
+class Bgr565CopyPass {
+public:
+ Bgr565CopyPass() = default;
+ ~Bgr565CopyPass() = default;
+
+ void Execute(const Image& dst_image, const Image& src_image,
+ const VideoCommon::ImageCopy& copy);
+
+private:
+ [[nodiscard]] bool CopyBufferCreationNeeded(const VideoCommon::ImageCopy& copy);
+ void CreateNewCopyBuffer(const VideoCommon::ImageCopy& copy, GLenum target, GLuint format);
+
+ OGLBuffer bgr16_pbo;
+ size_t bgr16_pbo_size{};
+};
+
class UtilShaders {
public:
explicit UtilShaders(ProgramManager& program_manager);
@@ -36,6 +52,9 @@ public:
void CopyBC4(Image& dst_image, Image& src_image,
std::span<const VideoCommon::ImageCopy> copies);
+ void CopyBGR(Image& dst_image, Image& src_image,
+ std::span<const VideoCommon::ImageCopy> copies);
+
private:
ProgramManager& program_manager;
@@ -44,7 +63,10 @@ private:
OGLProgram block_linear_unswizzle_2d_program;
OGLProgram block_linear_unswizzle_3d_program;
OGLProgram pitch_unswizzle_program;
+ OGLProgram copy_bgra_program;
OGLProgram copy_bc4_program;
+
+ Bgr565CopyPass bgr_copy_pass;
};
GLenum StoreFormat(u32 bytes_per_block);