summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl/util_shaders.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl/util_shaders.cpp')
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp119
1 files changed, 98 insertions, 21 deletions
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index eb849cbf2..2fe4799bc 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -14,6 +14,7 @@
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
+#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -48,6 +49,11 @@ OGLProgram MakeProgram(std::string_view source) {
return program;
}
+size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
+ return static_cast<size_t>(copy.extent.width * copy.extent.height *
+ copy.src_subresource.num_layers);
+}
+
} // Anonymous namespace
UtilShaders::UtilShaders(ProgramManager& program_manager_)
@@ -55,6 +61,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
+ copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)),
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
swizzle_table_buffer.Create();
@@ -63,7 +70,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
UtilShaders::~UtilShaders() = default;
-void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -71,13 +78,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
- glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+ glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
- const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+ const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -91,16 +98,16 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
- input_offset, image.guest_size_bytes - swizzle.buffer_offset);
- glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}
program_manager.RestoreGuestCompute();
}
-void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
@@ -108,14 +115,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
- glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+ glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
- const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+ const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -132,16 +139,16 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
glUniform1ui(7, params.block_height_mask);
glUniform1ui(8, params.block_depth);
glUniform1ui(9, params.block_depth_mask);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
- input_offset, image.guest_size_bytes - swizzle.buffer_offset);
- glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, store_format);
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
}
program_manager.RestoreGuestCompute();
}
-void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
@@ -159,21 +166,22 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
"Non-power of two images are not implemented");
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
- glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
+ glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
glUniform1ui(LOC_PITCH, pitch);
- glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), 0, GL_FALSE, 0, GL_WRITE_ONLY,
+ format);
for (const SwizzleParameters& swizzle : swizzles) {
const Extent3D num_tiles = swizzle.num_tiles;
- const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+ const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
- input_offset, image.guest_size_bytes - swizzle.buffer_offset);
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
+ image.guest_size_bytes - swizzle.buffer_offset);
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
}
program_manager.RestoreGuestCompute();
@@ -195,15 +203,52 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
- glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level,
- GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
- glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(),
+ glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
+ copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
}
program_manager.RestoreGuestCompute();
}
+void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
+ std::span<const VideoCommon::ImageCopy> copies) {
+ static constexpr GLuint BINDING_INPUT_IMAGE = 0;
+ static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
+ static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0};
+ const u32 bytes_per_block = BytesPerBlock(dst_image.info.format);
+ switch (bytes_per_block) {
+ case 2:
+ // BGR565 copy
+ for (const ImageCopy& copy : copies) {
+ ASSERT(copy.src_offset == zero_offset);
+ ASSERT(copy.dst_offset == zero_offset);
+ bgr_copy_pass.Execute(dst_image, src_image, copy);
+ }
+ break;
+ case 4: {
+ // BGRA8 copy
+ program_manager.BindHostCompute(copy_bgra_program.handle);
+ constexpr GLenum FORMAT = GL_RGBA8;
+ for (const ImageCopy& copy : copies) {
+ ASSERT(copy.src_offset == zero_offset);
+ ASSERT(copy.dst_offset == zero_offset);
+ glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
+ copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT);
+ glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
+ copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT);
+ glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
+ }
+ program_manager.RestoreGuestCompute();
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
+ }
+}
+
GLenum StoreFormat(u32 bytes_per_block) {
switch (bytes_per_block) {
case 1:
@@ -221,4 +266,36 @@ GLenum StoreFormat(u32 bytes_per_block) {
return GL_R8UI;
}
+void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image,
+ const ImageCopy& copy) {
+ if (CopyBufferCreationNeeded(copy)) {
+ CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565);
+ }
+ // Copy from source to PBO
+ glPixelStorei(GL_PACK_ALIGNMENT, 1);
+ glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width);
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle);
+ glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
+ copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
+ static_cast<GLsizei>(bgr16_pbo_size), nullptr);
+
+ // Copy from PBO to destination in reverse order
+ glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle);
+ glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
+ copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV,
+ nullptr);
+}
+
+bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) {
+ return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16);
+}
+
+void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) {
+ bgr16_pbo.Create();
+ bgr16_pbo_size = NumPixelsInCopy(copy) * sizeof(u16);
+ glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY);
+}
+
} // namespace OpenGL