// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #include #include #include #include "video_core/engines/sw_blitter/blitter.h" #include "video_core/engines/sw_blitter/converter.h" #include "video_core/memory_manager.h" #include "video_core/surface.h" #include "video_core/textures/decoders.h" namespace Tegra { class MemoryManager; } using VideoCore::Surface::BytesPerBlock; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; namespace Tegra::Engines::Blitter { using namespace Texture; namespace { constexpr size_t ir_components = 4; void NearestNeighbor(std::span input, std::span output, u32 src_width, u32 src_height, u32 dst_width, u32 dst_height, size_t bpp) { const size_t dx_du = std::llround((static_cast(src_width) / dst_width) * (1ULL << 32)); const size_t dy_dv = std::llround((static_cast(src_height) / dst_height) * (1ULL << 32)); size_t src_y = 0; for (u32 y = 0; y < dst_height; y++) { size_t src_x = 0; for (u32 x = 0; x < dst_width; x++) { const size_t read_from = ((src_y * src_width + src_x) >> 32) * bpp; const size_t write_to = (y * dst_width + x) * bpp; std::memcpy(&output[write_to], &input[read_from], bpp); src_x += dx_du; } src_y += dy_dv; } } void NearestNeighborFast(std::span input, std::span output, u32 src_width, u32 src_height, u32 dst_width, u32 dst_height) { const size_t dx_du = std::llround((static_cast(src_width) / dst_width) * (1ULL << 32)); const size_t dy_dv = std::llround((static_cast(src_height) / dst_height) * (1ULL << 32)); size_t src_y = 0; for (u32 y = 0; y < dst_height; y++) { size_t src_x = 0; for (u32 x = 0; x < dst_width; x++) { const size_t read_from = ((src_y * src_width + src_x) >> 32) * ir_components; const size_t write_to = (y * dst_width + x) * ir_components; std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * ir_components); src_x += dx_du; } src_y += dy_dv; } } void Bilinear(std::span input, std::span output, size_t src_width, size_t src_height, size_t dst_width, size_t dst_height) { const auto bilinear_sample = [](std::span x0_y0, std::span x1_y0, std::span x0_y1, std::span x1_y1, f32 weight_x, f32 weight_y) { std::array result{}; for (size_t i = 0; i < ir_components; i++) { const f32 a = std::lerp(x0_y0[i], x1_y0[i], weight_x); const f32 b = std::lerp(x0_y1[i], x1_y1[i], weight_x); result[i] = std::lerp(a, b, weight_y); } return result; }; const f32 dx_du = dst_width > 1 ? static_cast(src_width - 1) / static_cast(dst_width - 1) : 0.f; const f32 dy_dv = dst_height > 1 ? static_cast(src_height - 1) / static_cast(dst_height - 1) : 0.f; for (u32 y = 0; y < dst_height; y++) { for (u32 x = 0; x < dst_width; x++) { const f32 x_low = std::floor(static_cast(x) * dx_du); const f32 y_low = std::floor(static_cast(y) * dy_dv); const f32 x_high = std::ceil(static_cast(x) * dx_du); const f32 y_high = std::ceil(static_cast(y) * dy_dv); const f32 weight_x = (static_cast(x) * dx_du) - x_low; const f32 weight_y = (static_cast(y) * dy_dv) - y_low; const auto read_src = [&](f32 in_x, f32 in_y) { const size_t read_from = ((static_cast(in_x) * src_width + static_cast(in_y)) >> 32) * ir_components; return std::span(&input[read_from], ir_components); }; auto x0_y0 = read_src(x_low, y_low); auto x1_y0 = read_src(x_high, y_low); auto x0_y1 = read_src(x_low, y_high); auto x1_y1 = read_src(x_high, y_high); const auto result = bilinear_sample(x0_y0, x1_y0, x0_y1, x1_y1, weight_x, weight_y); const size_t write_to = (y * dst_width + x) * ir_components; std::memcpy(&output[write_to], &result, sizeof(f32) * ir_components); } } } } // namespace struct SoftwareBlitEngine::BlitEngineImpl { std::vector tmp_buffer; std::vector src_buffer; std::vector dst_buffer; std::vector intermediate_src; std::vector intermediate_dst; ConverterFactory converter_factory; }; SoftwareBlitEngine::SoftwareBlitEngine(MemoryManager& memory_manager_) : memory_manager{memory_manager_} { impl = std::make_unique(); } SoftwareBlitEngine::~SoftwareBlitEngine() = default; bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& config) { const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) { if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) { return CalculateSize(true, bytes_per_pixel, surface.width, surface.height, surface.depth, surface.block_height, surface.block_depth); } return static_cast(surface.pitch * surface.height); }; const auto process_pitch_linear = [](bool unpack, std::span input, std::span output, u32 extent_x, u32 extent_y, u32 pitch, u32 x0, u32 y0, size_t bpp) { const size_t base_offset = x0 * bpp; const size_t copy_size = extent_x * bpp; for (u32 y = y0; y < extent_y; y++) { const size_t first_offset = y * pitch + base_offset; const size_t second_offset = y * extent_x * bpp; u8* write_to = unpack ? &output[first_offset] : &output[second_offset]; const u8* read_from = unpack ? &input[second_offset] : &input[first_offset]; std::memcpy(write_to, read_from, copy_size); } }; const u32 src_extent_x = config.src_x1 - config.src_x0; const u32 src_extent_y = config.src_y1 - config.src_y0; const u32 dst_extent_x = config.dst_x1 - config.dst_x0; const u32 dst_extent_y = config.dst_y1 - config.dst_y0; const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); const size_t src_size = get_surface_size(src, src_bytes_per_pixel); impl->tmp_buffer.resize(src_size); memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size); const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; impl->src_buffer.resize(src_copy_size); const bool no_passthrough = src.format != dst.format || src_extent_x != dst_extent_x || src_extent_y != dst_extent_y; const auto convertion_phase_same_format = [&]() { NearestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y, dst_extent_x, dst_extent_y, dst_bytes_per_pixel); }; const auto convertion_phase_ir = [&]() { auto* input_converter = impl->converter_factory.GetFormatConverter(src.format); impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * ir_components); impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * ir_components); input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src); if (config.filter != Fermi2D::Filter::Bilinear) { NearestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x, src_extent_y, dst_extent_x, dst_extent_y); } else { Bilinear(impl->intermediate_src, impl->intermediate_dst, src_extent_x, src_extent_y, dst_extent_x, dst_extent_y); } auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format); output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer); }; // Do actuall Blit impl->dst_buffer.resize(dst_copy_size); if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width, src.height, src.depth, config.src_x0, config.src_y0, src_extent_x, src_extent_y, src.block_height, src.block_depth, src_extent_x * src_bytes_per_pixel); } else { process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); } // Conversion Phase if (no_passthrough) { if (src.format != dst.format || config.filter == Fermi2D::Filter::Bilinear) { convertion_phase_ir(); } else { convertion_phase_same_format(); } } else { impl->dst_buffer.swap(impl->src_buffer); } const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); impl->tmp_buffer.resize(dst_size); memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width, dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, dst_extent_y, dst.block_height, dst.block_depth, dst_extent_x * dst_bytes_per_pixel); } else { process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y, dst.pitch, config.dst_x0, config.dst_y0, static_cast(dst_bytes_per_pixel)); } memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); return true; } } // namespace Tegra::Engines::Blitter