From 0906de9a14b735d1d409290ca050eb7d2c2d3d84 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 10 Oct 2017 17:32:14 -0400
Subject: hle: Remove a large amount of 3ds-specific service code.

---
 src/core/hw/y2r.cpp | 382 ----------------------------------------------------
 src/core/hw/y2r.h   |  17 ---
 2 files changed, 399 deletions(-)
 delete mode 100644 src/core/hw/y2r.cpp
 delete mode 100644 src/core/hw/y2r.h

(limited to 'src/core/hw')
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp
deleted file mode 100644
index e697f84b3..000000000
--- a/src/core/hw/y2r.cpp
+++ /dev/null
@@ -1,382 +0,0 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <memory>
-#include "common/assert.h"
-#include "common/color.h"
-#include "common/common_types.h"
-#include "common/math_util.h"
-#include "common/vector_math.h"
-#include "core/hle/service/y2r_u.h"
-#include "core/hw/y2r.h"
-#include "core/memory.h"
-
-namespace HW {
-namespace Y2R {
-
-using namespace Service::Y2R;
-
-static const size_t MAX_TILES = 1024 / 8;
-static const size_t TILE_SIZE = 8 * 8;
-using ImageTile = std::array<u32, TILE_SIZE>;
-
-/// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles.
-static void ConvertYUVToRGB(InputFormat input_format, const u8* input_Y, const u8* input_U,
-                            const u8* input_V, ImageTile output[], unsigned int width,
-                            unsigned int height, const CoefficientSet& coefficients) {
-
-    for (unsigned int y = 0; y < height; ++y) {
-        for (unsigned int x = 0; x < width; ++x) {
-            s32 Y = 0;
-            s32 U = 0;
-            s32 V = 0;
-            switch (input_format) {
-            case InputFormat::YUV422_Indiv8:
-            case InputFormat::YUV422_Indiv16:
-                Y = input_Y[y * width + x];
-                U = input_U[(y * width + x) / 2];
-                V = input_V[(y * width + x) / 2];
-                break;
-            case InputFormat::YUV420_Indiv8:
-            case InputFormat::YUV420_Indiv16:
-                Y = input_Y[y * width + x];
-                U = input_U[((y / 2) * width + x) / 2];
-                V = input_V[((y / 2) * width + x) / 2];
-                break;
-            case InputFormat::YUYV422_Interleaved:
-                Y = input_Y[(y * width + x) * 2];
-                U = input_Y[(y * width + (x / 2) * 2) * 2 + 1];
-                V = input_Y[(y * width + (x / 2) * 2) * 2 + 3];
-                break;
-            }
-
-            // This conversion process is bit-exact with hardware, as far as could be tested.
-            auto& c = coefficients;
-            s32 cY = c[0] * Y;
-
-            s32 r = cY + c[1] * V;
-            s32 g = cY - c[2] * V - c[3] * U;
-            s32 b = cY + c[4] * U;
-
-            const s32 rounding_offset = 0x18;
-            r = (r >> 3) + c[5] + rounding_offset;
-            g = (g >> 3) + c[6] + rounding_offset;
-            b = (b >> 3) + c[7] + rounding_offset;
-
-            unsigned int tile = x / 8;
-            unsigned int tile_x = x % 8;
-            u32* out = &output[tile][y * 8 + tile_x];
-
-            using MathUtil::Clamp;
-            *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
-                   ((u32)Clamp(b >> 5, 0, 0xFF) << 8);
-        }
-    }
-}
-
-/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit
-/// formats to 8-bit.
-template <size_t N>
-static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) {
-    const u8* input = Memory::GetPointer(buf.address);
-
-    size_t output_unit = buf.transfer_unit / N;
-    ASSERT(amount_of_data % output_unit == 0);
-
-    while (amount_of_data > 0) {
-        for (size_t i = 0; i < output_unit; ++i) {
-            output[i] = input[i * N];
-        }
-
-        output += output_unit;
-        input += buf.transfer_unit + buf.gap;
-
-        buf.address += buf.transfer_unit + buf.gap;
-        buf.image_size -= buf.transfer_unit;
-        amount_of_data -= output_unit;
-    }
-}
-
-/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA
-/// transfer.
-static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data,
-                     OutputFormat output_format, u8 alpha) {
-
-    u8* output = Memory::GetPointer(buf.address);
-
-    while (amount_of_data > 0) {
-        u8* unit_end = output + buf.transfer_unit;
-        while (output < unit_end) {
-            u32 color = *input++;
-            Math::Vec4<u8> col_vec{(u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha};
-
-            switch (output_format) {
-            case OutputFormat::RGBA8:
-                Color::EncodeRGBA8(col_vec, output);
-                output += 4;
-                break;
-            case OutputFormat::RGB8:
-                Color::EncodeRGB8(col_vec, output);
-                output += 3;
-                break;
-            case OutputFormat::RGB5A1:
-                Color::EncodeRGB5A1(col_vec, output);
-                output += 2;
-                break;
-            case OutputFormat::RGB565:
-                Color::EncodeRGB565(col_vec, output);
-                output += 2;
-                break;
-            }
-
-            amount_of_data -= 1;
-        }
-
-        output += buf.gap;
-        buf.address += buf.transfer_unit + buf.gap;
-        buf.image_size -= buf.transfer_unit;
-    }
-}
-
-static const u8 linear_lut[TILE_SIZE] = {
-    // clang-format off
-     0,  1,  2,  3,  4,  5,  6,  7,
-     8,  9, 10, 11, 12, 13, 14, 15,
-    16, 17, 18, 19, 20, 21, 22, 23,
-    24, 25, 26, 27, 28, 29, 30, 31,
-    32, 33, 34, 35, 36, 37, 38, 39,
-    40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55,
-    56, 57, 58, 59, 60, 61, 62, 63,
-    // clang-format on
-};
-
-static const u8 morton_lut[TILE_SIZE] = {
-    // clang-format off
-     0,  1,  4,  5, 16, 17, 20, 21,
-     2,  3,  6,  7, 18, 19, 22, 23,
-     8,  9, 12, 13, 24, 25, 28, 29,
-    10, 11, 14, 15, 26, 27, 30, 31,
-    32, 33, 36, 37, 48, 49, 52, 53,
-    34, 35, 38, 39, 50, 51, 54, 55,
-    40, 41, 44, 45, 56, 57, 60, 61,
-    42, 43, 46, 47, 58, 59, 62, 63,
-    // clang-format on
-};
-
-static void RotateTile0(const ImageTile& input, ImageTile& output, int height,
-                        const u8 out_map[64]) {
-    for (int i = 0; i < height * 8; ++i) {
-        output[out_map[i]] = input[i];
-    }
-}
-
-static void RotateTile90(const ImageTile& input, ImageTile& output, int height,
-                         const u8 out_map[64]) {
-    int out_i = 0;
-    for (int x = 0; x < 8; ++x) {
-        for (int y = height - 1; y >= 0; --y) {
-            output[out_map[out_i++]] = input[y * 8 + x];
-        }
-    }
-}
-
-static void RotateTile180(const ImageTile& input, ImageTile& output, int height,
-                          const u8 out_map[64]) {
-    int out_i = 0;
-    for (int i = height * 8 - 1; i >= 0; --i) {
-        output[out_map[out_i++]] = input[i];
-    }
-}
-
-static void RotateTile270(const ImageTile& input, ImageTile& output, int height,
-                          const u8 out_map[64]) {
-    int out_i = 0;
-    for (int x = 8 - 1; x >= 0; --x) {
-        for (int y = 0; y < height; ++y) {
-            output[out_map[out_i++]] = input[y * 8 + x];
-        }
-    }
-}
-
-static void WriteTileToOutput(u32* output, const ImageTile& tile, int height, int line_stride) {
-    for (int y = 0; y < height; ++y) {
-        for (int x = 0; x < 8; ++x) {
-            output[y * line_stride + x] = tile[y * 8 + x];
-        }
-    }
-}
-
-/**
- * Performs a Y2R colorspace conversion.
- *
- * The Y2R hardware implements hardware-accelerated YUV to RGB colorspace conversions. It is most
- * commonly used for video playback or to display camera input to the screen.
- *
- * The conversion process is quite configurable, and can be divided in distinct steps. From
- * observation, it appears that the hardware buffers a single 8-pixel tall strip of image data
- * internally and converts it in one go before writing to the output and loading the next strip.
- *
- * The steps taken to convert one strip of image data are:
- *
- * - The hardware receives data via CDMA (http://3dbrew.org/wiki/Corelink_DMA_Engines), which is
- *   presumably stored in one or more internal buffers. This process can be done in several separate
- *   transfers, as long as they don't exceed the size of the internal image buffer. This allows
- *   flexibility in input strides.
- * - The input data is decoded into a YUV tuple. Several formats are suported, see the `InputFormat`
- *   enum.
- * - The YUV tuple is converted, using fixed point calculations, to RGB. This step can be configured
- *   using a set of coefficients to support different colorspace standards. See `CoefficientSet`.
- * - The strip can be optionally rotated 90, 180 or 270 degrees. Since each strip is processed
- *   independently, this notably rotates each *strip*, not the entire image. This means that for 90
- *   or 270 degree rotations, the output will be in terms of several 8 x height images, and for any
- *   non-zero rotation the strips will have to be re-arranged so that the parts of the image will
- *   not be shuffled together. This limitation makes this a feature of somewhat dubious utility. 90
- *   or 270 degree rotations in images with non-even height don't seem to work properly.
- * - The data is converted to the output RGB format. See the `OutputFormat` enum.
- * - The data can be output either linearly line-by-line or in the swizzled 8x8 tile format used by
- *   the PICA. This is decided by the `BlockAlignment` enum. If 8x8 alignment is used, then the
- *   image must have a height divisible by 8. The image width must always be divisible by 8.
- * - The final data is then CDMAed out to main memory and the next image strip is processed. This
- *   offers the same flexibility as the input stage.
- *
- * In this implementation, to avoid the combinatorial explosion of parameter combinations, common
- * intermediate formats are used and where possible tables or parameters are used instead of
- * diverging code paths to keep the amount of branches in check. Some steps are also merged to
- * increase efficiency.
- *
- * Output for all valid settings combinations matches hardware, however output in some edge-cases
- * differs:
- *
- * - `Block8x8` alignment with non-mod8 height produces different garbage patterns on the last
- *   strip, especially when combined with rotation.
- * - Hardware, when using `Linear` alignment with a non-even height and 90 or 270 degree rotation
- *   produces misaligned output on the last strip. This implmentation produces output with the
- *   correct "expected" alignment.
- *
- * Hardware behaves strangely (doesn't fire the completion interrupt, for example) in these cases,
- * so they are believed to be invalid configurations anyway.
- */
-void PerformConversion(ConversionConfiguration& cvt) {
-    ASSERT(cvt.input_line_width % 8 == 0);
-    ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0);
-    // Tiles per row
-    size_t num_tiles = cvt.input_line_width / 8;
-    ASSERT(num_tiles <= MAX_TILES);
-
-    // Buffer used as a CDMA source/target.
-    std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]);
-    // Intermediate storage for decoded 8x8 image tiles. Always stored as RGB32.
-    std::unique_ptr<ImageTile[]> tiles(new ImageTile[num_tiles]);
-    ImageTile tmp_tile;
-
-    // LUT used to remap writes to a tile. Used to allow linear or swizzled output without
-    // requiring two different code paths.
-    const u8* tile_remap = nullptr;
-    switch (cvt.block_alignment) {
-    case BlockAlignment::Linear:
-        tile_remap = linear_lut;
-        break;
-    case BlockAlignment::Block8x8:
-        tile_remap = morton_lut;
-        break;
-    }
-
-    for (unsigned int y = 0; y < cvt.input_lines; y += 8) {
-        unsigned int row_height = std::min(cvt.input_lines - y, 8u);
-
-        // Total size in pixels of incoming data required for this strip.
-        const size_t row_data_size = row_height * cvt.input_line_width;
-
-        u8* input_Y = data_buffer.get();
-        u8* input_U = input_Y + 8 * cvt.input_line_width;
-        u8* input_V = input_U + 8 * cvt.input_line_width / 2;
-
-        switch (cvt.input_format) {
-        case InputFormat::YUV422_Indiv8:
-            ReceiveData<1>(input_Y, cvt.src_Y, row_data_size);
-            ReceiveData<1>(input_U, cvt.src_U, row_data_size / 2);
-            ReceiveData<1>(input_V, cvt.src_V, row_data_size / 2);
-            break;
-        case InputFormat::YUV420_Indiv8:
-            ReceiveData<1>(input_Y, cvt.src_Y, row_data_size);
-            ReceiveData<1>(input_U, cvt.src_U, row_data_size / 4);
-            ReceiveData<1>(input_V, cvt.src_V, row_data_size / 4);
-            break;
-        case InputFormat::YUV422_Indiv16:
-            ReceiveData<2>(input_Y, cvt.src_Y, row_data_size);
-            ReceiveData<2>(input_U, cvt.src_U, row_data_size / 2);
-            ReceiveData<2>(input_V, cvt.src_V, row_data_size / 2);
-            break;
-        case InputFormat::YUV420_Indiv16:
-            ReceiveData<2>(input_Y, cvt.src_Y, row_data_size);
-            ReceiveData<2>(input_U, cvt.src_U, row_data_size / 4);
-            ReceiveData<2>(input_V, cvt.src_V, row_data_size / 4);
-            break;
-        case InputFormat::YUYV422_Interleaved:
-            input_U = nullptr;
-            input_V = nullptr;
-            ReceiveData<1>(input_Y, cvt.src_YUYV, row_data_size * 2);
-            break;
-        }
-
-        // Note(yuriks): If additional optimization is required, input_format can be moved to a
-        // template parameter, so that its dispatch can be moved to outside the inner loop.
-        ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(),
-                        cvt.input_line_width, row_height, cvt.coefficients);
-
-        u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get());
-
-        for (size_t i = 0; i < num_tiles; ++i) {
-            int image_strip_width = 0;
-            int output_stride = 0;
-
-            switch (cvt.rotation) {
-            case Rotation::None:
-                RotateTile0(tiles[i], tmp_tile, row_height, tile_remap);
-                image_strip_width = cvt.input_line_width;
-                output_stride = 8;
-                break;
-            case Rotation::Clockwise_90:
-                RotateTile90(tiles[i], tmp_tile, row_height, tile_remap);
-                image_strip_width = 8;
-                output_stride = 8 * row_height;
-                break;
-            case Rotation::Clockwise_180:
-                // For 180 and 270 degree rotations we also invert the order of tiles in the strip,
-                // since the rotates are done individually on each tile.
-                RotateTile180(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap);
-                image_strip_width = cvt.input_line_width;
-                output_stride = 8;
-                break;
-            case Rotation::Clockwise_270:
-                RotateTile270(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap);
-                image_strip_width = 8;
-                output_stride = 8 * row_height;
-                break;
-            }
-
-            switch (cvt.block_alignment) {
-            case BlockAlignment::Linear:
-                WriteTileToOutput(output_buffer, tmp_tile, row_height, image_strip_width);
-                output_buffer += output_stride;
-                break;
-            case BlockAlignment::Block8x8:
-                WriteTileToOutput(output_buffer, tmp_tile, 8, 8);
-                output_buffer += TILE_SIZE;
-                break;
-            }
-        }
-
-        // Note(yuriks): If additional optimization is required, output_format can be moved to a
-        // template parameter, so that its dispatch can be moved to outside the inner loop.
-        SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size,
-                 cvt.output_format, (u8)cvt.alpha);
-    }
-}
-}
-}
diff --git a/src/core/hw/y2r.h b/src/core/hw/y2r.h
deleted file mode 100644
index 25fcd781c..000000000
--- a/src/core/hw/y2r.h
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-namespace Service {
-namespace Y2R {
-struct ConversionConfiguration;
-}
-}
-
-namespace HW {
-namespace Y2R {
-void PerformConversion(Service::Y2R::ConversionConfiguration& cvt);
-}
-}
-- 
cgit v1.2.3