diff options
-rw-r--r-- | src/audio_core/CMakeLists.txt | 8 | ||||
-rw-r--r-- | src/audio_core/algorithm/filter.cpp | 79 | ||||
-rw-r--r-- | src/audio_core/algorithm/filter.h | 62 | ||||
-rw-r--r-- | src/audio_core/algorithm/interpolate.cpp | 71 | ||||
-rw-r--r-- | src/audio_core/algorithm/interpolate.h | 43 | ||||
-rw-r--r-- | src/audio_core/audio_renderer.cpp | 5 | ||||
-rw-r--r-- | src/audio_core/audio_renderer.h | 2 | ||||
-rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic.cpp | 4 | ||||
-rw-r--r-- | src/core/file_sys/card_image.cpp | 8 | ||||
-rw-r--r-- | src/core/file_sys/card_image.h | 6 | ||||
-rw-r--r-- | src/core/file_sys/vfs.cpp | 20 | ||||
-rw-r--r-- | src/core/hle/kernel/kernel.cpp | 2 | ||||
-rw-r--r-- | src/core/hle/kernel/object.h | 15 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 4 | ||||
-rw-r--r-- | src/video_core/gpu.h | 4 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 76 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 110 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 1 |
18 files changed, 434 insertions, 86 deletions
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index ec71524a3..82e4850f7 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -1,4 +1,8 @@ add_library(audio_core STATIC + algorithm/filter.cpp + algorithm/filter.h + algorithm/interpolate.cpp + algorithm/interpolate.h audio_out.cpp audio_out.h audio_renderer.cpp @@ -7,12 +11,12 @@ add_library(audio_core STATIC codec.cpp codec.h null_sink.h - stream.cpp - stream.h sink.h sink_details.cpp sink_details.h sink_stream.h + stream.cpp + stream.h $<$<BOOL:${ENABLE_CUBEB}>:cubeb_sink.cpp cubeb_sink.h> ) diff --git a/src/audio_core/algorithm/filter.cpp b/src/audio_core/algorithm/filter.cpp new file mode 100644 index 000000000..403b8503f --- /dev/null +++ b/src/audio_core/algorithm/filter.cpp @@ -0,0 +1,79 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define _USE_MATH_DEFINES + +#include <algorithm> +#include <array> +#include <cmath> +#include <vector> +#include "audio_core/algorithm/filter.h" +#include "common/common_types.h" + +namespace AudioCore { + +Filter Filter::LowPass(double cutoff, double Q) { + const double w0 = 2.0 * M_PI * cutoff; + const double sin_w0 = std::sin(w0); + const double cos_w0 = std::cos(w0); + const double alpha = sin_w0 / (2 * Q); + + const double a0 = 1 + alpha; + const double a1 = -2.0 * cos_w0; + const double a2 = 1 - alpha; + const double b0 = 0.5 * (1 - cos_w0); + const double b1 = 1.0 * (1 - cos_w0); + const double b2 = 0.5 * (1 - cos_w0); + + return {a0, a1, a2, b0, b1, b2}; +} + +Filter::Filter() : Filter(1.0, 0.0, 0.0, 1.0, 0.0, 0.0) {} + +Filter::Filter(double a0, double a1, double a2, double b0, double b1, double b2) + : a1(a1 / a0), a2(a2 / a0), b0(b0 / a0), b1(b1 / a0), b2(b2 / a0) {} + +void Filter::Process(std::vector<s16>& signal) { + const size_t num_frames = signal.size() / 2; + for (size_t i = 0; i < num_frames; i++) { + std::rotate(in.begin(), in.end() - 1, in.end()); + std::rotate(out.begin(), out.end() - 1, out.end()); + + for (size_t ch = 0; ch < channel_count; ch++) { + in[0][ch] = signal[i * channel_count + ch]; + + out[0][ch] = b0 * in[0][ch] + b1 * in[1][ch] + b2 * in[2][ch] - a1 * out[1][ch] - + a2 * out[2][ch]; + + signal[i * 2 + ch] = std::clamp(out[0][ch], -32768.0, 32767.0); + } + } +} + +/// Calculates the appropriate Q for each biquad in a cascading filter. +/// @param total_count The total number of biquads to be cascaded. +/// @param index 0-index of the biquad to calculate the Q value for. +static double CascadingBiquadQ(size_t total_count, size_t index) { + const double pole = M_PI * (2 * index + 1) / (4.0 * total_count); + return 1.0 / (2.0 * std::cos(pole)); +} + +CascadingFilter CascadingFilter::LowPass(double cutoff, size_t cascade_size) { + std::vector<Filter> cascade(cascade_size); + for (size_t i = 0; i < cascade_size; i++) { + cascade[i] = Filter::LowPass(cutoff, CascadingBiquadQ(cascade_size, i)); + } + return CascadingFilter{std::move(cascade)}; +} + +CascadingFilter::CascadingFilter() = default; +CascadingFilter::CascadingFilter(std::vector<Filter> filters) : filters(std::move(filters)) {} + +void CascadingFilter::Process(std::vector<s16>& signal) { + for (auto& filter : filters) { + filter.Process(signal); + } +} + +} // namespace AudioCore diff --git a/src/audio_core/algorithm/filter.h b/src/audio_core/algorithm/filter.h new file mode 100644 index 000000000..a41beef98 --- /dev/null +++ b/src/audio_core/algorithm/filter.h @@ -0,0 +1,62 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <vector> +#include "common/common_types.h" + +namespace AudioCore { + +/// Digital biquad filter: +/// +/// b0 + b1 z^-1 + b2 z^-2 +/// H(z) = ------------------------ +/// a0 + a1 z^-1 + b2 z^-2 +class Filter { +public: + /// Creates a low-pass filter. + /// @param cutoff Determines the cutoff frequency. A value from 0.0 to 1.0. + /// @param Q Determines the quality factor of this filter. + static Filter LowPass(double cutoff, double Q = 0.7071); + + /// Passthrough filter. + Filter(); + + Filter(double a0, double a1, double a2, double b0, double b1, double b2); + + void Process(std::vector<s16>& signal); + +private: + static constexpr size_t channel_count = 2; + + /// Coefficients are in normalized form (a0 = 1.0). + double a1, a2, b0, b1, b2; + /// Input History + std::array<std::array<double, channel_count>, 3> in; + /// Output History + std::array<std::array<double, channel_count>, 3> out; +}; + +/// Cascade filters to build up higher-order filters from lower-order ones. +class CascadingFilter { +public: + /// Creates a cascading low-pass filter. + /// @param cutoff Determines the cutoff frequency. A value from 0.0 to 1.0. + /// @param cascade_size Number of biquads in cascade. + static CascadingFilter LowPass(double cutoff, size_t cascade_size); + + /// Passthrough. + CascadingFilter(); + + explicit CascadingFilter(std::vector<Filter> filters); + + void Process(std::vector<s16>& signal); + +private: + std::vector<Filter> filters; +}; + +} // namespace AudioCore diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp new file mode 100644 index 000000000..11459821f --- /dev/null +++ b/src/audio_core/algorithm/interpolate.cpp @@ -0,0 +1,71 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define _USE_MATH_DEFINES + +#include <algorithm> +#include <cmath> +#include <vector> +#include "audio_core/algorithm/interpolate.h" +#include "common/common_types.h" +#include "common/logging/log.h" + +namespace AudioCore { + +/// The Lanczos kernel +static double Lanczos(size_t a, double x) { + if (x == 0.0) + return 1.0; + const double px = M_PI * x; + return a * std::sin(px) * std::sin(px / a) / (px * px); +} + +std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, double ratio) { + if (input.size() < 2) + return {}; + + if (ratio <= 0) { + LOG_CRITICAL(Audio, "Nonsensical interpolation ratio {}", ratio); + ratio = 1.0; + } + + if (ratio != state.current_ratio) { + const double cutoff_frequency = std::min(0.5 / ratio, 0.5 * ratio); + state.nyquist = CascadingFilter::LowPass(std::clamp(cutoff_frequency, 0.0, 0.4), 3); + state.current_ratio = ratio; + } + state.nyquist.Process(input); + + constexpr size_t taps = InterpolationState::lanczos_taps; + const size_t num_frames = input.size() / 2; + + std::vector<s16> output; + output.reserve(static_cast<size_t>(input.size() / ratio + 4)); + + double& pos = state.position; + auto& h = state.history; + for (size_t i = 0; i < num_frames; ++i) { + std::rotate(h.begin(), h.end() - 1, h.end()); + h[0][0] = input[i * 2 + 0]; + h[0][1] = input[i * 2 + 1]; + + while (pos <= 1.0) { + double l = 0.0; + double r = 0.0; + for (size_t j = 0; j < h.size(); j++) { + l += Lanczos(taps, pos + j - taps + 1) * h[j][0]; + r += Lanczos(taps, pos + j - taps + 1) * h[j][1]; + } + output.emplace_back(static_cast<s16>(std::clamp(l, -32768.0, 32767.0))); + output.emplace_back(static_cast<s16>(std::clamp(r, -32768.0, 32767.0))); + + pos += ratio; + } + pos -= 1.0; + } + + return output; +} + +} // namespace AudioCore diff --git a/src/audio_core/algorithm/interpolate.h b/src/audio_core/algorithm/interpolate.h new file mode 100644 index 000000000..c79c2eef4 --- /dev/null +++ b/src/audio_core/algorithm/interpolate.h @@ -0,0 +1,43 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <vector> +#include "audio_core/algorithm/filter.h" +#include "common/common_types.h" + +namespace AudioCore { + +struct InterpolationState { + static constexpr size_t lanczos_taps = 4; + static constexpr size_t history_size = lanczos_taps * 2 - 1; + + double current_ratio = 0.0; + CascadingFilter nyquist; + std::array<std::array<s16, 2>, history_size> history = {}; + double position = 0; +}; + +/// Interpolates input signal to produce output signal. +/// @param input The signal to interpolate. +/// @param ratio Interpolation ratio. +/// ratio > 1.0 results in fewer output samples. +/// ratio < 1.0 results in more output samples. +/// @returns Output signal. +std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, double ratio); + +/// Interpolates input signal to produce output signal. +/// @param input The signal to interpolate. +/// @param input_rate The sample rate of input. +/// @param output_rate The desired sample rate of the output. +/// @returns Output signal. +inline std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, + u32 input_rate, u32 output_rate) { + const double ratio = static_cast<double>(input_rate) / static_cast<double>(output_rate); + return Interpolate(state, std::move(input), ratio); +} + +} // namespace AudioCore diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 6ebed3fb0..397b107f5 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "audio_core/algorithm/interpolate.h" #include "audio_core/audio_renderer.h" #include "common/assert.h" #include "common/logging/log.h" @@ -199,6 +200,8 @@ void AudioRenderer::VoiceState::RefreshBuffer() { break; } + samples = Interpolate(interp_state, std::move(samples), Info().sample_rate, STREAM_SAMPLE_RATE); + is_refresh_pending = false; } @@ -224,7 +227,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { break; } - samples_remaining -= samples.size(); + samples_remaining -= samples.size() / stream->GetNumChannels(); for (const auto& sample : samples) { const s32 buffer_sample{buffer[offset]}; diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index 13c5d0adc..eba67f28e 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -8,6 +8,7 @@ #include <memory> #include <vector> +#include "audio_core/algorithm/interpolate.h" #include "audio_core/audio_out.h" #include "audio_core/codec.h" #include "audio_core/stream.h" @@ -194,6 +195,7 @@ private: size_t wave_index{}; size_t offset{}; Codec::ADPCMState adpcm_state{}; + InterpolationState interp_state{}; std::vector<s16> samples; VoiceOutStatus out_status{}; VoiceInfo info{}; diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 0996f129c..20e5200a8 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -243,9 +243,7 @@ void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) { } void ARM_Dynarmic::PrepareReschedule() { - if (jit->IsExecuting()) { - jit->HaltExecution(); - } + jit->HaltExecution(); } void ARM_Dynarmic::ClearInstructionCache() { diff --git a/src/core/file_sys/card_image.cpp b/src/core/file_sys/card_image.cpp index a4823353e..8e05b9d0e 100644 --- a/src/core/file_sys/card_image.cpp +++ b/src/core/file_sys/card_image.cpp @@ -107,19 +107,19 @@ VirtualFile XCI::GetNCAFileByType(NCAContentType type) const { return nullptr; } -std::vector<std::shared_ptr<VfsFile>> XCI::GetFiles() const { +std::vector<VirtualFile> XCI::GetFiles() const { return {}; } -std::vector<std::shared_ptr<VfsDirectory>> XCI::GetSubdirectories() const { - return std::vector<std::shared_ptr<VfsDirectory>>(); +std::vector<VirtualDir> XCI::GetSubdirectories() const { + return {}; } std::string XCI::GetName() const { return file->GetName(); } -std::shared_ptr<VfsDirectory> XCI::GetParentDirectory() const { +VirtualDir XCI::GetParentDirectory() const { return file->GetContainingDirectory(); } diff --git a/src/core/file_sys/card_image.h b/src/core/file_sys/card_image.h index e089d737c..4618d9c00 100644 --- a/src/core/file_sys/card_image.h +++ b/src/core/file_sys/card_image.h @@ -71,13 +71,13 @@ public: std::shared_ptr<NCA> GetNCAByType(NCAContentType type) const; VirtualFile GetNCAFileByType(NCAContentType type) const; - std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; + std::vector<VirtualFile> GetFiles() const override; - std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; + std::vector<VirtualDir> GetSubdirectories() const override; std::string GetName() const override; - std::shared_ptr<VfsDirectory> GetParentDirectory() const override; + VirtualDir GetParentDirectory() const override; protected: bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override; diff --git a/src/core/file_sys/vfs.cpp b/src/core/file_sys/vfs.cpp index 24e158962..a5ec50b1a 100644 --- a/src/core/file_sys/vfs.cpp +++ b/src/core/file_sys/vfs.cpp @@ -74,15 +74,15 @@ VirtualFile VfsFilesystem::CopyFile(std::string_view old_path_, std::string_view return new_file; } -VirtualFile VfsFilesystem::MoveFile(std::string_view old_path_, std::string_view new_path_) { - const auto old_path = FileUtil::SanitizePath(old_path_); - const auto new_path = FileUtil::SanitizePath(new_path_); +VirtualFile VfsFilesystem::MoveFile(std::string_view old_path, std::string_view new_path) { + const auto sanitized_old_path = FileUtil::SanitizePath(old_path); + const auto sanitized_new_path = FileUtil::SanitizePath(new_path); // Again, non-default impls are highly encouraged to provide a more optimized version of this. - auto out = CopyFile(old_path_, new_path_); + auto out = CopyFile(sanitized_old_path, sanitized_new_path); if (out == nullptr) return nullptr; - if (DeleteFile(old_path)) + if (DeleteFile(sanitized_old_path)) return out; return nullptr; } @@ -137,15 +137,15 @@ VirtualDir VfsFilesystem::CopyDirectory(std::string_view old_path_, std::string_ return new_dir; } -VirtualDir VfsFilesystem::MoveDirectory(std::string_view old_path_, std::string_view new_path_) { - const auto old_path = FileUtil::SanitizePath(old_path_); - const auto new_path = FileUtil::SanitizePath(new_path_); +VirtualDir VfsFilesystem::MoveDirectory(std::string_view old_path, std::string_view new_path) { + const auto sanitized_old_path = FileUtil::SanitizePath(old_path); + const auto sanitized_new_path = FileUtil::SanitizePath(new_path); // Non-default impls are highly encouraged to provide a more optimized version of this. - auto out = CopyDirectory(old_path_, new_path_); + auto out = CopyDirectory(sanitized_old_path, sanitized_new_path); if (out == nullptr) return nullptr; - if (DeleteDirectory(old_path)) + if (DeleteDirectory(sanitized_old_path)) return out; return nullptr; } diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 1b0cd0abf..8c19e86d3 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -11,7 +11,7 @@ namespace Kernel { -unsigned int Object::next_object_id; +std::atomic<u32> Object::next_object_id{0}; /// Initialize the kernel void Init() { diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h index 83df68dfd..526ac9cc3 100644 --- a/src/core/hle/kernel/object.h +++ b/src/core/hle/kernel/object.h @@ -4,6 +4,7 @@ #pragma once +#include <atomic> #include <string> #include <utility> @@ -42,8 +43,8 @@ public: virtual ~Object(); /// Returns a unique identifier for the object. For debugging purposes only. - unsigned int GetObjectId() const { - return object_id; + u32 GetObjectId() const { + return object_id.load(std::memory_order_relaxed); } virtual std::string GetTypeName() const { @@ -61,23 +62,23 @@ public: bool IsWaitable() const; public: - static unsigned int next_object_id; + static std::atomic<u32> next_object_id; private: friend void intrusive_ptr_add_ref(Object*); friend void intrusive_ptr_release(Object*); - unsigned int ref_count = 0; - unsigned int object_id = next_object_id++; + std::atomic<u32> ref_count{0}; + std::atomic<u32> object_id{next_object_id++}; }; // Special functions used by boost::instrusive_ptr to do automatic ref-counting inline void intrusive_ptr_add_ref(Object* object) { - ++object->ref_count; + object->ref_count.fetch_add(1, std::memory_order_relaxed); } inline void intrusive_ptr_release(Object* object) { - if (--object->ref_count == 0) { + if (object->ref_count.fetch_sub(1, std::memory_order_acq_rel) == 1) { delete object; } } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 19e7f1161..c9f6b82b7 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -46,8 +46,10 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { case RenderTargetFormat::RGBA32_FLOAT: case RenderTargetFormat::RGBA32_UINT: return 16; + case RenderTargetFormat::RGBA16_UINT: case RenderTargetFormat::RGBA16_FLOAT: case RenderTargetFormat::RG32_FLOAT: + case RenderTargetFormat::RG32_UINT: return 8; case RenderTargetFormat::RGBA8_UNORM: case RenderTargetFormat::RGBA8_SNORM: @@ -61,12 +63,14 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { case RenderTargetFormat::RG16_FLOAT: case RenderTargetFormat::R32_FLOAT: case RenderTargetFormat::R11G11B10_FLOAT: + case RenderTargetFormat::R32_UINT: return 4; case RenderTargetFormat::R16_UNORM: case RenderTargetFormat::R16_SNORM: case RenderTargetFormat::R16_UINT: case RenderTargetFormat::R16_SINT: case RenderTargetFormat::R16_FLOAT: + case RenderTargetFormat::RG8_UNORM: case RenderTargetFormat::RG8_SNORM: return 2; case RenderTargetFormat::R8_UNORM: diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e008d8f26..8a90a3a66 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -20,8 +20,10 @@ enum class RenderTargetFormat : u32 { NONE = 0x0, RGBA32_FLOAT = 0xC0, RGBA32_UINT = 0xC2, + RGBA16_UINT = 0xC9, RGBA16_FLOAT = 0xCA, RG32_FLOAT = 0xCB, + RG32_UINT = 0xCD, BGRA8_UNORM = 0xCF, RGB10_A2_UNORM = 0xD1, RGBA8_UNORM = 0xD5, @@ -33,8 +35,10 @@ enum class RenderTargetFormat : u32 { RG16_UINT = 0xDD, RG16_FLOAT = 0xDE, R11G11B10_FLOAT = 0xE0, + R32_UINT = 0xE4, R32_FLOAT = 0xE5, B5G6R5_UNORM = 0xE8, + RG8_UNORM = 0xEA, RG8_SNORM = 0xEB, R16_UNORM = 0xEE, R16_SNORM = 0xEF, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 84c250c63..4b48ab8e2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -101,6 +101,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F + {GL_RGBA16UI, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, false}, // R11FG11FB10F {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI @@ -134,7 +135,10 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8 + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI // DepthStencil formats {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, @@ -234,32 +238,59 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> morton_to_gl_fns = { - MortonCopy<true, PixelFormat::ABGR8U>, MortonCopy<true, PixelFormat::ABGR8S>, - MortonCopy<true, PixelFormat::B5G6R5>, MortonCopy<true, PixelFormat::A2B10G10R10>, - MortonCopy<true, PixelFormat::A1B5G5R5>, MortonCopy<true, PixelFormat::R8>, - MortonCopy<true, PixelFormat::R8UI>, MortonCopy<true, PixelFormat::RGBA16F>, - MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, - MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, - MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, - MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>, - MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>, - MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, - MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>, - MortonCopy<true, PixelFormat::R16UNORM>, MortonCopy<true, PixelFormat::R16S>, - MortonCopy<true, PixelFormat::R16UI>, MortonCopy<true, PixelFormat::R16I>, - MortonCopy<true, PixelFormat::RG16>, MortonCopy<true, PixelFormat::RG16F>, - MortonCopy<true, PixelFormat::RG16UI>, MortonCopy<true, PixelFormat::RG16I>, - MortonCopy<true, PixelFormat::RG16S>, MortonCopy<true, PixelFormat::RGB32F>, - MortonCopy<true, PixelFormat::SRGBA8>, MortonCopy<true, PixelFormat::RG8S>, - MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>, - MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>, + // clang-format off + MortonCopy<true, PixelFormat::ABGR8U>, + MortonCopy<true, PixelFormat::ABGR8S>, + MortonCopy<true, PixelFormat::B5G6R5>, + MortonCopy<true, PixelFormat::A2B10G10R10>, + MortonCopy<true, PixelFormat::A1B5G5R5>, + MortonCopy<true, PixelFormat::R8>, + MortonCopy<true, PixelFormat::R8UI>, + MortonCopy<true, PixelFormat::RGBA16F>, + MortonCopy<true, PixelFormat::RGBA16UI>, + MortonCopy<true, PixelFormat::R11FG11FB10F>, + MortonCopy<true, PixelFormat::RGBA32UI>, + MortonCopy<true, PixelFormat::DXT1>, + MortonCopy<true, PixelFormat::DXT23>, + MortonCopy<true, PixelFormat::DXT45>, + MortonCopy<true, PixelFormat::DXN1>, + MortonCopy<true, PixelFormat::DXN2UNORM>, + MortonCopy<true, PixelFormat::DXN2SNORM>, + MortonCopy<true, PixelFormat::BC7U>, + MortonCopy<true, PixelFormat::ASTC_2D_4X4>, + MortonCopy<true, PixelFormat::G8R8>, + MortonCopy<true, PixelFormat::BGRA8>, + MortonCopy<true, PixelFormat::RGBA32F>, + MortonCopy<true, PixelFormat::RG32F>, + MortonCopy<true, PixelFormat::R32F>, + MortonCopy<true, PixelFormat::R16F>, + MortonCopy<true, PixelFormat::R16UNORM>, + MortonCopy<true, PixelFormat::R16S>, + MortonCopy<true, PixelFormat::R16UI>, + MortonCopy<true, PixelFormat::R16I>, + MortonCopy<true, PixelFormat::RG16>, + MortonCopy<true, PixelFormat::RG16F>, + MortonCopy<true, PixelFormat::RG16UI>, + MortonCopy<true, PixelFormat::RG16I>, + MortonCopy<true, PixelFormat::RG16S>, + MortonCopy<true, PixelFormat::RGB32F>, + MortonCopy<true, PixelFormat::SRGBA8>, + MortonCopy<true, PixelFormat::RG8U>, + MortonCopy<true, PixelFormat::RG8S>, + MortonCopy<true, PixelFormat::RG32UI>, + MortonCopy<true, PixelFormat::R32UI>, + MortonCopy<true, PixelFormat::Z24S8>, + MortonCopy<true, PixelFormat::S8Z24>, + MortonCopy<true, PixelFormat::Z32F>, + MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z32FS8>, + // clang-format on }; static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> gl_to_morton_fns = { + // clang-format off MortonCopy<false, PixelFormat::ABGR8U>, MortonCopy<false, PixelFormat::ABGR8S>, MortonCopy<false, PixelFormat::B5G6R5>, @@ -268,6 +299,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU MortonCopy<false, PixelFormat::R8>, MortonCopy<false, PixelFormat::R8UI>, MortonCopy<false, PixelFormat::RGBA16F>, + MortonCopy<false, PixelFormat::RGBA16UI>, MortonCopy<false, PixelFormat::R11FG11FB10F>, MortonCopy<false, PixelFormat::RGBA32UI>, // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not @@ -297,12 +329,16 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU MortonCopy<false, PixelFormat::RG16S>, MortonCopy<false, PixelFormat::RGB32F>, MortonCopy<false, PixelFormat::SRGBA8>, + MortonCopy<false, PixelFormat::RG8U>, MortonCopy<false, PixelFormat::RG8S>, + MortonCopy<false, PixelFormat::RG32UI>, + MortonCopy<false, PixelFormat::R32UI>, MortonCopy<false, PixelFormat::Z24S8>, MortonCopy<false, PixelFormat::S8Z24>, MortonCopy<false, PixelFormat::Z32F>, MortonCopy<false, PixelFormat::Z16>, MortonCopy<false, PixelFormat::Z32FS8>, + // clang-format on }; // Allocate an uninitialized texture of appropriate size and format for the surface diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 202257b58..630b40e77 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -31,43 +31,47 @@ struct SurfaceParams { R8 = 5, R8UI = 6, RGBA16F = 7, - R11FG11FB10F = 8, - RGBA32UI = 9, - DXT1 = 10, - DXT23 = 11, - DXT45 = 12, - DXN1 = 13, // This is also known as BC4 - DXN2UNORM = 14, - DXN2SNORM = 15, - BC7U = 16, - ASTC_2D_4X4 = 17, - G8R8 = 18, - BGRA8 = 19, - RGBA32F = 20, - RG32F = 21, - R32F = 22, - R16F = 23, - R16UNORM = 24, - R16S = 25, - R16UI = 26, - R16I = 27, - RG16 = 28, - RG16F = 29, - RG16UI = 30, - RG16I = 31, - RG16S = 32, - RGB32F = 33, - SRGBA8 = 34, - RG8S = 35, + RGBA16UI = 8, + R11FG11FB10F = 9, + RGBA32UI = 10, + DXT1 = 11, + DXT23 = 12, + DXT45 = 13, + DXN1 = 14, // This is also known as BC4 + DXN2UNORM = 15, + DXN2SNORM = 16, + BC7U = 17, + ASTC_2D_4X4 = 18, + G8R8 = 19, + BGRA8 = 20, + RGBA32F = 21, + RG32F = 22, + R32F = 23, + R16F = 24, + R16UNORM = 25, + R16S = 26, + R16UI = 27, + R16I = 28, + RG16 = 29, + RG16F = 30, + RG16UI = 31, + RG16I = 32, + RG16S = 33, + RGB32F = 34, + SRGBA8 = 35, + RG8U = 36, + RG8S = 37, + RG32UI = 38, + R32UI = 39, MaxColorFormat, // DepthStencil formats - Z24S8 = 36, - S8Z24 = 37, - Z32F = 38, - Z16 = 39, - Z32FS8 = 40, + Z24S8 = 40, + S8Z24 = 41, + Z32F = 42, + Z16 = 43, + Z32FS8 = 44, MaxDepthStencilFormat, @@ -113,6 +117,7 @@ struct SurfaceParams { 1, // R8 1, // R8UI 1, // RGBA16F + 1, // RGBA16UI 1, // R11FG11FB10F 1, // RGBA32UI 4, // DXT1 @@ -140,7 +145,10 @@ struct SurfaceParams { 1, // RG16S 1, // RGB32F 1, // SRGBA8 + 1, // RG8U 1, // RG8S + 1, // RG32UI + 1, // R32UI 1, // Z24S8 1, // S8Z24 1, // Z32F @@ -165,6 +173,7 @@ struct SurfaceParams { 8, // R8 8, // R8UI 64, // RGBA16F + 64, // RGBA16UI 32, // R11FG11FB10F 128, // RGBA32UI 64, // DXT1 @@ -192,7 +201,10 @@ struct SurfaceParams { 32, // RG16S 96, // RGB32F 32, // SRGBA8 + 16, // RG8U 16, // RG8S + 64, // RG32UI + 32, // R32UI 32, // Z24S8 32, // S8Z24 32, // Z32F @@ -241,6 +253,8 @@ struct SurfaceParams { return PixelFormat::A2B10G10R10; case Tegra::RenderTargetFormat::RGBA16_FLOAT: return PixelFormat::RGBA16F; + case Tegra::RenderTargetFormat::RGBA16_UINT: + return PixelFormat::RGBA16UI; case Tegra::RenderTargetFormat::RGBA32_FLOAT: return PixelFormat::RGBA32F; case Tegra::RenderTargetFormat::RG32_FLOAT: @@ -265,6 +279,8 @@ struct SurfaceParams { return PixelFormat::RG16; case Tegra::RenderTargetFormat::RG16_SNORM: return PixelFormat::RG16S; + case Tegra::RenderTargetFormat::RG8_UNORM: + return PixelFormat::RG8U; case Tegra::RenderTargetFormat::RG8_SNORM: return PixelFormat::RG8S; case Tegra::RenderTargetFormat::R16_FLOAT: @@ -279,6 +295,10 @@ struct SurfaceParams { return PixelFormat::R16I; case Tegra::RenderTargetFormat::R32_FLOAT: return PixelFormat::R32F; + case Tegra::RenderTargetFormat::R32_UINT: + return PixelFormat::R32UI; + case Tegra::RenderTargetFormat::RG32_UINT: + return PixelFormat::RG32UI; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -332,7 +352,15 @@ struct SurfaceParams { static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32: - return PixelFormat::RG32F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RG32F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::RG32UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32_B32: return PixelFormat::RGB32F; case Tegra::Texture::TextureFormat::R16: @@ -352,7 +380,15 @@ struct SurfaceParams { static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::R32: - return PixelFormat::R32F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::R32F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::R32UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::ZF32: return PixelFormat::Z32F; case Tegra::Texture::TextureFormat::Z24S8: @@ -432,6 +468,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RG16_UNORM: case Tegra::RenderTargetFormat::R16_UNORM: case Tegra::RenderTargetFormat::B5G6R5_UNORM: + case Tegra::RenderTargetFormat::RG8_UNORM: return ComponentType::UNorm; case Tegra::RenderTargetFormat::RGBA8_SNORM: case Tegra::RenderTargetFormat::RG16_SNORM: @@ -447,9 +484,12 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::R32_FLOAT: return ComponentType::Float; case Tegra::RenderTargetFormat::RGBA32_UINT: + case Tegra::RenderTargetFormat::RGBA16_UINT: case Tegra::RenderTargetFormat::RG16_UINT: case Tegra::RenderTargetFormat::R8_UINT: case Tegra::RenderTargetFormat::R16_UINT: + case Tegra::RenderTargetFormat::RG32_UINT: + case Tegra::RenderTargetFormat::R32_UINT: return ComponentType::UInt; case Tegra::RenderTargetFormat::RG16_SINT: case Tegra::RenderTargetFormat::R16_SINT: diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 679e5ceb2..83ea0cfc0 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -27,6 +27,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Type::UnsignedNorm: { switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; |