diff options
43 files changed, 793 insertions, 359 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index ff8385e3a..59c610732 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,10 +66,12 @@ if (NOT ENABLE_GENERIC) detect_architecture("_M_AMD64" x86_64) detect_architecture("_M_IX86" x86) detect_architecture("_M_ARM" ARM) + detect_architecture("_M_ARM64" ARM64) else() detect_architecture("__x86_64__" x86_64) detect_architecture("__i386__" x86) detect_architecture("__arm__" ARM) + detect_architecture("__aarch64__" ARM64) endif() endif() diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index ec71524a3..82e4850f7 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -1,4 +1,8 @@ add_library(audio_core STATIC + algorithm/filter.cpp + algorithm/filter.h + algorithm/interpolate.cpp + algorithm/interpolate.h audio_out.cpp audio_out.h audio_renderer.cpp @@ -7,12 +11,12 @@ add_library(audio_core STATIC codec.cpp codec.h null_sink.h - stream.cpp - stream.h sink.h sink_details.cpp sink_details.h sink_stream.h + stream.cpp + stream.h $<$<BOOL:${ENABLE_CUBEB}>:cubeb_sink.cpp cubeb_sink.h> ) diff --git a/src/audio_core/algorithm/filter.cpp b/src/audio_core/algorithm/filter.cpp new file mode 100644 index 000000000..403b8503f --- /dev/null +++ b/src/audio_core/algorithm/filter.cpp @@ -0,0 +1,79 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define _USE_MATH_DEFINES + +#include <algorithm> +#include <array> +#include <cmath> +#include <vector> +#include "audio_core/algorithm/filter.h" +#include "common/common_types.h" + +namespace AudioCore { + +Filter Filter::LowPass(double cutoff, double Q) { + const double w0 = 2.0 * M_PI * cutoff; + const double sin_w0 = std::sin(w0); + const double cos_w0 = std::cos(w0); + const double alpha = sin_w0 / (2 * Q); + + const double a0 = 1 + alpha; + const double a1 = -2.0 * cos_w0; + const double a2 = 1 - alpha; + const double b0 = 0.5 * (1 - cos_w0); + const double b1 = 1.0 * (1 - cos_w0); + const double b2 = 0.5 * (1 - cos_w0); + + return {a0, a1, a2, b0, b1, b2}; +} + +Filter::Filter() : Filter(1.0, 0.0, 0.0, 1.0, 0.0, 0.0) {} + +Filter::Filter(double a0, double a1, double a2, double b0, double b1, double b2) + : a1(a1 / a0), a2(a2 / a0), b0(b0 / a0), b1(b1 / a0), b2(b2 / a0) {} + +void Filter::Process(std::vector<s16>& signal) { + const size_t num_frames = signal.size() / 2; + for (size_t i = 0; i < num_frames; i++) { + std::rotate(in.begin(), in.end() - 1, in.end()); + std::rotate(out.begin(), out.end() - 1, out.end()); + + for (size_t ch = 0; ch < channel_count; ch++) { + in[0][ch] = signal[i * channel_count + ch]; + + out[0][ch] = b0 * in[0][ch] + b1 * in[1][ch] + b2 * in[2][ch] - a1 * out[1][ch] - + a2 * out[2][ch]; + + signal[i * 2 + ch] = std::clamp(out[0][ch], -32768.0, 32767.0); + } + } +} + +/// Calculates the appropriate Q for each biquad in a cascading filter. +/// @param total_count The total number of biquads to be cascaded. +/// @param index 0-index of the biquad to calculate the Q value for. +static double CascadingBiquadQ(size_t total_count, size_t index) { + const double pole = M_PI * (2 * index + 1) / (4.0 * total_count); + return 1.0 / (2.0 * std::cos(pole)); +} + +CascadingFilter CascadingFilter::LowPass(double cutoff, size_t cascade_size) { + std::vector<Filter> cascade(cascade_size); + for (size_t i = 0; i < cascade_size; i++) { + cascade[i] = Filter::LowPass(cutoff, CascadingBiquadQ(cascade_size, i)); + } + return CascadingFilter{std::move(cascade)}; +} + +CascadingFilter::CascadingFilter() = default; +CascadingFilter::CascadingFilter(std::vector<Filter> filters) : filters(std::move(filters)) {} + +void CascadingFilter::Process(std::vector<s16>& signal) { + for (auto& filter : filters) { + filter.Process(signal); + } +} + +} // namespace AudioCore diff --git a/src/audio_core/algorithm/filter.h b/src/audio_core/algorithm/filter.h new file mode 100644 index 000000000..a41beef98 --- /dev/null +++ b/src/audio_core/algorithm/filter.h @@ -0,0 +1,62 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <vector> +#include "common/common_types.h" + +namespace AudioCore { + +/// Digital biquad filter: +/// +/// b0 + b1 z^-1 + b2 z^-2 +/// H(z) = ------------------------ +/// a0 + a1 z^-1 + b2 z^-2 +class Filter { +public: + /// Creates a low-pass filter. + /// @param cutoff Determines the cutoff frequency. A value from 0.0 to 1.0. + /// @param Q Determines the quality factor of this filter. + static Filter LowPass(double cutoff, double Q = 0.7071); + + /// Passthrough filter. + Filter(); + + Filter(double a0, double a1, double a2, double b0, double b1, double b2); + + void Process(std::vector<s16>& signal); + +private: + static constexpr size_t channel_count = 2; + + /// Coefficients are in normalized form (a0 = 1.0). + double a1, a2, b0, b1, b2; + /// Input History + std::array<std::array<double, channel_count>, 3> in; + /// Output History + std::array<std::array<double, channel_count>, 3> out; +}; + +/// Cascade filters to build up higher-order filters from lower-order ones. +class CascadingFilter { +public: + /// Creates a cascading low-pass filter. + /// @param cutoff Determines the cutoff frequency. A value from 0.0 to 1.0. + /// @param cascade_size Number of biquads in cascade. + static CascadingFilter LowPass(double cutoff, size_t cascade_size); + + /// Passthrough. + CascadingFilter(); + + explicit CascadingFilter(std::vector<Filter> filters); + + void Process(std::vector<s16>& signal); + +private: + std::vector<Filter> filters; +}; + +} // namespace AudioCore diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp new file mode 100644 index 000000000..11459821f --- /dev/null +++ b/src/audio_core/algorithm/interpolate.cpp @@ -0,0 +1,71 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define _USE_MATH_DEFINES + +#include <algorithm> +#include <cmath> +#include <vector> +#include "audio_core/algorithm/interpolate.h" +#include "common/common_types.h" +#include "common/logging/log.h" + +namespace AudioCore { + +/// The Lanczos kernel +static double Lanczos(size_t a, double x) { + if (x == 0.0) + return 1.0; + const double px = M_PI * x; + return a * std::sin(px) * std::sin(px / a) / (px * px); +} + +std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, double ratio) { + if (input.size() < 2) + return {}; + + if (ratio <= 0) { + LOG_CRITICAL(Audio, "Nonsensical interpolation ratio {}", ratio); + ratio = 1.0; + } + + if (ratio != state.current_ratio) { + const double cutoff_frequency = std::min(0.5 / ratio, 0.5 * ratio); + state.nyquist = CascadingFilter::LowPass(std::clamp(cutoff_frequency, 0.0, 0.4), 3); + state.current_ratio = ratio; + } + state.nyquist.Process(input); + + constexpr size_t taps = InterpolationState::lanczos_taps; + const size_t num_frames = input.size() / 2; + + std::vector<s16> output; + output.reserve(static_cast<size_t>(input.size() / ratio + 4)); + + double& pos = state.position; + auto& h = state.history; + for (size_t i = 0; i < num_frames; ++i) { + std::rotate(h.begin(), h.end() - 1, h.end()); + h[0][0] = input[i * 2 + 0]; + h[0][1] = input[i * 2 + 1]; + + while (pos <= 1.0) { + double l = 0.0; + double r = 0.0; + for (size_t j = 0; j < h.size(); j++) { + l += Lanczos(taps, pos + j - taps + 1) * h[j][0]; + r += Lanczos(taps, pos + j - taps + 1) * h[j][1]; + } + output.emplace_back(static_cast<s16>(std::clamp(l, -32768.0, 32767.0))); + output.emplace_back(static_cast<s16>(std::clamp(r, -32768.0, 32767.0))); + + pos += ratio; + } + pos -= 1.0; + } + + return output; +} + +} // namespace AudioCore diff --git a/src/audio_core/algorithm/interpolate.h b/src/audio_core/algorithm/interpolate.h new file mode 100644 index 000000000..c79c2eef4 --- /dev/null +++ b/src/audio_core/algorithm/interpolate.h @@ -0,0 +1,43 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <vector> +#include "audio_core/algorithm/filter.h" +#include "common/common_types.h" + +namespace AudioCore { + +struct InterpolationState { + static constexpr size_t lanczos_taps = 4; + static constexpr size_t history_size = lanczos_taps * 2 - 1; + + double current_ratio = 0.0; + CascadingFilter nyquist; + std::array<std::array<s16, 2>, history_size> history = {}; + double position = 0; +}; + +/// Interpolates input signal to produce output signal. +/// @param input The signal to interpolate. +/// @param ratio Interpolation ratio. +/// ratio > 1.0 results in fewer output samples. +/// ratio < 1.0 results in more output samples. +/// @returns Output signal. +std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, double ratio); + +/// Interpolates input signal to produce output signal. +/// @param input The signal to interpolate. +/// @param input_rate The sample rate of input. +/// @param output_rate The desired sample rate of the output. +/// @returns Output signal. +inline std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, + u32 input_rate, u32 output_rate) { + const double ratio = static_cast<double>(input_rate) / static_cast<double>(output_rate); + return Interpolate(state, std::move(input), ratio); +} + +} // namespace AudioCore diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 6ebed3fb0..397b107f5 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "audio_core/algorithm/interpolate.h" #include "audio_core/audio_renderer.h" #include "common/assert.h" #include "common/logging/log.h" @@ -199,6 +200,8 @@ void AudioRenderer::VoiceState::RefreshBuffer() { break; } + samples = Interpolate(interp_state, std::move(samples), Info().sample_rate, STREAM_SAMPLE_RATE); + is_refresh_pending = false; } @@ -224,7 +227,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { break; } - samples_remaining -= samples.size(); + samples_remaining -= samples.size() / stream->GetNumChannels(); for (const auto& sample : samples) { const s32 buffer_sample{buffer[offset]}; diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index 13c5d0adc..eba67f28e 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -8,6 +8,7 @@ #include <memory> #include <vector> +#include "audio_core/algorithm/interpolate.h" #include "audio_core/audio_out.h" #include "audio_core/codec.h" #include "audio_core/stream.h" @@ -194,6 +195,7 @@ private: size_t wave_index{}; size_t offset{}; Codec::ADPCMState adpcm_state{}; + InterpolationState interp_state{}; std::vector<s16> samples; VoiceOutStatus out_status{}; VoiceInfo info{}; diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index d5d4f6f82..939b8a7d3 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -29,8 +29,6 @@ add_library(common STATIC assert.h bit_field.h bit_set.h - break_points.cpp - break_points.h cityhash.cpp cityhash.h color.h diff --git a/src/common/break_points.cpp b/src/common/break_points.cpp deleted file mode 100644 index fa367a4ca..000000000 --- a/src/common/break_points.cpp +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <sstream> -#include "common/break_points.h" - -bool BreakPoints::IsAddressBreakPoint(u32 iAddress) const { - auto cond = [&iAddress](const TBreakPoint& bp) { return bp.iAddress == iAddress; }; - auto it = std::find_if(m_BreakPoints.begin(), m_BreakPoints.end(), cond); - return it != m_BreakPoints.end(); -} - -bool BreakPoints::IsTempBreakPoint(u32 iAddress) const { - auto cond = [&iAddress](const TBreakPoint& bp) { - return bp.iAddress == iAddress && bp.bTemporary; - }; - auto it = std::find_if(m_BreakPoints.begin(), m_BreakPoints.end(), cond); - return it != m_BreakPoints.end(); -} - -BreakPoints::TBreakPointsStr BreakPoints::GetStrings() const { - TBreakPointsStr bps; - for (auto breakpoint : m_BreakPoints) { - if (!breakpoint.bTemporary) { - std::stringstream bp; - bp << std::hex << breakpoint.iAddress << " " << (breakpoint.bOn ? "n" : ""); - bps.push_back(bp.str()); - } - } - - return bps; -} - -void BreakPoints::AddFromStrings(const TBreakPointsStr& bps) { - for (auto bps_item : bps) { - TBreakPoint bp; - std::stringstream bpstr; - bpstr << std::hex << bps_item; - bpstr >> bp.iAddress; - bp.bOn = bps_item.find("n") != bps_item.npos; - bp.bTemporary = false; - Add(bp); - } -} - -void BreakPoints::Add(const TBreakPoint& bp) { - if (!IsAddressBreakPoint(bp.iAddress)) { - m_BreakPoints.push_back(bp); - // if (jit) - // jit->GetBlockCache()->InvalidateICache(bp.iAddress, 4); - } -} - -void BreakPoints::Add(u32 em_address, bool temp) { - if (!IsAddressBreakPoint(em_address)) // only add new addresses - { - TBreakPoint pt; // breakpoint settings - pt.bOn = true; - pt.bTemporary = temp; - pt.iAddress = em_address; - - m_BreakPoints.push_back(pt); - - // if (jit) - // jit->GetBlockCache()->InvalidateICache(em_address, 4); - } -} - -void BreakPoints::Remove(u32 em_address) { - auto cond = [&em_address](const TBreakPoint& bp) { return bp.iAddress == em_address; }; - auto it = std::find_if(m_BreakPoints.begin(), m_BreakPoints.end(), cond); - if (it != m_BreakPoints.end()) - m_BreakPoints.erase(it); -} - -void BreakPoints::Clear() { - // if (jit) - //{ - // std::for_each(m_BreakPoints.begin(), m_BreakPoints.end(), - // [](const TBreakPoint& bp) - // { - // jit->GetBlockCache()->InvalidateICache(bp.iAddress, 4); - // } - // ); - //} - - m_BreakPoints.clear(); -} diff --git a/src/common/break_points.h b/src/common/break_points.h deleted file mode 100644 index e15b9f842..000000000 --- a/src/common/break_points.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <string> -#include <vector> -#include "common/common_types.h" - -class DebugInterface; - -struct TBreakPoint { - u32 iAddress; - bool bOn; - bool bTemporary; -}; - -// Code breakpoints. -class BreakPoints { -public: - typedef std::vector<TBreakPoint> TBreakPoints; - typedef std::vector<std::string> TBreakPointsStr; - - const TBreakPoints& GetBreakPoints() { - return m_BreakPoints; - } - - TBreakPointsStr GetStrings() const; - void AddFromStrings(const TBreakPointsStr& bps); - - // is address breakpoint - bool IsAddressBreakPoint(u32 iAddress) const; - bool IsTempBreakPoint(u32 iAddress) const; - - // Add BreakPoint - void Add(u32 em_address, bool temp = false); - void Add(const TBreakPoint& bp); - - // Remove Breakpoint - void Remove(u32 iAddress); - void Clear(); - - void DeleteByAddress(u32 Address); - -private: - TBreakPoints m_BreakPoints; - u32 m_iBreakOnCount; -}; diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index e80784c3c..1323f8d0f 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -302,13 +302,14 @@ Backend* GetBackend(std::string_view backend_name) { void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename, unsigned int line_num, const char* function, const char* format, const fmt::format_args& args) { - auto filter = Impl::Instance().GetGlobalFilter(); + auto& instance = Impl::Instance(); + const auto& filter = instance.GetGlobalFilter(); if (!filter.CheckMessage(log_class, log_level)) return; Entry entry = CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args)); - Impl::Instance().PushEntry(std::move(entry)); + instance.PushEntry(std::move(entry)); } } // namespace Log diff --git a/src/common/misc.cpp b/src/common/misc.cpp index 217a87098..3fa8a3bc4 100644 --- a/src/common/misc.cpp +++ b/src/common/misc.cpp @@ -4,7 +4,7 @@ #include <cstddef> #ifdef _WIN32 -#include <Windows.h> +#include <windows.h> #else #include <cerrno> #include <cstring> diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index fd3fbdd4b..927da9187 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -9,10 +9,9 @@ #include "common/assert.h" #include "common/bit_set.h" -namespace Common { -namespace X64 { +namespace Common::X64 { -int RegToIndex(const Xbyak::Reg& reg) { +inline int RegToIndex(const Xbyak::Reg& reg) { using Kind = Xbyak::Reg::Kind; ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, "RegSet only support GPRs and XMM registers."); @@ -152,8 +151,8 @@ constexpr size_t ABI_SHADOW_SPACE = 0; #endif -void ABI_CalculateFrameSize(BitSet32 regs, size_t rsp_alignment, size_t needed_frame_size, - s32* out_subtraction, s32* out_xmm_offset) { +inline void ABI_CalculateFrameSize(BitSet32 regs, size_t rsp_alignment, size_t needed_frame_size, + s32* out_subtraction, s32* out_xmm_offset) { int count = (regs & ABI_ALL_GPRS).Count(); rsp_alignment -= count * 8; size_t subtraction = 0; @@ -174,8 +173,8 @@ void ABI_CalculateFrameSize(BitSet32 regs, size_t rsp_alignment, size_t needed_f *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); } -size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, - size_t rsp_alignment, size_t needed_frame_size = 0) { +inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, + size_t rsp_alignment, size_t needed_frame_size = 0) { s32 subtraction, xmm_offset; ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); @@ -195,8 +194,8 @@ size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs return ABI_SHADOW_SPACE; } -void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, size_t rsp_alignment, - size_t needed_frame_size = 0) { +inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, + size_t rsp_alignment, size_t needed_frame_size = 0) { s32 subtraction, xmm_offset; ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); @@ -217,5 +216,4 @@ void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, s } } -} // namespace X64 -} // namespace Common +} // namespace Common::X64 diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h index ec76e0a47..02323a017 100644 --- a/src/common/x64/xbyak_util.h +++ b/src/common/x64/xbyak_util.h @@ -8,8 +8,7 @@ #include <xbyak.h> #include "common/x64/xbyak_abi.h" -namespace Common { -namespace X64 { +namespace Common::X64 { // Constants for use with cmpps/cmpss enum { @@ -45,5 +44,4 @@ inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) { } } -} // namespace X64 -} // namespace Common +} // namespace Common::X64 diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index cceb1564b..0b0ae5ccc 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -249,6 +249,10 @@ add_library(core STATIC hle/service/nvdrv/devices/nvhost_gpu.h hle/service/nvdrv/devices/nvhost_nvdec.cpp hle/service/nvdrv/devices/nvhost_nvdec.h + hle/service/nvdrv/devices/nvhost_nvjpg.cpp + hle/service/nvdrv/devices/nvhost_nvjpg.h + hle/service/nvdrv/devices/nvhost_vic.cpp + hle/service/nvdrv/devices/nvhost_vic.h hle/service/nvdrv/devices/nvmap.cpp hle/service/nvdrv/devices/nvmap.h hle/service/nvdrv/interface.cpp diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 0996f129c..20e5200a8 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -243,9 +243,7 @@ void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) { } void ARM_Dynarmic::PrepareReschedule() { - if (jit->IsExecuting()) { - jit->HaltExecution(); - } + jit->HaltExecution(); } void ARM_Dynarmic::ClearInstructionCache() { diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 384dc7822..7006a37b3 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -34,9 +34,9 @@ class EmuWindow { public: /// Data structure to store emuwindow configuration struct WindowConfig { - bool fullscreen; - int res_width; - int res_height; + bool fullscreen = false; + int res_width = 0; + int res_height = 0; std::pair<unsigned, unsigned> min_client_area_size; }; diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index d09ca5992..51a1ec160 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -152,7 +152,7 @@ ResultCode ServerSession::HandleSyncRequest(SharedPtr<Thread> thread) { // Handle scenario when ConvertToDomain command was issued, as we must do the conversion at the // end of the command such that only commands following this one are handled as domains if (convert_to_domain) { - ASSERT_MSG(domain_request_handlers.empty(), "already a domain"); + ASSERT_MSG(IsSession(), "ServerSession is already a domain instance."); domain_request_handlers = {hle_handler}; convert_to_domain = false; } diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index 2bce54fee..1a88e66b9 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h @@ -97,7 +97,12 @@ public: /// Returns true if the session has been converted to a domain, otherwise False bool IsDomain() const { - return !domain_request_handlers.empty(); + return !IsSession(); + } + + /// Returns true if this session has not been converted to a domain, otherwise false. + bool IsSession() const { + return domain_request_handlers.empty(); } /// Converts the session to a domain at the end of the current command diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index b24f409b3..6be5c474e 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -250,8 +250,11 @@ static ResultCode ArbitrateUnlock(VAddr mutex_addr) { } /// Break program execution -static void Break(u64 unk_0, u64 unk_1, u64 unk_2) { - LOG_CRITICAL(Debug_Emulated, "Emulated program broke execution!"); +static void Break(u64 reason, u64 info1, u64 info2) { + LOG_CRITICAL( + Debug_Emulated, + "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}", + reason, info1, info2); ASSERT(false); } diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 108a7c6eb..ce709ccf4 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -28,7 +28,7 @@ constexpr int DefaultSampleRate{48000}; class IAudioOut final : public ServiceFramework<IAudioOut> { public: IAudioOut(AudoutParams audio_params, AudioCore::AudioOut& audio_core) - : ServiceFramework("IAudioOut"), audio_params(audio_params), audio_core(audio_core) { + : ServiceFramework("IAudioOut"), audio_core(audio_core), audio_params(audio_params) { static const FunctionInfo functions[] = { {0, &IAudioOut::GetAudioOutState, "GetAudioOutState"}, diff --git a/src/core/hle/service/mm/mm_u.cpp b/src/core/hle/service/mm/mm_u.cpp index 08f45b78a..7b91bb258 100644 --- a/src/core/hle/service/mm/mm_u.cpp +++ b/src/core/hle/service/mm/mm_u.cpp @@ -9,42 +9,63 @@ namespace Service::MM { -void InstallInterfaces(SM::ServiceManager& service_manager) { - std::make_shared<MM_U>()->InstallAsService(service_manager); -} +class MM_U final : public ServiceFramework<MM_U> { +public: + explicit MM_U() : ServiceFramework{"mm:u"} { + // clang-format off + static const FunctionInfo functions[] = { + {0, &MM_U::Initialize, "InitializeOld"}, + {1, &MM_U::Finalize, "FinalizeOld"}, + {2, &MM_U::SetAndWait, "SetAndWaitOld"}, + {3, &MM_U::Get, "GetOld"}, + {4, &MM_U::Initialize, "Initialize"}, + {5, &MM_U::Finalize, "Finalize"}, + {6, &MM_U::SetAndWait, "SetAndWait"}, + {7, &MM_U::Get, "Get"}, + }; + // clang-format on -void MM_U::Initialize(Kernel::HLERequestContext& ctx) { - LOG_WARNING(Service_MM, "(STUBBED) called"); - IPC::ResponseBuilder rb{ctx, 2}; - rb.Push(RESULT_SUCCESS); -} + RegisterHandlers(functions); + } -void MM_U::SetAndWait(Kernel::HLERequestContext& ctx) { - IPC::RequestParser rp{ctx}; - min = rp.Pop<u32>(); - max = rp.Pop<u32>(); - current = min; +private: + void Initialize(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_MM, "(STUBBED) called"); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + } - LOG_WARNING(Service_MM, "(STUBBED) called, min=0x{:X}, max=0x{:X}", min, max); - IPC::ResponseBuilder rb{ctx, 2}; - rb.Push(RESULT_SUCCESS); -} + void Finalize(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_MM, "(STUBBED) called"); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + } -void MM_U::Get(Kernel::HLERequestContext& ctx) { - LOG_WARNING(Service_MM, "(STUBBED) called"); - IPC::ResponseBuilder rb{ctx, 3}; - rb.Push(RESULT_SUCCESS); - rb.Push(current); -} + void SetAndWait(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + min = rp.Pop<u32>(); + max = rp.Pop<u32>(); + current = min; -MM_U::MM_U() : ServiceFramework("mm:u") { - static const FunctionInfo functions[] = { - {0, nullptr, "InitializeOld"}, {1, nullptr, "FinalizeOld"}, - {2, nullptr, "SetAndWaitOld"}, {3, nullptr, "GetOld"}, - {4, &MM_U::Initialize, "Initialize"}, {5, nullptr, "Finalize"}, - {6, &MM_U::SetAndWait, "SetAndWait"}, {7, &MM_U::Get, "Get"}, - }; - RegisterHandlers(functions); + LOG_WARNING(Service_MM, "(STUBBED) called, min=0x{:X}, max=0x{:X}", min, max); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + } + + void Get(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_MM, "(STUBBED) called"); + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push(current); + } + + u32 min{0}; + u32 max{0}; + u32 current{0}; +}; + +void InstallInterfaces(SM::ServiceManager& service_manager) { + std::make_shared<MM_U>()->InstallAsService(service_manager); } } // namespace Service::MM diff --git a/src/core/hle/service/mm/mm_u.h b/src/core/hle/service/mm/mm_u.h index 79eeedf9c..5439fa653 100644 --- a/src/core/hle/service/mm/mm_u.h +++ b/src/core/hle/service/mm/mm_u.h @@ -8,21 +8,6 @@ namespace Service::MM { -class MM_U final : public ServiceFramework<MM_U> { -public: - MM_U(); - ~MM_U() = default; - -private: - void Initialize(Kernel::HLERequestContext& ctx); - void SetAndWait(Kernel::HLERequestContext& ctx); - void Get(Kernel::HLERequestContext& ctx); - - u32 min{0}; - u32 max{0}; - u32 current{0}; -}; - /// Registers all MM services with the specified service manager. void InstallInterfaces(SM::ServiceManager& service_manager); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp new file mode 100644 index 000000000..51f01077b --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp @@ -0,0 +1,34 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> + +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/hle/service/nvdrv/devices/nvhost_nvjpg.h" + +namespace Service::Nvidia::Devices { + +u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { + LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", + command.raw, input.size(), output.size()); + + switch (static_cast<IoctlCommand>(command.raw)) { + case IoctlCommand::IocSetNVMAPfdCommand: + return SetNVMAPfd(input, output); + } + + UNIMPLEMENTED_MSG("Unimplemented ioctl"); + return 0; +} + +u32 nvhost_nvjpg::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlSetNvmapFD params{}; + std::memcpy(¶ms, input.data(), input.size()); + LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); + nvmap_fd = params.nvmap_fd; + return 0; +} + +} // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h new file mode 100644 index 000000000..2b0eb43ee --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h @@ -0,0 +1,36 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include "common/common_types.h" +#include "common/swap.h" +#include "core/hle/service/nvdrv/devices/nvdevice.h" + +namespace Service::Nvidia::Devices { + +class nvhost_nvjpg final : public nvdevice { +public: + nvhost_nvjpg() = default; + ~nvhost_nvjpg() override = default; + + u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; + +private: + enum class IoctlCommand : u32_le { + IocSetNVMAPfdCommand = 0x40044801, + }; + + struct IoctlSetNvmapFD { + u32_le nvmap_fd; + }; + static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size"); + + u32_le nvmap_fd{}; + + u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); +}; + +} // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp new file mode 100644 index 000000000..fcb488d50 --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -0,0 +1,34 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> + +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/hle/service/nvdrv/devices/nvhost_vic.h" + +namespace Service::Nvidia::Devices { + +u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { + LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", + command.raw, input.size(), output.size()); + + switch (static_cast<IoctlCommand>(command.raw)) { + case IoctlCommand::IocSetNVMAPfdCommand: + return SetNVMAPfd(input, output); + } + + UNIMPLEMENTED_MSG("Unimplemented ioctl"); + return 0; +} + +u32 nvhost_vic::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlSetNvmapFD params{}; + std::memcpy(¶ms, input.data(), input.size()); + LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); + nvmap_fd = params.nvmap_fd; + return 0; +} + +} // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h new file mode 100644 index 000000000..c7d681e52 --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -0,0 +1,36 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include "common/common_types.h" +#include "common/swap.h" +#include "core/hle/service/nvdrv/devices/nvdevice.h" + +namespace Service::Nvidia::Devices { + +class nvhost_vic final : public nvdevice { +public: + nvhost_vic() = default; + ~nvhost_vic() override = default; + + u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; + +private: + enum class IoctlCommand : u32_le { + IocSetNVMAPfdCommand = 0x40044801, + }; + + struct IoctlSetNvmapFD { + u32_le nvmap_fd; + }; + static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size"); + + u32_le nvmap_fd{}; + + u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); +}; + +} // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 427f4b574..2de39822f 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -12,6 +12,8 @@ #include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h" #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" #include "core/hle/service/nvdrv/devices/nvhost_nvdec.h" +#include "core/hle/service/nvdrv/devices/nvhost_nvjpg.h" +#include "core/hle/service/nvdrv/devices/nvhost_vic.h" #include "core/hle/service/nvdrv/devices/nvmap.h" #include "core/hle/service/nvdrv/interface.h" #include "core/hle/service/nvdrv/nvdrv.h" @@ -39,6 +41,8 @@ Module::Module() { devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev); devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(); devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(); + devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(); + devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(); } u32 Module::Open(const std::string& device_name) { diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp index 518a0cc46..1cef73216 100644 --- a/src/core/hle/service/sm/controller.cpp +++ b/src/core/hle/service/sm/controller.cpp @@ -10,7 +10,7 @@ namespace Service::SM { void Controller::ConvertSessionToDomain(Kernel::HLERequestContext& ctx) { - ASSERT_MSG(!ctx.Session()->IsDomain(), "session is alread a domain"); + ASSERT_MSG(ctx.Session()->IsSession(), "Session is already a domain"); ctx.Session()->ConvertToDomain(); IPC::ResponseBuilder rb{ctx, 3}; @@ -41,7 +41,7 @@ void Controller::DuplicateSessionEx(Kernel::HLERequestContext& ctx) { void Controller::QueryPointerBufferSize(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.Push<u32>(0x500); + rb.Push<u16>(0x500); LOG_WARNING(Service, "(STUBBED) called"); } diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index de05f21d8..d575a9bea 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -118,7 +118,6 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load( process->program_id = metadata.GetTitleID(); process->svc_access_mask.set(); - process->address_mappings = default_address_mappings; process->resource_limit = Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION); process->Run(Memory::PROCESS_IMAGE_VADDR, metadata.GetMainThreadPriority(), diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 401cad3ab..6420a7f11 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -398,7 +398,6 @@ ResultStatus AppLoader_ELF::Load(Kernel::SharedPtr<Kernel::Process>& process) { process->LoadModule(codeset, codeset->entrypoint); process->svc_access_mask.set(); - process->address_mappings = default_address_mappings; // Attach the default resource limit (APPLICATION) to the process process->resource_limit = diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp index 1f2f31535..b143f043c 100644 --- a/src/core/loader/loader.cpp +++ b/src/core/loader/loader.cpp @@ -17,12 +17,6 @@ namespace Loader { -const std::initializer_list<Kernel::AddressMapping> default_address_mappings = { - {0x1FF50000, 0x8000, true}, // part of DSP RAM - {0x1FF70000, 0x8000, true}, // part of DSP RAM - {0x1F000000, 0x600000, false}, // entire VRAM -}; - FileType IdentifyFile(FileSys::VirtualFile file) { FileType type; diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index 285363549..6dffe451a 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h @@ -5,7 +5,6 @@ #pragma once #include <algorithm> -#include <initializer_list> #include <memory> #include <string> #include <utility> @@ -208,12 +207,6 @@ protected: }; /** - * Common address mappings found in most games, used for binary formats that don't have this - * information. - */ -extern const std::initializer_list<Kernel::AddressMapping> default_address_mappings; - -/** * Identifies a bootable file and return a suitable loader * @param file The bootable file * @return the best loader for this file diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 908d91eab..2179cf2ea 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -186,7 +186,6 @@ ResultStatus AppLoader_NRO::Load(Kernel::SharedPtr<Kernel::Process>& process) { } process->svc_access_mask.set(); - process->address_mappings = default_address_mappings; process->resource_limit = Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION); process->Run(base_addr, THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE); diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index fee7d58c6..a94558ac5 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -152,7 +152,6 @@ ResultStatus AppLoader_NSO::Load(Kernel::SharedPtr<Kernel::Process>& process) { LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", file->GetName(), Memory::PROCESS_IMAGE_VADDR); process->svc_access_mask.set(); - process->address_mappings = default_address_mappings; process->resource_limit = Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION); process->Run(Memory::PROCESS_IMAGE_VADDR, THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 6f0343888..5a593c1f7 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -47,8 +47,10 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { case RenderTargetFormat::RGBA32_UINT: return 16; case RenderTargetFormat::RGBA16_UINT: + case RenderTargetFormat::RGBA16_UNORM: case RenderTargetFormat::RGBA16_FLOAT: case RenderTargetFormat::RG32_FLOAT: + case RenderTargetFormat::RG32_UINT: return 8; case RenderTargetFormat::RGBA8_UNORM: case RenderTargetFormat::RGBA8_SNORM: @@ -62,6 +64,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { case RenderTargetFormat::RG16_FLOAT: case RenderTargetFormat::R32_FLOAT: case RenderTargetFormat::R11G11B10_FLOAT: + case RenderTargetFormat::R32_UINT: return 4; case RenderTargetFormat::R16_UNORM: case RenderTargetFormat::R16_SNORM: diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 73abb7a18..97dcccb92 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -20,9 +20,11 @@ enum class RenderTargetFormat : u32 { NONE = 0x0, RGBA32_FLOAT = 0xC0, RGBA32_UINT = 0xC2, + RGBA16_UNORM = 0xC6, RGBA16_UINT = 0xC9, RGBA16_FLOAT = 0xCA, RG32_FLOAT = 0xCB, + RG32_UINT = 0xCD, BGRA8_UNORM = 0xCF, RGB10_A2_UNORM = 0xD1, RGBA8_UNORM = 0xD5, @@ -34,6 +36,7 @@ enum class RenderTargetFormat : u32 { RG16_UINT = 0xDD, RG16_FLOAT = 0xDE, R11G11B10_FLOAT = 0xE0, + R32_UINT = 0xE4, R32_FLOAT = 0xE5, B5G6R5_UNORM = 0xE8, RG8_UNORM = 0xEA, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 52a649e2f..9d1549fe9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -648,11 +648,11 @@ std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers( if (used_buffer.IsIndirect()) { // Buffer is accessed indirectly, so upload the entire thing - size = buffer.size * sizeof(float); + size = buffer.size; if (size > MaxConstbufferSize) { - LOG_ERROR(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, - MaxConstbufferSize); + LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, + MaxConstbufferSize); size = MaxConstbufferSize; } } else { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index d635550d2..b6947b97b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -94,13 +94,14 @@ struct FormatTuple { static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S - {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5 + {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, - false}, // A2B10G10R10 - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5 - {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8 + false}, // A2B10G10R10U + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U + {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U {GL_RGBA16UI, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, false}, // R11FG11FB10F @@ -115,16 +116,17 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN2UNORM {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // BC7U {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8 + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8U + {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // G8R8S {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F - {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16UNORM + {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I @@ -137,6 +139,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI // DepthStencil formats {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, @@ -239,12 +243,13 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU // clang-format off MortonCopy<true, PixelFormat::ABGR8U>, MortonCopy<true, PixelFormat::ABGR8S>, - MortonCopy<true, PixelFormat::B5G6R5>, - MortonCopy<true, PixelFormat::A2B10G10R10>, - MortonCopy<true, PixelFormat::A1B5G5R5>, - MortonCopy<true, PixelFormat::R8>, + MortonCopy<true, PixelFormat::B5G6R5U>, + MortonCopy<true, PixelFormat::A2B10G10R10U>, + MortonCopy<true, PixelFormat::A1B5G5R5U>, + MortonCopy<true, PixelFormat::R8U>, MortonCopy<true, PixelFormat::R8UI>, MortonCopy<true, PixelFormat::RGBA16F>, + MortonCopy<true, PixelFormat::RGBA16U>, MortonCopy<true, PixelFormat::RGBA16UI>, MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, @@ -256,13 +261,14 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU MortonCopy<true, PixelFormat::DXN2SNORM>, MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::G8R8>, + MortonCopy<true, PixelFormat::G8R8U>, + MortonCopy<true, PixelFormat::G8R8S>, MortonCopy<true, PixelFormat::BGRA8>, MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>, - MortonCopy<true, PixelFormat::R16UNORM>, + MortonCopy<true, PixelFormat::R16U>, MortonCopy<true, PixelFormat::R16S>, MortonCopy<true, PixelFormat::R16UI>, MortonCopy<true, PixelFormat::R16I>, @@ -275,6 +281,8 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU MortonCopy<true, PixelFormat::SRGBA8>, MortonCopy<true, PixelFormat::RG8U>, MortonCopy<true, PixelFormat::RG8S>, + MortonCopy<true, PixelFormat::RG32UI>, + MortonCopy<true, PixelFormat::R32UI>, MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>, @@ -289,12 +297,13 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU // clang-format off MortonCopy<false, PixelFormat::ABGR8U>, MortonCopy<false, PixelFormat::ABGR8S>, - MortonCopy<false, PixelFormat::B5G6R5>, - MortonCopy<false, PixelFormat::A2B10G10R10>, - MortonCopy<false, PixelFormat::A1B5G5R5>, - MortonCopy<false, PixelFormat::R8>, + MortonCopy<false, PixelFormat::B5G6R5U>, + MortonCopy<false, PixelFormat::A2B10G10R10U>, + MortonCopy<false, PixelFormat::A1B5G5R5U>, + MortonCopy<false, PixelFormat::R8U>, MortonCopy<false, PixelFormat::R8UI>, MortonCopy<false, PixelFormat::RGBA16F>, + MortonCopy<false, PixelFormat::RGBA16U>, MortonCopy<false, PixelFormat::RGBA16UI>, MortonCopy<false, PixelFormat::R11FG11FB10F>, MortonCopy<false, PixelFormat::RGBA32UI>, @@ -308,13 +317,14 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU nullptr, nullptr, nullptr, - MortonCopy<false, PixelFormat::G8R8>, + MortonCopy<false, PixelFormat::G8R8U>, + MortonCopy<false, PixelFormat::G8R8S>, MortonCopy<false, PixelFormat::BGRA8>, MortonCopy<false, PixelFormat::RGBA32F>, MortonCopy<false, PixelFormat::RG32F>, MortonCopy<false, PixelFormat::R32F>, MortonCopy<false, PixelFormat::R16F>, - MortonCopy<false, PixelFormat::R16UNORM>, + MortonCopy<false, PixelFormat::R16U>, MortonCopy<false, PixelFormat::R16S>, MortonCopy<false, PixelFormat::R16UI>, MortonCopy<false, PixelFormat::R16I>, @@ -327,6 +337,8 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU MortonCopy<false, PixelFormat::SRGBA8>, MortonCopy<false, PixelFormat::RG8U>, MortonCopy<false, PixelFormat::RG8S>, + MortonCopy<false, PixelFormat::RG32UI>, + MortonCopy<false, PixelFormat::R32UI>, MortonCopy<false, PixelFormat::Z24S8>, MortonCopy<false, PixelFormat::S8Z24>, MortonCopy<false, PixelFormat::Z32F>, @@ -452,7 +464,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { } static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { - const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8)}; + const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)}; for (size_t y = 0; y < height; ++y) { for (size_t x = 0; x < width; ++x) { const size_t offset{bpp * (y * width + x)}; @@ -484,7 +496,8 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma ConvertS8Z24ToZ24S8(data, width, height); break; - case PixelFormat::G8R8: + case PixelFormat::G8R8U: + case PixelFormat::G8R8S: // Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8. ConvertG8R8ToR8G8(data, width, height); break; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 4ab74342e..55cf3782c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -25,51 +25,55 @@ struct SurfaceParams { enum class PixelFormat { ABGR8U = 0, ABGR8S = 1, - B5G6R5 = 2, - A2B10G10R10 = 3, - A1B5G5R5 = 4, - R8 = 5, + B5G6R5U = 2, + A2B10G10R10U = 3, + A1B5G5R5U = 4, + R8U = 5, R8UI = 6, RGBA16F = 7, - RGBA16UI = 8, - R11FG11FB10F = 9, - RGBA32UI = 10, - DXT1 = 11, - DXT23 = 12, - DXT45 = 13, - DXN1 = 14, // This is also known as BC4 - DXN2UNORM = 15, - DXN2SNORM = 16, - BC7U = 17, - ASTC_2D_4X4 = 18, - G8R8 = 19, - BGRA8 = 20, - RGBA32F = 21, - RG32F = 22, - R32F = 23, - R16F = 24, - R16UNORM = 25, - R16S = 26, - R16UI = 27, - R16I = 28, - RG16 = 29, - RG16F = 30, - RG16UI = 31, - RG16I = 32, - RG16S = 33, - RGB32F = 34, - SRGBA8 = 35, - RG8U = 36, - RG8S = 37, + RGBA16U = 8, + RGBA16UI = 9, + R11FG11FB10F = 10, + RGBA32UI = 11, + DXT1 = 12, + DXT23 = 13, + DXT45 = 14, + DXN1 = 15, // This is also known as BC4 + DXN2UNORM = 16, + DXN2SNORM = 17, + BC7U = 18, + ASTC_2D_4X4 = 19, + G8R8U = 20, + G8R8S = 21, + BGRA8 = 22, + RGBA32F = 23, + RG32F = 24, + R32F = 25, + R16F = 26, + R16U = 27, + R16S = 28, + R16UI = 29, + R16I = 30, + RG16 = 31, + RG16F = 32, + RG16UI = 33, + RG16I = 34, + RG16S = 35, + RGB32F = 36, + SRGBA8 = 37, + RG8U = 38, + RG8S = 39, + RG32UI = 40, + R32UI = 41, MaxColorFormat, // DepthStencil formats - Z24S8 = 38, - S8Z24 = 39, - Z32F = 40, - Z16 = 41, - Z32FS8 = 42, + Z24S8 = 42, + S8Z24 = 43, + Z32F = 44, + Z16 = 45, + Z32FS8 = 46, MaxDepthStencilFormat, @@ -109,12 +113,13 @@ struct SurfaceParams { constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ 1, // ABGR8U 1, // ABGR8S - 1, // B5G6R5 - 1, // A2B10G10R10 - 1, // A1B5G5R5 - 1, // R8 + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U 1, // R8UI 1, // RGBA16F + 1, // RGBA16U 1, // RGBA16UI 1, // R11FG11FB10F 1, // RGBA32UI @@ -126,13 +131,14 @@ struct SurfaceParams { 4, // DXN2SNORM 4, // BC7U 4, // ASTC_2D_4X4 - 1, // G8R8 + 1, // G8R8U + 1, // G8R8S 1, // BGRA8 1, // RGBA32F 1, // RG32F 1, // R32F 1, // R16F - 1, // R16UNORM + 1, // R16U 1, // R16S 1, // R16UI 1, // R16I @@ -145,6 +151,8 @@ struct SurfaceParams { 1, // SRGBA8 1, // RG8U 1, // RG8S + 1, // RG32UI + 1, // R32UI 1, // Z24S8 1, // S8Z24 1, // Z32F @@ -163,12 +171,13 @@ struct SurfaceParams { constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 32, // ABGR8U 32, // ABGR8S - 16, // B5G6R5 - 32, // A2B10G10R10 - 16, // A1B5G5R5 - 8, // R8 + 16, // B5G6R5U + 32, // A2B10G10R10U + 16, // A1B5G5R5U + 8, // R8U 8, // R8UI 64, // RGBA16F + 64, // RGBA16U 64, // RGBA16UI 32, // R11FG11FB10F 128, // RGBA32UI @@ -180,13 +189,14 @@ struct SurfaceParams { 128, // DXN2SNORM 128, // BC7U 32, // ASTC_2D_4X4 - 16, // G8R8 + 16, // G8R8U + 16, // G8R8S 32, // BGRA8 128, // RGBA32F 64, // RG32F 32, // R32F 16, // R16F - 16, // R16UNORM + 16, // R16U 16, // R16S 16, // R16UI 16, // R16I @@ -199,6 +209,8 @@ struct SurfaceParams { 32, // SRGBA8 16, // RG8U 16, // RG8S + 64, // RG32UI + 32, // R32UI 32, // Z24S8 32, // S8Z24 32, // Z32F @@ -244,9 +256,11 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::BGRA8_UNORM: return PixelFormat::BGRA8; case Tegra::RenderTargetFormat::RGB10_A2_UNORM: - return PixelFormat::A2B10G10R10; + return PixelFormat::A2B10G10R10U; case Tegra::RenderTargetFormat::RGBA16_FLOAT: return PixelFormat::RGBA16F; + case Tegra::RenderTargetFormat::RGBA16_UNORM: + return PixelFormat::RGBA16U; case Tegra::RenderTargetFormat::RGBA16_UINT: return PixelFormat::RGBA16UI; case Tegra::RenderTargetFormat::RGBA32_FLOAT: @@ -256,11 +270,11 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::R11G11B10_FLOAT: return PixelFormat::R11FG11FB10F; case Tegra::RenderTargetFormat::B5G6R5_UNORM: - return PixelFormat::B5G6R5; + return PixelFormat::B5G6R5U; case Tegra::RenderTargetFormat::RGBA32_UINT: return PixelFormat::RGBA32UI; case Tegra::RenderTargetFormat::R8_UNORM: - return PixelFormat::R8; + return PixelFormat::R8U; case Tegra::RenderTargetFormat::R8_UINT: return PixelFormat::R8UI; case Tegra::RenderTargetFormat::RG16_FLOAT: @@ -280,7 +294,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::R16_FLOAT: return PixelFormat::R16F; case Tegra::RenderTargetFormat::R16_UNORM: - return PixelFormat::R16UNORM; + return PixelFormat::R16U; case Tegra::RenderTargetFormat::R16_SNORM: return PixelFormat::R16S; case Tegra::RenderTargetFormat::R16_UINT: @@ -289,6 +303,10 @@ struct SurfaceParams { return PixelFormat::R16I; case Tegra::RenderTargetFormat::R32_FLOAT: return PixelFormat::R32F; + case Tegra::RenderTargetFormat::R32_UINT: + return PixelFormat::R32UI; + case Tegra::RenderTargetFormat::RG32_UINT: + return PixelFormat::RG32UI; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -310,15 +328,33 @@ struct SurfaceParams { static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::B5G6R5: - return PixelFormat::B5G6R5; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::B5G6R5U; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::A2B10G10R10: - return PixelFormat::A2B10G10R10; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::A2B10G10R10U; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::A1B5G5R5: - return PixelFormat::A1B5G5R5; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::A1B5G5R5U; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R8: switch (component_type) { case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::R8; + return PixelFormat::R8U; case Tegra::Texture::ComponentType::UINT: return PixelFormat::R8UI; } @@ -326,11 +362,33 @@ struct SurfaceParams { static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::G8R8: - return PixelFormat::G8R8; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::G8R8U; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::G8R8S; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R16_G16_B16_A16: - return PixelFormat::RGBA16F; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::RGBA16U; + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RGBA16F; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::BF10GF11RF11: - return PixelFormat::R11FG11FB10F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::R11FG11FB10F; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32_B32_A32: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: @@ -342,15 +400,29 @@ struct SurfaceParams { static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32: - return PixelFormat::RG32F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RG32F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::RG32UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32_B32: - return PixelFormat::RGB32F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RGB32F; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R16: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: return PixelFormat::R16F; case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::R16UNORM; + return PixelFormat::R16U; case Tegra::Texture::ComponentType::SNORM: return PixelFormat::R16S; case Tegra::Texture::ComponentType::UINT: @@ -362,9 +434,19 @@ struct SurfaceParams { static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::R32: - return PixelFormat::R32F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::R32F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::R32UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::ZF32: return PixelFormat::Z32F; + case Tegra::Texture::TextureFormat::Z16: + return PixelFormat::Z16; case Tegra::Texture::TextureFormat::Z24S8: return PixelFormat::Z24S8; case Tegra::Texture::TextureFormat::DXT1: @@ -443,6 +525,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::R16_UNORM: case Tegra::RenderTargetFormat::B5G6R5_UNORM: case Tegra::RenderTargetFormat::RG8_UNORM: + case Tegra::RenderTargetFormat::RGBA16_UNORM: return ComponentType::UNorm; case Tegra::RenderTargetFormat::RGBA8_SNORM: case Tegra::RenderTargetFormat::RG16_SNORM: @@ -462,6 +545,8 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RG16_UINT: case Tegra::RenderTargetFormat::R8_UINT: case Tegra::RenderTargetFormat::R16_UINT: + case Tegra::RenderTargetFormat::RG32_UINT: + case Tegra::RenderTargetFormat::R32_UINT: return ComponentType::UInt; case Tegra::RenderTargetFormat::RG16_SINT: case Tegra::RenderTargetFormat::R16_SINT: diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6834d7085..e0dfdbb9f 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -367,31 +367,32 @@ public: } /// Generates code representing a uniform (C buffer) register, interpreted as the input type. - std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) { + std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type, + Register::Size size = Register::Size::Word) { declr_const_buffers[index].MarkAsUsed(index, offset, stage); std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" + std::to_string(offset % 4) + ']'; if (type == GLSLRegister::Type::Float) { - return value; + // Do nothing, default } else if (type == GLSLRegister::Type::Integer) { - return "floatBitsToInt(" + value + ')'; + value = "floatBitsToInt(" + value + ')'; } else if (type == GLSLRegister::Type::UnsignedInteger) { - return "floatBitsToUint(" + value + ')'; + value = "floatBitsToUint(" + value + ')'; } else { UNREACHABLE(); } + + return ConvertIntegerSize(value, size); } - std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg, + std::string GetUniformIndirect(u64 cbuf_index, s64 offset, const std::string& index_str, GLSLRegister::Type type) { - declr_const_buffers[index].MarkAsUsedIndirect(index, stage); - - std::string final_offset = "((floatBitsToInt(" + GetRegister(index_reg, 0) + ") + " + - std::to_string(offset) + ") / 4)"; + declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage); - std::string value = - 'c' + std::to_string(index) + '[' + final_offset + " / 4][" + final_offset + " % 4]"; + std::string final_offset = fmt::format("({} + {})", index_str, offset / 4); + std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" + + final_offset + " % 4]"; if (type == GLSLRegister::Type::Float) { return value; @@ -1249,20 +1250,41 @@ private: op_a = "abs(" + op_a + ')'; } + if (instr.conversion.negate_a) { + op_a = "-(" + op_a + ')'; + } + regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, 1, instr.alu.saturate_d, 0, instr.conversion.dest_size); break; } - case OpCode::Id::I2F_R: { + case OpCode::Id::I2F_R: + case OpCode::Id::I2F_C: { ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented"); ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); - std::string op_a = regs.GetRegisterAsInteger( - instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size); + + std::string op_a{}; + + if (instr.is_b_gpr) { + op_a = + regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed, + instr.conversion.src_size); + } else { + op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + instr.conversion.is_input_signed + ? GLSLRegister::Type::Integer + : GLSLRegister::Type::UnsignedInteger, + instr.conversion.src_size); + } if (instr.conversion.abs_a) { op_a = "abs(" + op_a + ')'; } + if (instr.conversion.negate_a) { + op_a = "-(" + op_a + ')'; + } + regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); break; } @@ -1271,6 +1293,14 @@ private: ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented"); std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); + if (instr.conversion.abs_a) { + op_a = "abs(" + op_a + ')'; + } + + if (instr.conversion.negate_a) { + op_a = "-(" + op_a + ')'; + } + switch (instr.conversion.f2f.rounding) { case Tegra::Shader::F2fRoundingOp::None: break; @@ -1293,21 +1323,29 @@ private: break; } - if (instr.conversion.abs_a) { - op_a = "abs(" + op_a + ')'; - } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d); break; } - case OpCode::Id::F2I_R: { + case OpCode::Id::F2I_R: + case OpCode::Id::F2I_C: { ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented"); - std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); + std::string op_a{}; + + if (instr.is_b_gpr) { + op_a = regs.GetRegisterAsFloat(instr.gpr20); + } else { + op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::Float); + } if (instr.conversion.abs_a) { op_a = "abs(" + op_a + ')'; } + if (instr.conversion.negate_a) { + op_a = "-(" + op_a + ')'; + } + switch (instr.conversion.f2i.rounding) { case Tegra::Shader::F2iRoundingOp::None: break; @@ -1355,11 +1393,16 @@ private: case OpCode::Id::LD_C: { ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented"); + // Add an extra scope and declare the index register inside to prevent + // overwriting it in case it is used as an output of the LD instruction. + shader.AddLine("{"); + ++shader.scope; + + shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + + " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);"); + std::string op_a = - regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, instr.gpr8, - GLSLRegister::Type::Float); - std::string op_b = - regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, instr.gpr8, + regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index", GLSLRegister::Type::Float); switch (instr.ld_c.type.Value()) { @@ -1367,16 +1410,22 @@ private: regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); break; - case Tegra::Shader::UniformType::Double: + case Tegra::Shader::UniformType::Double: { + std::string op_b = + regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, + "index", GLSLRegister::Type::Float); regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1); break; - + } default: LOG_CRITICAL(HW_GPU, "Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value())); UNREACHABLE(); } + + --shader.scope; + shader.AddLine("}"); break; } case OpCode::Id::ST_A: { diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 679e5ceb2..8f719fdd8 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -24,15 +24,25 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; inline GLenum VertexType(Maxwell::VertexAttribute attrib) { switch (attrib.type) { + case Maxwell::VertexAttribute::Type::UnsignedInt: case Maxwell::VertexAttribute::Type::UnsignedNorm: { switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: case Maxwell::VertexAttribute::Size::Size_16_16_16_16: return GL_UNSIGNED_SHORT; + case Maxwell::VertexAttribute::Size::Size_32: + case Maxwell::VertexAttribute::Size::Size_32_32: + case Maxwell::VertexAttribute::Size::Size_32_32_32: + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return GL_UNSIGNED_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; } @@ -42,16 +52,25 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return {}; } + case Maxwell::VertexAttribute::Type::SignedInt: case Maxwell::VertexAttribute::Type::SignedNorm: { switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_32_32_32: - return GL_INT; + case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: return GL_SHORT; + case Maxwell::VertexAttribute::Size::Size_32: + case Maxwell::VertexAttribute::Size::Size_32_32: + case Maxwell::VertexAttribute::Size::Size_32_32_32: + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return GL_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_INT_2_10_10_10_REV; } @@ -61,9 +80,6 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return {}; } - case Maxwell::VertexAttribute::Type::UnsignedInt: - return GL_UNSIGNED_INT; - case Maxwell::VertexAttribute::Type::Float: return GL_FLOAT; } |