From 8e56a84566036cfff0aa5c3d80ae1b051d2bd0bf Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sun, 23 Apr 2023 00:01:08 -0400 Subject: core_timing: Use CNTPCT as the guest CPU tick Previously, we were mixing the raw CPU frequency and CNTFRQ. The raw CPU frequency (1020 MHz) should've never been used as CNTPCT (whose frequency is CNTFRQ) is the only counter available. --- src/audio_core/renderer/adsp/adsp.cpp | 1 - src/audio_core/renderer/adsp/audio_renderer.cpp | 5 ++--- src/audio_core/renderer/adsp/command_list_processor.cpp | 1 - .../renderer/command/performance/performance.cpp | 15 ++++++--------- 4 files changed, 8 insertions(+), 14 deletions(-) (limited to 'src/audio_core/renderer') diff --git a/src/audio_core/renderer/adsp/adsp.cpp b/src/audio_core/renderer/adsp/adsp.cpp index 74772fc50..b1db31e93 100644 --- a/src/audio_core/renderer/adsp/adsp.cpp +++ b/src/audio_core/renderer/adsp/adsp.cpp @@ -7,7 +7,6 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/memory.h" namespace AudioCore::AudioRenderer::ADSP { diff --git a/src/audio_core/renderer/adsp/audio_renderer.cpp b/src/audio_core/renderer/adsp/audio_renderer.cpp index 8bc39f9f9..9ca716b60 100644 --- a/src/audio_core/renderer/adsp/audio_renderer.cpp +++ b/src/audio_core/renderer/adsp/audio_renderer.cpp @@ -13,7 +13,6 @@ #include "common/thread.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" MICROPROFILE_DEFINE(Audio_Renderer, "Audio", "DSP", MP_RGB(60, 19, 97)); @@ -144,6 +143,7 @@ void AudioRenderer::ThreadFunc(std::stop_token stop_token) { mailbox->ADSPSendMessage(RenderMessage::AudioRenderer_InitializeOK); + // 0.12 seconds (2304000 / 19200000) constexpr u64 max_process_time{2'304'000ULL}; while (!stop_token.stop_requested()) { @@ -184,8 +184,7 @@ void AudioRenderer::ThreadFunc(std::stop_token stop_token) { u64 max_time{max_process_time}; if (index == 1 && command_buffer.applet_resource_user_id == mailbox->GetCommandBuffer(0).applet_resource_user_id) { - max_time = max_process_time - - Core::Timing::CyclesToNs(render_times_taken[0]).count(); + max_time = max_process_time - render_times_taken[0]; if (render_times_taken[0] > max_process_time) { max_time = 0; } diff --git a/src/audio_core/renderer/adsp/command_list_processor.cpp b/src/audio_core/renderer/adsp/command_list_processor.cpp index 7a300d216..3a0f1ae38 100644 --- a/src/audio_core/renderer/adsp/command_list_processor.cpp +++ b/src/audio_core/renderer/adsp/command_list_processor.cpp @@ -9,7 +9,6 @@ #include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/memory.h" namespace AudioCore::AudioRenderer::ADSP { diff --git a/src/audio_core/renderer/command/performance/performance.cpp b/src/audio_core/renderer/command/performance/performance.cpp index 985958b03..4a881547f 100644 --- a/src/audio_core/renderer/command/performance/performance.cpp +++ b/src/audio_core/renderer/command/performance/performance.cpp @@ -5,7 +5,6 @@ #include "audio_core/renderer/command/performance/performance.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" namespace AudioCore::AudioRenderer { @@ -18,20 +17,18 @@ void PerformanceCommand::Process(const ADSP::CommandListProcessor& processor) { auto base{entry_address.translated_address}; if (state == PerformanceState::Start) { auto start_time_ptr{reinterpret_cast(base + entry_address.entry_start_time_offset)}; - *start_time_ptr = static_cast( - Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() - - processor.start_time - processor.current_processing_time) - .count()); + *start_time_ptr = + static_cast(processor.system->CoreTiming().GetClockTicks() - processor.start_time - + processor.current_processing_time); } else if (state == PerformanceState::Stop) { auto processed_time_ptr{ reinterpret_cast(base + entry_address.entry_processed_time_offset)}; auto entry_count_ptr{ reinterpret_cast(base + entry_address.header_entry_count_offset)}; - *processed_time_ptr = static_cast( - Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() - - processor.start_time - processor.current_processing_time) - .count()); + *processed_time_ptr = + static_cast(processor.system->CoreTiming().GetClockTicks() - processor.start_time - + processor.current_processing_time); (*entry_count_ptr)++; } } -- cgit v1.2.3 From 5da70f719703084482933e103e561cc98163f370 Mon Sep 17 00:00:00 2001 From: Kelebek1 Date: Tue, 23 May 2023 14:45:54 +0100 Subject: Remove memory allocations in some hot paths --- .../renderer/command/data_source/decode.cpp | 23 +++++++++++----------- .../renderer/command/effect/compressor.cpp | 8 ++++---- src/audio_core/renderer/command/effect/delay.cpp | 14 ++++++------- .../renderer/command/effect/i3dl2_reverb.cpp | 4 ++-- .../renderer/command/effect/light_limiter.cpp | 12 +++++------ src/audio_core/renderer/command/effect/reverb.cpp | 12 +++++------ .../renderer/command/sink/circular_buffer.cpp | 4 ++-- src/audio_core/renderer/command/sink/device.cpp | 5 ++--- src/audio_core/renderer/mix/mix_context.cpp | 6 +++--- src/audio_core/renderer/nodes/node_states.cpp | 4 ++-- src/audio_core/renderer/nodes/node_states.h | 2 +- src/audio_core/renderer/system.cpp | 1 + 12 files changed, 47 insertions(+), 48 deletions(-) (limited to 'src/audio_core/renderer') diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp index ff5d31bd6..f45933203 100644 --- a/src/audio_core/renderer/command/data_source/decode.cpp +++ b/src/audio_core/renderer/command/data_source/decode.cpp @@ -8,6 +8,7 @@ #include "audio_core/renderer/command/resample/resample.h" #include "common/fixed_point.h" #include "common/logging/log.h" +#include "common/scratch_buffer.h" #include "core/memory.h" namespace AudioCore::AudioRenderer { @@ -27,6 +28,7 @@ constexpr std::array PitchBySrcQuality = {4, 8, 4}; template static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, const DecodeArg& req) { + std::array tmp_samples{}; constexpr s32 min{std::numeric_limits::min()}; constexpr s32 max{std::numeric_limits::max()}; @@ -49,18 +51,17 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, const u64 size{channel_count * samples_to_decode}; const u64 size_bytes{size * sizeof(T)}; - std::vector samples(size); - memory.ReadBlockUnsafe(source, samples.data(), size_bytes); + memory.ReadBlockUnsafe(source, tmp_samples.data(), size_bytes); if constexpr (std::is_floating_point_v) { for (u32 i = 0; i < samples_to_decode; i++) { - auto sample{static_cast(samples[i * channel_count + req.target_channel] * + auto sample{static_cast(tmp_samples[i * channel_count + req.target_channel] * std::numeric_limits::max())}; out_buffer[i] = static_cast(std::clamp(sample, min, max)); } } else { for (u32 i = 0; i < samples_to_decode; i++) { - out_buffer[i] = samples[i * channel_count + req.target_channel]; + out_buffer[i] = tmp_samples[i * channel_count + req.target_channel]; } } } break; @@ -73,17 +74,16 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, } const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))}; - std::vector samples(samples_to_decode); - memory.ReadBlockUnsafe(source, samples.data(), samples_to_decode * sizeof(T)); + memory.ReadBlockUnsafe(source, tmp_samples.data(), samples_to_decode * sizeof(T)); if constexpr (std::is_floating_point_v) { for (u32 i = 0; i < samples_to_decode; i++) { - auto sample{static_cast(samples[i * channel_count + req.target_channel] * + auto sample{static_cast(tmp_samples[i * channel_count + req.target_channel] * std::numeric_limits::max())}; out_buffer[i] = static_cast(std::clamp(sample, min, max)); } } else { - std::memcpy(out_buffer.data(), samples.data(), samples_to_decode * sizeof(s16)); + std::memcpy(out_buffer.data(), tmp_samples.data(), samples_to_decode * sizeof(s16)); } break; } @@ -101,6 +101,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, */ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span out_buffer, const DecodeArg& req) { + std::array wavebuffer{}; constexpr u32 SamplesPerFrame{14}; constexpr u32 NibblesPerFrame{16}; @@ -138,9 +139,7 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span out_buffer, } const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; - std::vector wavebuffer(size); - memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), - wavebuffer.size()); + memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), size); auto context{req.adpcm_context}; auto header{context->header}; @@ -258,7 +257,7 @@ void DecodeFromWaveBuffers(Core::Memory::Memory& memory, const DecodeFromWaveBuf u32 offset{voice_state.offset}; auto output_buffer{args.output}; - std::vector temp_buffer(TempBufferSize, 0); + std::array temp_buffer{}; while (remaining_sample_count > 0) { const auto samples_to_write{std::min(remaining_sample_count, max_remaining_sample_count)}; diff --git a/src/audio_core/renderer/command/effect/compressor.cpp b/src/audio_core/renderer/command/effect/compressor.cpp index 7229618e8..ee9b68d5b 100644 --- a/src/audio_core/renderer/command/effect/compressor.cpp +++ b/src/audio_core/renderer/command/effect/compressor.cpp @@ -44,8 +44,8 @@ static void InitializeCompressorEffect(const CompressorInfo::ParameterVersion2& static void ApplyCompressorEffect(const CompressorInfo::ParameterVersion2& params, CompressorInfo::State& state, bool enabled, - std::vector> input_buffers, - std::vector> output_buffers, u32 sample_count) { + std::span> input_buffers, + std::span> output_buffers, u32 sample_count) { if (enabled) { auto state_00{state.unk_00}; auto state_04{state.unk_04}; @@ -124,8 +124,8 @@ void CompressorCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& } void CompressorCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (s16 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/delay.cpp b/src/audio_core/renderer/command/effect/delay.cpp index a4e408d40..e536cbb1e 100644 --- a/src/audio_core/renderer/command/effect/delay.cpp +++ b/src/audio_core/renderer/command/effect/delay.cpp @@ -51,7 +51,7 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params, state.delay_lines[channel].sample_count_max = sample_count_max.to_int_floor(); state.delay_lines[channel].sample_count = sample_count.to_int_floor(); state.delay_lines[channel].buffer.resize(state.delay_lines[channel].sample_count, 0); - if (state.delay_lines[channel].buffer.size() == 0) { + if (state.delay_lines[channel].sample_count == 0) { state.delay_lines[channel].buffer.push_back(0); } state.delay_lines[channel].buffer_pos = 0; @@ -74,8 +74,8 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params, */ template static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, - std::vector>& inputs, - std::vector>& outputs, const u32 sample_count) { + std::span> inputs, std::span> outputs, + const u32 sample_count) { for (u32 sample_index = 0; sample_index < sample_count; sample_index++) { std::array, NumChannels> input_samples{}; for (u32 channel = 0; channel < NumChannels; channel++) { @@ -153,8 +153,8 @@ static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::St * @param sample_count - Number of samples to process. */ static void ApplyDelayEffect(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, - const bool enabled, std::vector>& inputs, - std::vector>& outputs, const u32 sample_count) { + const bool enabled, std::span> inputs, + std::span> outputs, const u32 sample_count) { if (!IsChannelCountValid(params.channel_count)) { LOG_ERROR(Service_Audio, "Invalid delay channels {}", params.channel_count); @@ -208,8 +208,8 @@ void DelayCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proce } void DelayCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (s16 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp index 27d8b9844..d2bfb67cc 100644 --- a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp +++ b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp @@ -408,8 +408,8 @@ void I3dl2ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& } void I3dl2ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/light_limiter.cpp b/src/audio_core/renderer/command/effect/light_limiter.cpp index e8fb0e2fc..4161a9821 100644 --- a/src/audio_core/renderer/command/effect/light_limiter.cpp +++ b/src/audio_core/renderer/command/effect/light_limiter.cpp @@ -47,8 +47,8 @@ static void InitializeLightLimiterEffect(const LightLimiterInfo::ParameterVersio */ static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& params, LightLimiterInfo::State& state, const bool enabled, - std::vector>& inputs, - std::vector>& outputs, const u32 sample_count, + std::span> inputs, + std::span> outputs, const u32 sample_count, LightLimiterInfo::StatisticsInternal* statistics) { constexpr s64 min{std::numeric_limits::min()}; constexpr s64 max{std::numeric_limits::max()}; @@ -147,8 +147,8 @@ void LightLimiterVersion1Command::Dump([[maybe_unused]] const ADSP::CommandListP } void LightLimiterVersion1Command::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, @@ -190,8 +190,8 @@ void LightLimiterVersion2Command::Dump([[maybe_unused]] const ADSP::CommandListP } void LightLimiterVersion2Command::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/reverb.cpp b/src/audio_core/renderer/command/effect/reverb.cpp index 8b9b65214..fc2f15a5e 100644 --- a/src/audio_core/renderer/command/effect/reverb.cpp +++ b/src/audio_core/renderer/command/effect/reverb.cpp @@ -250,8 +250,8 @@ static Common::FixedPoint<50, 14> Axfx2AllPassTick(ReverbInfo::ReverbDelayLine& */ template static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, - std::vector>& inputs, - std::vector>& outputs, const u32 sample_count) { + std::span> inputs, + std::span> outputs, const u32 sample_count) { static constexpr std::array OutTapIndexes1Ch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; @@ -369,8 +369,8 @@ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, Rever * @param sample_count - Number of samples to process. */ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, - const bool enabled, std::vector>& inputs, - std::vector>& outputs, const u32 sample_count) { + const bool enabled, std::span> inputs, + std::span> outputs, const u32 sample_count) { if (enabled) { switch (params.channel_count) { case 0: @@ -412,8 +412,8 @@ void ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proc } void ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/sink/circular_buffer.cpp b/src/audio_core/renderer/command/sink/circular_buffer.cpp index ded5afc94..e2ce59792 100644 --- a/src/audio_core/renderer/command/sink/circular_buffer.cpp +++ b/src/audio_core/renderer/command/sink/circular_buffer.cpp @@ -24,7 +24,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces constexpr s32 min{std::numeric_limits::min()}; constexpr s32 max{std::numeric_limits::max()}; - std::vector output(processor.sample_count); + std::array output{}; for (u32 channel = 0; channel < input_count; channel++) { auto input{processor.mix_buffers.subspan(inputs[channel] * processor.sample_count, processor.sample_count)}; @@ -33,7 +33,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces } processor.memory->WriteBlockUnsafe(address + pos, output.data(), - output.size() * sizeof(s16)); + processor.sample_count * sizeof(s16)); pos += static_cast(processor.sample_count * sizeof(s16)); if (pos >= size) { pos = 0; diff --git a/src/audio_core/renderer/command/sink/device.cpp b/src/audio_core/renderer/command/sink/device.cpp index e88372a75..5f74dd7ad 100644 --- a/src/audio_core/renderer/command/sink/device.cpp +++ b/src/audio_core/renderer/command/sink/device.cpp @@ -33,8 +33,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) { .consumed{false}, }; - std::vector samples(out_buffer.frames * input_count); - + std::array samples{}; for (u32 channel = 0; channel < input_count; channel++) { const auto offset{inputs[channel] * out_buffer.frames}; @@ -45,7 +44,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) { } out_buffer.tag = reinterpret_cast(samples.data()); - stream->AppendBuffer(out_buffer, samples); + stream->AppendBuffer(out_buffer, {samples.data(), out_buffer.frames * input_count}); if (stream->IsPaused()) { stream->Start(); diff --git a/src/audio_core/renderer/mix/mix_context.cpp b/src/audio_core/renderer/mix/mix_context.cpp index 35b748ede..3a18ae7c2 100644 --- a/src/audio_core/renderer/mix/mix_context.cpp +++ b/src/audio_core/renderer/mix/mix_context.cpp @@ -125,10 +125,10 @@ bool MixContext::TSortInfo(const SplitterContext& splitter_context) { return false; } - std::vector sorted_results{node_states.GetSortedResuls()}; - const auto result_size{std::min(count, static_cast(sorted_results.size()))}; + auto sorted_results{node_states.GetSortedResuls()}; + const auto result_size{std::min(count, static_cast(sorted_results.second))}; for (s32 i = 0; i < result_size; i++) { - sorted_mix_infos[i] = &mix_infos[sorted_results[i]]; + sorted_mix_infos[i] = &mix_infos[sorted_results.first[i]]; } CalcMixBufferOffset(); diff --git a/src/audio_core/renderer/nodes/node_states.cpp b/src/audio_core/renderer/nodes/node_states.cpp index 1821a51e6..b7a44a54c 100644 --- a/src/audio_core/renderer/nodes/node_states.cpp +++ b/src/audio_core/renderer/nodes/node_states.cpp @@ -134,8 +134,8 @@ u32 NodeStates::GetNodeCount() const { return node_count; } -std::vector NodeStates::GetSortedResuls() const { - return {results.rbegin(), results.rbegin() + result_pos}; +std::pair::reverse_iterator, size_t> NodeStates::GetSortedResuls() const { + return {results.rbegin(), result_pos}; } } // namespace AudioCore::AudioRenderer diff --git a/src/audio_core/renderer/nodes/node_states.h b/src/audio_core/renderer/nodes/node_states.h index 94b1d1254..e768cd4b5 100644 --- a/src/audio_core/renderer/nodes/node_states.h +++ b/src/audio_core/renderer/nodes/node_states.h @@ -175,7 +175,7 @@ public: * * @return Vector of nodes in reverse order. */ - std::vector GetSortedResuls() const; + std::pair::reverse_iterator, size_t> GetSortedResuls() const; private: /// Number of nodes in the graph diff --git a/src/audio_core/renderer/system.cpp b/src/audio_core/renderer/system.cpp index 53b258c4f..a23627472 100644 --- a/src/audio_core/renderer/system.cpp +++ b/src/audio_core/renderer/system.cpp @@ -444,6 +444,7 @@ Result System::Update(std::span input, std::span performance, std: std::scoped_lock l{lock}; const auto start_time{core.CoreTiming().GetClockTicks()}; + std::memset(output.data(), 0, output.size()); InfoUpdater info_updater(input, output, process_handle, behavior); -- cgit v1.2.3