From 5da70f719703084482933e103e561cc98163f370 Mon Sep 17 00:00:00 2001 From: Kelebek1 Date: Tue, 23 May 2023 14:45:54 +0100 Subject: Remove memory allocations in some hot paths --- .../renderer/command/data_source/decode.cpp | 23 +++++++++++----------- .../renderer/command/effect/compressor.cpp | 8 ++++---- src/audio_core/renderer/command/effect/delay.cpp | 14 ++++++------- .../renderer/command/effect/i3dl2_reverb.cpp | 4 ++-- .../renderer/command/effect/light_limiter.cpp | 12 +++++------ src/audio_core/renderer/command/effect/reverb.cpp | 12 +++++------ .../renderer/command/sink/circular_buffer.cpp | 4 ++-- src/audio_core/renderer/command/sink/device.cpp | 5 ++--- src/audio_core/renderer/mix/mix_context.cpp | 6 +++--- src/audio_core/renderer/nodes/node_states.cpp | 4 ++-- src/audio_core/renderer/nodes/node_states.h | 2 +- src/audio_core/renderer/system.cpp | 1 + 12 files changed, 47 insertions(+), 48 deletions(-) (limited to 'src/audio_core/renderer') diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp index ff5d31bd6..f45933203 100644 --- a/src/audio_core/renderer/command/data_source/decode.cpp +++ b/src/audio_core/renderer/command/data_source/decode.cpp @@ -8,6 +8,7 @@ #include "audio_core/renderer/command/resample/resample.h" #include "common/fixed_point.h" #include "common/logging/log.h" +#include "common/scratch_buffer.h" #include "core/memory.h" namespace AudioCore::AudioRenderer { @@ -27,6 +28,7 @@ constexpr std::array PitchBySrcQuality = {4, 8, 4}; template static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, const DecodeArg& req) { + std::array tmp_samples{}; constexpr s32 min{std::numeric_limits::min()}; constexpr s32 max{std::numeric_limits::max()}; @@ -49,18 +51,17 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, const u64 size{channel_count * samples_to_decode}; const u64 size_bytes{size * sizeof(T)}; - std::vector samples(size); - memory.ReadBlockUnsafe(source, samples.data(), size_bytes); + memory.ReadBlockUnsafe(source, tmp_samples.data(), size_bytes); if constexpr (std::is_floating_point_v) { for (u32 i = 0; i < samples_to_decode; i++) { - auto sample{static_cast(samples[i * channel_count + req.target_channel] * + auto sample{static_cast(tmp_samples[i * channel_count + req.target_channel] * std::numeric_limits::max())}; out_buffer[i] = static_cast(std::clamp(sample, min, max)); } } else { for (u32 i = 0; i < samples_to_decode; i++) { - out_buffer[i] = samples[i * channel_count + req.target_channel]; + out_buffer[i] = tmp_samples[i * channel_count + req.target_channel]; } } } break; @@ -73,17 +74,16 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, } const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))}; - std::vector samples(samples_to_decode); - memory.ReadBlockUnsafe(source, samples.data(), samples_to_decode * sizeof(T)); + memory.ReadBlockUnsafe(source, tmp_samples.data(), samples_to_decode * sizeof(T)); if constexpr (std::is_floating_point_v) { for (u32 i = 0; i < samples_to_decode; i++) { - auto sample{static_cast(samples[i * channel_count + req.target_channel] * + auto sample{static_cast(tmp_samples[i * channel_count + req.target_channel] * std::numeric_limits::max())}; out_buffer[i] = static_cast(std::clamp(sample, min, max)); } } else { - std::memcpy(out_buffer.data(), samples.data(), samples_to_decode * sizeof(s16)); + std::memcpy(out_buffer.data(), tmp_samples.data(), samples_to_decode * sizeof(s16)); } break; } @@ -101,6 +101,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, */ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span out_buffer, const DecodeArg& req) { + std::array wavebuffer{}; constexpr u32 SamplesPerFrame{14}; constexpr u32 NibblesPerFrame{16}; @@ -138,9 +139,7 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span out_buffer, } const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; - std::vector wavebuffer(size); - memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), - wavebuffer.size()); + memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), size); auto context{req.adpcm_context}; auto header{context->header}; @@ -258,7 +257,7 @@ void DecodeFromWaveBuffers(Core::Memory::Memory& memory, const DecodeFromWaveBuf u32 offset{voice_state.offset}; auto output_buffer{args.output}; - std::vector temp_buffer(TempBufferSize, 0); + std::array temp_buffer{}; while (remaining_sample_count > 0) { const auto samples_to_write{std::min(remaining_sample_count, max_remaining_sample_count)}; diff --git a/src/audio_core/renderer/command/effect/compressor.cpp b/src/audio_core/renderer/command/effect/compressor.cpp index 7229618e8..ee9b68d5b 100644 --- a/src/audio_core/renderer/command/effect/compressor.cpp +++ b/src/audio_core/renderer/command/effect/compressor.cpp @@ -44,8 +44,8 @@ static void InitializeCompressorEffect(const CompressorInfo::ParameterVersion2& static void ApplyCompressorEffect(const CompressorInfo::ParameterVersion2& params, CompressorInfo::State& state, bool enabled, - std::vector> input_buffers, - std::vector> output_buffers, u32 sample_count) { + std::span> input_buffers, + std::span> output_buffers, u32 sample_count) { if (enabled) { auto state_00{state.unk_00}; auto state_04{state.unk_04}; @@ -124,8 +124,8 @@ void CompressorCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& } void CompressorCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (s16 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/delay.cpp b/src/audio_core/renderer/command/effect/delay.cpp index a4e408d40..e536cbb1e 100644 --- a/src/audio_core/renderer/command/effect/delay.cpp +++ b/src/audio_core/renderer/command/effect/delay.cpp @@ -51,7 +51,7 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params, state.delay_lines[channel].sample_count_max = sample_count_max.to_int_floor(); state.delay_lines[channel].sample_count = sample_count.to_int_floor(); state.delay_lines[channel].buffer.resize(state.delay_lines[channel].sample_count, 0); - if (state.delay_lines[channel].buffer.size() == 0) { + if (state.delay_lines[channel].sample_count == 0) { state.delay_lines[channel].buffer.push_back(0); } state.delay_lines[channel].buffer_pos = 0; @@ -74,8 +74,8 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params, */ template static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, - std::vector>& inputs, - std::vector>& outputs, const u32 sample_count) { + std::span> inputs, std::span> outputs, + const u32 sample_count) { for (u32 sample_index = 0; sample_index < sample_count; sample_index++) { std::array, NumChannels> input_samples{}; for (u32 channel = 0; channel < NumChannels; channel++) { @@ -153,8 +153,8 @@ static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::St * @param sample_count - Number of samples to process. */ static void ApplyDelayEffect(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, - const bool enabled, std::vector>& inputs, - std::vector>& outputs, const u32 sample_count) { + const bool enabled, std::span> inputs, + std::span> outputs, const u32 sample_count) { if (!IsChannelCountValid(params.channel_count)) { LOG_ERROR(Service_Audio, "Invalid delay channels {}", params.channel_count); @@ -208,8 +208,8 @@ void DelayCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proce } void DelayCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (s16 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp index 27d8b9844..d2bfb67cc 100644 --- a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp +++ b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp @@ -408,8 +408,8 @@ void I3dl2ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& } void I3dl2ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/light_limiter.cpp b/src/audio_core/renderer/command/effect/light_limiter.cpp index e8fb0e2fc..4161a9821 100644 --- a/src/audio_core/renderer/command/effect/light_limiter.cpp +++ b/src/audio_core/renderer/command/effect/light_limiter.cpp @@ -47,8 +47,8 @@ static void InitializeLightLimiterEffect(const LightLimiterInfo::ParameterVersio */ static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& params, LightLimiterInfo::State& state, const bool enabled, - std::vector>& inputs, - std::vector>& outputs, const u32 sample_count, + std::span> inputs, + std::span> outputs, const u32 sample_count, LightLimiterInfo::StatisticsInternal* statistics) { constexpr s64 min{std::numeric_limits::min()}; constexpr s64 max{std::numeric_limits::max()}; @@ -147,8 +147,8 @@ void LightLimiterVersion1Command::Dump([[maybe_unused]] const ADSP::CommandListP } void LightLimiterVersion1Command::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, @@ -190,8 +190,8 @@ void LightLimiterVersion2Command::Dump([[maybe_unused]] const ADSP::CommandListP } void LightLimiterVersion2Command::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/reverb.cpp b/src/audio_core/renderer/command/effect/reverb.cpp index 8b9b65214..fc2f15a5e 100644 --- a/src/audio_core/renderer/command/effect/reverb.cpp +++ b/src/audio_core/renderer/command/effect/reverb.cpp @@ -250,8 +250,8 @@ static Common::FixedPoint<50, 14> Axfx2AllPassTick(ReverbInfo::ReverbDelayLine& */ template static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, - std::vector>& inputs, - std::vector>& outputs, const u32 sample_count) { + std::span> inputs, + std::span> outputs, const u32 sample_count) { static constexpr std::array OutTapIndexes1Ch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; @@ -369,8 +369,8 @@ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, Rever * @param sample_count - Number of samples to process. */ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, - const bool enabled, std::vector>& inputs, - std::vector>& outputs, const u32 sample_count) { + const bool enabled, std::span> inputs, + std::span> outputs, const u32 sample_count) { if (enabled) { switch (params.channel_count) { case 0: @@ -412,8 +412,8 @@ void ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proc } void ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/sink/circular_buffer.cpp b/src/audio_core/renderer/command/sink/circular_buffer.cpp index ded5afc94..e2ce59792 100644 --- a/src/audio_core/renderer/command/sink/circular_buffer.cpp +++ b/src/audio_core/renderer/command/sink/circular_buffer.cpp @@ -24,7 +24,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces constexpr s32 min{std::numeric_limits::min()}; constexpr s32 max{std::numeric_limits::max()}; - std::vector output(processor.sample_count); + std::array output{}; for (u32 channel = 0; channel < input_count; channel++) { auto input{processor.mix_buffers.subspan(inputs[channel] * processor.sample_count, processor.sample_count)}; @@ -33,7 +33,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces } processor.memory->WriteBlockUnsafe(address + pos, output.data(), - output.size() * sizeof(s16)); + processor.sample_count * sizeof(s16)); pos += static_cast(processor.sample_count * sizeof(s16)); if (pos >= size) { pos = 0; diff --git a/src/audio_core/renderer/command/sink/device.cpp b/src/audio_core/renderer/command/sink/device.cpp index e88372a75..5f74dd7ad 100644 --- a/src/audio_core/renderer/command/sink/device.cpp +++ b/src/audio_core/renderer/command/sink/device.cpp @@ -33,8 +33,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) { .consumed{false}, }; - std::vector samples(out_buffer.frames * input_count); - + std::array samples{}; for (u32 channel = 0; channel < input_count; channel++) { const auto offset{inputs[channel] * out_buffer.frames}; @@ -45,7 +44,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) { } out_buffer.tag = reinterpret_cast(samples.data()); - stream->AppendBuffer(out_buffer, samples); + stream->AppendBuffer(out_buffer, {samples.data(), out_buffer.frames * input_count}); if (stream->IsPaused()) { stream->Start(); diff --git a/src/audio_core/renderer/mix/mix_context.cpp b/src/audio_core/renderer/mix/mix_context.cpp index 35b748ede..3a18ae7c2 100644 --- a/src/audio_core/renderer/mix/mix_context.cpp +++ b/src/audio_core/renderer/mix/mix_context.cpp @@ -125,10 +125,10 @@ bool MixContext::TSortInfo(const SplitterContext& splitter_context) { return false; } - std::vector sorted_results{node_states.GetSortedResuls()}; - const auto result_size{std::min(count, static_cast(sorted_results.size()))}; + auto sorted_results{node_states.GetSortedResuls()}; + const auto result_size{std::min(count, static_cast(sorted_results.second))}; for (s32 i = 0; i < result_size; i++) { - sorted_mix_infos[i] = &mix_infos[sorted_results[i]]; + sorted_mix_infos[i] = &mix_infos[sorted_results.first[i]]; } CalcMixBufferOffset(); diff --git a/src/audio_core/renderer/nodes/node_states.cpp b/src/audio_core/renderer/nodes/node_states.cpp index 1821a51e6..b7a44a54c 100644 --- a/src/audio_core/renderer/nodes/node_states.cpp +++ b/src/audio_core/renderer/nodes/node_states.cpp @@ -134,8 +134,8 @@ u32 NodeStates::GetNodeCount() const { return node_count; } -std::vector NodeStates::GetSortedResuls() const { - return {results.rbegin(), results.rbegin() + result_pos}; +std::pair::reverse_iterator, size_t> NodeStates::GetSortedResuls() const { + return {results.rbegin(), result_pos}; } } // namespace AudioCore::AudioRenderer diff --git a/src/audio_core/renderer/nodes/node_states.h b/src/audio_core/renderer/nodes/node_states.h index 94b1d1254..e768cd4b5 100644 --- a/src/audio_core/renderer/nodes/node_states.h +++ b/src/audio_core/renderer/nodes/node_states.h @@ -175,7 +175,7 @@ public: * * @return Vector of nodes in reverse order. */ - std::vector GetSortedResuls() const; + std::pair::reverse_iterator, size_t> GetSortedResuls() const; private: /// Number of nodes in the graph diff --git a/src/audio_core/renderer/system.cpp b/src/audio_core/renderer/system.cpp index 53b258c4f..a23627472 100644 --- a/src/audio_core/renderer/system.cpp +++ b/src/audio_core/renderer/system.cpp @@ -444,6 +444,7 @@ Result System::Update(std::span input, std::span performance, std: std::scoped_lock l{lock}; const auto start_time{core.CoreTiming().GetClockTicks()}; + std::memset(output.data(), 0, output.size()); InfoUpdater info_updater(input, output, process_handle, behavior); -- cgit v1.2.3