From 933508e2a2f7923cebc15d679b78933df8fb9ee5 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 3 Aug 2017 12:22:51 +0100 Subject: interpolate: Interpolate on a frame-by-frame basis --- src/audio_core/interpolate.cpp | 86 +++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 48 deletions(-) (limited to 'src/audio_core/interpolate.cpp') diff --git a/src/audio_core/interpolate.cpp b/src/audio_core/interpolate.cpp index 8a5d4181a..16e68bc5c 100644 --- a/src/audio_core/interpolate.cpp +++ b/src/audio_core/interpolate.cpp @@ -13,74 +13,64 @@ namespace AudioInterp { constexpr u64 scale_factor = 1 << 24; constexpr u64 scale_mask = scale_factor - 1; -/// Here we step over the input in steps of rate_multiplier, until we consume all of the input. +/// Here we step over the input in steps of rate, until we consume all of the input. /// Three adjacent samples are passed to fn each step. template -static StereoBuffer16 StepOverSamples(State& state, const StereoBuffer16& input, - float rate_multiplier, Function fn) { - ASSERT(rate_multiplier > 0); +static void StepOverSamples(State& state, StereoBuffer16& input, float rate, + DSP::HLE::StereoFrame16& output, size_t& outputi, Function fn) { + ASSERT(rate > 0); - if (input.size() < 2) - return {}; + if (input.empty()) + return; - StereoBuffer16 output; - output.reserve(static_cast(input.size() / rate_multiplier)); + input.insert(input.begin(), {state.xn2, state.xn1}); - u64 step_size = static_cast(rate_multiplier * scale_factor); + const u64 step_size = static_cast(rate * scale_factor); + u64 fposition = state.fposition; + size_t inputi = 0; - u64 fposition = 0; - const u64 max_fposition = input.size() * scale_factor; + while (outputi < output.size()) { + inputi = static_cast(fposition / scale_factor); - while (fposition < 1 * scale_factor) { - u64 fraction = fposition & scale_mask; - - output.push_back(fn(fraction, state.xn2, state.xn1, input[0])); - - fposition += step_size; - } - - while (fposition < 2 
* scale_factor) { - u64 fraction = fposition & scale_mask; - - output.push_back(fn(fraction, state.xn1, input[0], input[1])); - - fposition += step_size; - } + if (inputi + 2 >= input.size()) { + inputi = input.size() - 2; + break; + } - while (fposition < max_fposition) { u64 fraction = fposition & scale_mask; - - size_t index = static_cast(fposition / scale_factor); - output.push_back(fn(fraction, input[index - 2], input[index - 1], input[index])); + output[outputi++] = fn(fraction, input[inputi], input[inputi + 1], input[inputi + 2]); fposition += step_size; } - state.xn2 = input[input.size() - 2]; - state.xn1 = input[input.size() - 1]; + state.xn2 = input[inputi]; + state.xn1 = input[inputi + 1]; + state.fposition = fposition - inputi * scale_factor; - return output; + input.erase(input.begin(), input.begin() + inputi + 2); } -StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier) { - return StepOverSamples( - state, input, rate_multiplier, +void None(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output, + size_t& outputi) { + StepOverSamples( + state, input, rate, output, outputi, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) { return x0; }); } -StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier) { +void Linear(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output, + size_t& outputi) { // Note on accuracy: Some values that this produces are +/- 1 from the actual firmware. - return StepOverSamples(state, input, rate_multiplier, - [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) { - // This is a saturated subtraction. (Verified by black-box fuzzing.) 
- s64 delta0 = MathUtil::Clamp(x1[0] - x0[0], -32768, 32767); - s64 delta1 = MathUtil::Clamp(x1[1] - x0[1], -32768, 32767); - - return std::array{ - static_cast(x0[0] + fraction * delta0 / scale_factor), - static_cast(x0[1] + fraction * delta1 / scale_factor), - }; - }); + StepOverSamples(state, input, rate, output, outputi, + [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) { + // This is a saturated subtraction. (Verified by black-box fuzzing.) + s64 delta0 = MathUtil::Clamp(x1[0] - x0[0], -32768, 32767); + s64 delta1 = MathUtil::Clamp(x1[1] - x0[1], -32768, 32767); + + return std::array{ + static_cast(x0[0] + fraction * delta0 / scale_factor), + static_cast(x0[1] + fraction * delta1 / scale_factor), + }; + }); } } // namespace AudioInterp -- cgit v1.2.3 From d7459354f58d1b71fc0c5ec48de9242e6a2fd00c Mon Sep 17 00:00:00 2001 From: Subv Date: Mon, 25 Sep 2017 13:06:42 -0500 Subject: Audio: Use std::deque instead of std::vector for the audio buffer type (StereoBuffer16). The current code inserts and deletes elements from the beginning of the audio buffer, which is very inefficient in an std::vector. Profiling was done using Visual Studio 2017's Performance Analyzer in Super Mario 3D Land. Before this change: AudioInterp::Linear had 14.14% of the runtime (inclusive) and most of that time was spent in std::vector's insert implementation.
After this change: AudioInterp::Linear has 0.36% of the runtime (inclusive) --- src/audio_core/interpolate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/audio_core/interpolate.cpp') diff --git a/src/audio_core/interpolate.cpp b/src/audio_core/interpolate.cpp index 16e68bc5c..83573d772 100644 --- a/src/audio_core/interpolate.cpp +++ b/src/audio_core/interpolate.cpp @@ -47,7 +47,7 @@ static void StepOverSamples(State& state, StereoBuffer16& input, float rate, state.xn1 = input[inputi + 1]; state.fposition = fposition - inputi * scale_factor; - input.erase(input.begin(), input.begin() + inputi + 2); + input.erase(input.begin(), std::next(input.begin(), inputi + 2)); } void None(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output, -- cgit v1.2.3