Diffstat (limited to 'venv/lib/python3.9/site-packages/pydub/effects.py')
-rw-r--r-- | venv/lib/python3.9/site-packages/pydub/effects.py | 341 |
1 file changed, 341 insertions, 0 deletions
diff --git a/venv/lib/python3.9/site-packages/pydub/effects.py b/venv/lib/python3.9/site-packages/pydub/effects.py
new file mode 100644
index 00000000..0210521a
--- /dev/null
+++ b/venv/lib/python3.9/site-packages/pydub/effects.py
@@ -0,0 +1,341 @@
import sys
import math
import array

from .utils import (
    db_to_float,
    ratio_to_db,
    register_pydub_effect,
    make_chunks,
    audioop,
    get_min_max_value
)
from .silence import split_on_silence
from .exceptions import TooManyMissingFrames, InvalidDuration

if sys.version_info >= (3, 0):
    xrange = range


@register_pydub_effect
def apply_mono_filter_to_each_channel(seg, filter_fn):
    n_channels = seg.channels

    channel_segs = seg.split_to_mono()
    channel_segs = [filter_fn(channel_seg) for channel_seg in channel_segs]

    out_data = seg.get_array_of_samples()
    for channel_i, channel_seg in enumerate(channel_segs):
        for sample_i, sample in enumerate(channel_seg.get_array_of_samples()):
            index = (sample_i * n_channels) + channel_i
            out_data[index] = sample

    return seg._spawn(out_data)


@register_pydub_effect
def normalize(seg, headroom=0.1):
    """
    headroom is how close to the maximum volume to boost the signal up to (specified in dB)
    """
    peak_sample_val = seg.max

    # if the max is 0, this audio segment is silent, and can't be normalized
    if peak_sample_val == 0:
        return seg

    target_peak = seg.max_possible_amplitude * db_to_float(-headroom)

    needed_boost = ratio_to_db(target_peak / peak_sample_val)
    return seg.apply_gain(needed_boost)
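
# --- Editor's usage sketch (illustration only, not part of pydub's source) --
# normalize() boosts the segment so its peak sits `headroom` dB below 0 dBFS.
# Worked numbers for a 16-bit segment: max_possible_amplitude is 32768; with a
# peak sample of 8192 and headroom=0.1 the applied gain comes out to
# ratio_to_db((32768 * db_to_float(-0.1)) / 8192), roughly +11.9 dB.
def _normalize_example():  # hypothetical helper, for illustration only
    # "quiet.wav" is a hypothetical input file
    from pydub import AudioSegment
    quiet = AudioSegment.from_wav("quiet.wav")
    # @register_pydub_effect also exposes each effect as an AudioSegment method
    return quiet.normalize(headroom=0.1)
# -----------------------------------------------------------------------------
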
@register_pydub_effect
def speedup(seg, playback_speed=1.5, chunk_size=150, crossfade=25):
    # we will keep audio in 150ms chunks since one waveform at 20Hz is 50ms long
    # (20 Hz is the lowest frequency audible to humans)

    # portion of AUDIO TO KEEP. if playback speed is 1.25 we keep 80% (0.8) and
    # discard 20% (0.2)
    atk = 1.0 / playback_speed

    if playback_speed < 2.0:
        # removing less than half the audio - keep the chunk size fixed and
        # vary how much audio is removed between chunks
        ms_to_remove_per_chunk = int(chunk_size * (1 - atk) / atk)
    else:
        # removing at least half the audio - keep the removed amount fixed and
        # shrink the chunks that are kept
        ms_to_remove_per_chunk = int(chunk_size)
        chunk_size = int(atk * chunk_size / (1 - atk))

    # the crossfade cannot be longer than the amount of audio we're removing
    crossfade = min(crossfade, ms_to_remove_per_chunk - 1)

    # DEBUG
    #print("chunk: {0}, rm: {1}".format(chunk_size, ms_to_remove_per_chunk))

    chunks = make_chunks(seg, chunk_size + ms_to_remove_per_chunk)
    if len(chunks) < 2:
        raise Exception("Could not speed up AudioSegment, it was too short {2:0.2f}s for the current settings:\n{0}ms chunks at {1:0.1f}x speedup".format(
            chunk_size, playback_speed, seg.duration_seconds))

    # we'll actually truncate a bit less than we calculated to make up for the
    # crossfade between chunks
    ms_to_remove_per_chunk -= crossfade

    # we don't want to truncate the last chunk since it is not guaranteed to be
    # the full chunk length
    last_chunk = chunks[-1]
    chunks = [chunk[:-ms_to_remove_per_chunk] for chunk in chunks[:-1]]

    out = chunks[0]
    for chunk in chunks[1:]:
        out = out.append(chunk, crossfade=crossfade)

    out += last_chunk
    return out
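
# --- Editor's worked example (illustration only, not part of pydub's source) -
# With the defaults, speedup(seg, 1.5) keeps atk = 1/1.5 ~= 66.7% of the audio:
# chunks of 150 + 75 = 225 ms are made, 75 ms would be dropped per chunk, and
# the crossfade turns that into a 50 ms truncation plus a 25 ms overlapped
# splice, so 150 of every 225 ms survive -> 1.5x playback speed.
def _speedup_example():  # hypothetical helper, for illustration only
    # "speech.wav" is a hypothetical input file
    from pydub import AudioSegment
    speech = AudioSegment.from_wav("speech.wav")
    return speech.speedup(playback_speed=1.5)  # ~2/3 the original duration
# -----------------------------------------------------------------------------
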
@register_pydub_effect
def strip_silence(seg, silence_len=1000, silence_thresh=-16, padding=100):
    if padding > silence_len:
        raise InvalidDuration("padding cannot be longer than silence_len")

    chunks = split_on_silence(seg, silence_len, silence_thresh, padding)
    crossfade = padding / 2

    if not len(chunks):
        return seg[0:0]

    seg = chunks[0]
    for chunk in chunks[1:]:
        seg = seg.append(chunk, crossfade=crossfade)

    return seg


@register_pydub_effect
def compress_dynamic_range(seg, threshold=-20.0, ratio=4.0, attack=5.0, release=50.0):
    """
    Keyword Arguments:

        threshold - default: -20.0
            Threshold in dBFS. default of -20.0 means -20dB relative to the
            maximum possible volume. 0dBFS is the maximum possible value so
            all values for this argument should be negative.

        ratio - default: 4.0
            Compression ratio. Audio louder than the threshold will be
            reduced to 1/ratio the volume. A ratio of 4.0 is equivalent to
            a setting of 4:1 in a pro-audio compressor like the Waves C1.

        attack - default: 5.0
            Attack in milliseconds. How long it should take for the compressor
            to kick in once the audio has exceeded the threshold.

        release - default: 50.0
            Release in milliseconds. How long it should take for the compressor
            to stop compressing after the audio has fallen below the threshold.

    For an overview of Dynamic Range Compression, and more detailed explanation
    of the related terminology, see:

        http://en.wikipedia.org/wiki/Dynamic_range_compression
    """

    thresh_rms = seg.max_possible_amplitude * db_to_float(threshold)

    look_frames = int(seg.frame_count(ms=attack))

    def rms_at(frame_i):
        return seg.get_sample_slice(frame_i - look_frames, frame_i).rms

    def db_over_threshold(rms):
        if rms == 0:
            return 0.0
        db = ratio_to_db(rms / thresh_rms)
        return max(db, 0)

    output = []

    # amount to reduce the volume of the audio by (in dB)
    attenuation = 0.0

    attack_frames = seg.frame_count(ms=attack)
    release_frames = seg.frame_count(ms=release)
    for i in xrange(int(seg.frame_count())):
        rms_now = rms_at(i)

        # with a ratio of 4.0 this means the volume will exceed the threshold by
        # 1/4 the amount (of dB) that it would otherwise
        max_attenuation = (1 - (1.0 / ratio)) * db_over_threshold(rms_now)

        attenuation_inc = max_attenuation / attack_frames
        attenuation_dec = max_attenuation / release_frames

        if rms_now > thresh_rms and attenuation <= max_attenuation:
            attenuation += attenuation_inc
            attenuation = min(attenuation, max_attenuation)
        else:
            attenuation -= attenuation_dec
            attenuation = max(attenuation, 0)

        frame = seg.get_frame(i)
        if attenuation != 0.0:
            frame = audioop.mul(frame,
                                seg.sample_width,
                                db_to_float(-attenuation))

        output.append(frame)

    return seg._spawn(data=b''.join(output))


# Invert the phase of the signal.
@register_pydub_effect
def invert_phase(seg, channels=(1, 1)):
    """
    channels - specifies which channel(s) to reverse the phase of: (1, 1)
    inverts the whole segment, (1, 0) only the left channel, and (0, 1) only
    the right. Inverting a single channel requires a stereo AudioSegment.
    """
    if channels == (1, 1):
        inverted = audioop.mul(seg._data, seg.sample_width, -1.0)
        return seg._spawn(data=inverted)
    else:
        if seg.channels == 2:
            left, right = seg.split_to_mono()
        else:
            raise Exception("Can't implicitly convert an AudioSegment with " + str(seg.channels) + " channels to stereo.")

        if channels == (1, 0):
            left = left.invert_phase()
        else:
            right = right.invert_phase()

        return seg.from_mono_audiosegments(left, right)


# High and low pass filters based on implementation found on Stack Overflow:
# http://stackoverflow.com/questions/13882038/implementing-simple-high-and-low-pass-filters-in-c

@register_pydub_effect
def low_pass_filter(seg, cutoff):
    """
    cutoff - Frequency (in Hz) where higher frequency signal will begin to
        be reduced by 6dB per octave (doubling in frequency) above this point
    """
    RC = 1.0 / (cutoff * 2 * math.pi)
    dt = 1.0 / seg.frame_rate

    alpha = dt / (RC + dt)

    original = seg.get_array_of_samples()
    filteredArray = array.array(seg.array_type, original)

    frame_count = int(seg.frame_count())

    last_val = [0] * seg.channels
    for i in range(seg.channels):
        last_val[i] = filteredArray[i] = original[i]

    for i in range(1, frame_count):
        for j in range(seg.channels):
            offset = (i * seg.channels) + j
            last_val[j] = last_val[j] + (alpha * (original[offset] - last_val[j]))
            filteredArray[offset] = int(last_val[j])

    return seg._spawn(data=filteredArray)
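
# --- Editor's worked example (illustration only, not part of pydub's source) -
# low_pass_filter() is a first-order IIR smoother: per channel it computes
# y[i] = y[i-1] + alpha * (x[i] - y[i-1]), with alpha = dt / (RC + dt) and
# RC = 1 / (2 * pi * cutoff). For cutoff=1000 Hz at a 44100 Hz frame rate:
# dt ~= 2.27e-5 s, RC ~= 1.59e-4 s, so alpha ~= 0.125.
def _low_pass_example():  # hypothetical helper, for illustration only
    # "mix.wav" is a hypothetical input file
    from pydub import AudioSegment
    mix = AudioSegment.from_wav("mix.wav")
    return mix.low_pass_filter(1000)  # ~6 dB/octave rolloff above 1 kHz
# -----------------------------------------------------------------------------
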
@register_pydub_effect
def high_pass_filter(seg, cutoff):
    """
    cutoff - Frequency (in Hz) where lower frequency signal will begin to
        be reduced by 6dB per octave (doubling in frequency) below this point
    """
    RC = 1.0 / (cutoff * 2 * math.pi)
    dt = 1.0 / seg.frame_rate

    alpha = RC / (RC + dt)

    minval, maxval = get_min_max_value(seg.sample_width * 8)

    original = seg.get_array_of_samples()
    filteredArray = array.array(seg.array_type, original)

    frame_count = int(seg.frame_count())

    last_val = [0] * seg.channels
    for i in range(seg.channels):
        last_val[i] = filteredArray[i] = original[i]

    for i in range(1, frame_count):
        for j in range(seg.channels):
            offset = (i * seg.channels) + j
            offset_minus_1 = ((i - 1) * seg.channels) + j

            last_val[j] = alpha * (last_val[j] + original[offset] - original[offset_minus_1])
            filteredArray[offset] = int(min(max(last_val[j], minval), maxval))

    return seg._spawn(data=filteredArray)


@register_pydub_effect
def pan(seg, pan_amount):
    """
    pan_amount should be between -1.0 (100% left) and +1.0 (100% right)

    When pan_amount == 0.0 the left/right balance is not changed.

    Panning does not alter the *perceived* loudness, but since loudness
    is decreasing on one side, the other side needs to get louder to
    compensate. When panned hard left, the left channel will be 3dB louder.
    """
    if not -1.0 <= pan_amount <= 1.0:
        raise ValueError("pan_amount should be between -1.0 (100% left) and +1.0 (100% right)")

    max_boost_db = ratio_to_db(2.0)
    boost_db = abs(pan_amount) * max_boost_db

    boost_factor = db_to_float(boost_db)
    reduce_factor = db_to_float(max_boost_db) - boost_factor

    reduce_db = ratio_to_db(reduce_factor)

    # Cut boost in half (max boost == 3dB) - in reality 2 speakers
    # do not sum to a full 6 dB.
    boost_db = boost_db / 2.0

    if pan_amount < 0:
        return seg.apply_gain_stereo(boost_db, reduce_db)
    else:
        return seg.apply_gain_stereo(reduce_db, boost_db)


@register_pydub_effect
def apply_gain_stereo(seg, left_gain=0.0, right_gain=0.0):
    """
    left_gain - amount of gain to apply to the left channel (in dB)
    right_gain - amount of gain to apply to the right channel (in dB)

    note: mono audio segments will be converted to stereo
    """
    if seg.channels == 1:
        left = right = seg
    elif seg.channels == 2:
        left, right = seg.split_to_mono()

    l_mult_factor = db_to_float(left_gain)
    r_mult_factor = db_to_float(right_gain)

    left_data = audioop.mul(left._data, left.sample_width, l_mult_factor)
    left_data = audioop.tostereo(left_data, left.sample_width, 1, 0)

    right_data = audioop.mul(right._data, right.sample_width, r_mult_factor)
    right_data = audioop.tostereo(right_data, right.sample_width, 0, 1)

    output = audioop.add(left_data, right_data, seg.sample_width)

    return seg._spawn(data=output,
                      overrides={'channels': 2,
                                 'frame_width': 2 * seg.sample_width})
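
# --- Editor's worked example (illustration only, not part of pydub's source) -
# pan(-0.5) boosts the left channel and attenuates the right: max_boost_db is
# ratio_to_db(2.0) ~= 6.02 dB, boost_db = 0.5 * 6.02 ~= 3.01 dB, reduce_db =
# ratio_to_db(2.0 - db_to_float(3.01)) ~= -4.65 dB, and the boost is then
# halved to ~+1.5 dB, so the call becomes apply_gain_stereo(+1.5, -4.65).
def _pan_example():  # hypothetical helper, for illustration only
    # "stereo.wav" is a hypothetical input file
    from pydub import AudioSegment
    stereo = AudioSegment.from_wav("stereo.wav")
    return stereo.pan(-0.5)  # halfway left: left ~+1.5 dB, right ~-4.6 dB
# -----------------------------------------------------------------------------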