From 355dee533bb34a571b9367820a63cccb668cf866 Mon Sep 17 00:00:00 2001 From: noptuno Date: Thu, 27 Apr 2023 20:29:30 -0400 Subject: Merging PR_218 openai_rev package with new streamlit chat app --- .../lib/python3.9/site-packages/toolz/itertoolz.py | 1057 ++++++++++++++++++++ 1 file changed, 1057 insertions(+) create mode 100644 venv/lib/python3.9/site-packages/toolz/itertoolz.py (limited to 'venv/lib/python3.9/site-packages/toolz/itertoolz.py') diff --git a/venv/lib/python3.9/site-packages/toolz/itertoolz.py b/venv/lib/python3.9/site-packages/toolz/itertoolz.py new file mode 100644 index 00000000..5049e5eb --- /dev/null +++ b/venv/lib/python3.9/site-packages/toolz/itertoolz.py @@ -0,0 +1,1057 @@ +import itertools +import heapq +import collections +import operator +from functools import partial +from itertools import filterfalse, zip_longest +from collections.abc import Sequence +from toolz.utils import no_default + + +__all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave', + 'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth', + 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv', + 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', + 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', + 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample') + + +def remove(predicate, seq): + """ Return those items of sequence for which predicate(item) is False + + >>> def iseven(x): + ... return x % 2 == 0 + >>> list(remove(iseven, [1, 2, 3, 4])) + [1, 3] + """ + return filterfalse(predicate, seq) + + +def accumulate(binop, seq, initial=no_default): + """ Repeatedly apply binary function to a sequence, accumulating results + + >>> from operator import add, mul + >>> list(accumulate(add, [1, 2, 3, 4, 5])) + [1, 3, 6, 10, 15] + >>> list(accumulate(mul, [1, 2, 3, 4, 5])) + [1, 2, 6, 24, 120] + + Accumulate is similar to ``reduce`` and is good for making functions like + cumulative sum: + + >>> from functools import partial, reduce + >>> sum = partial(reduce, add) + >>> cumsum = partial(accumulate, add) + + Accumulate also takes an optional argument that will be used as the first + value. This is similar to reduce. + + >>> list(accumulate(add, [1, 2, 3], -1)) + [-1, 0, 2, 5] + >>> list(accumulate(add, [], 1)) + [1] + + See Also: + itertools.accumulate : In standard itertools for Python 3.2+ + """ + seq = iter(seq) + if initial == no_default: + try: + result = next(seq) + except StopIteration: + return + else: + result = initial + yield result + for elem in seq: + result = binop(result, elem) + yield result + + +def groupby(key, seq): + """ Group a collection by a key function + + >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank'] + >>> groupby(len, names) # doctest: +SKIP + {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']} + + >>> iseven = lambda x: x % 2 == 0 + >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) # doctest: +SKIP + {False: [1, 3, 5, 7], True: [2, 4, 6, 8]} + + Non-callable keys imply grouping on a member. + + >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'}, + ... {'name': 'Bob', 'gender': 'M'}, + ... {'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP + {'F': [{'gender': 'F', 'name': 'Alice'}], + 'M': [{'gender': 'M', 'name': 'Bob'}, + {'gender': 'M', 'name': 'Charlie'}]} + + Not to be confused with ``itertools.groupby`` + + See Also: + countby + """ + if not callable(key): + key = getter(key) + d = collections.defaultdict(lambda: [].append) + for item in seq: + d[key(item)](item) + rv = {} + for k, v in d.items(): + rv[k] = v.__self__ + return rv + + +def merge_sorted(*seqs, **kwargs): + """ Merge and sort a collection of sorted collections + + This works lazily and only keeps one value from each iterable in memory. + + >>> list(merge_sorted([1, 3, 5], [2, 4, 6])) + [1, 2, 3, 4, 5, 6] + + >>> ''.join(merge_sorted('abc', 'abc', 'abc')) + 'aaabbbccc' + + The "key" function used to sort the input may be passed as a keyword. + + >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3)) + [2, 1, 3, 3] + """ + if len(seqs) == 0: + return iter([]) + elif len(seqs) == 1: + return iter(seqs[0]) + + key = kwargs.get('key', None) + if key is None: + return _merge_sorted_binary(seqs) + else: + return _merge_sorted_binary_key(seqs, key) + + +def _merge_sorted_binary(seqs): + mid = len(seqs) // 2 + L1 = seqs[:mid] + if len(L1) == 1: + seq1 = iter(L1[0]) + else: + seq1 = _merge_sorted_binary(L1) + L2 = seqs[mid:] + if len(L2) == 1: + seq2 = iter(L2[0]) + else: + seq2 = _merge_sorted_binary(L2) + + try: + val2 = next(seq2) + except StopIteration: + for val1 in seq1: + yield val1 + return + + for val1 in seq1: + if val2 < val1: + yield val2 + for val2 in seq2: + if val2 < val1: + yield val2 + else: + yield val1 + break + else: + break + else: + yield val1 + else: + yield val2 + for val2 in seq2: + yield val2 + return + yield val1 + for val1 in seq1: + yield val1 + + +def _merge_sorted_binary_key(seqs, key): + mid = len(seqs) // 2 + L1 = seqs[:mid] + if len(L1) == 1: + seq1 = iter(L1[0]) + else: + seq1 = _merge_sorted_binary_key(L1, key) + L2 = seqs[mid:] + if len(L2) == 1: + seq2 = iter(L2[0]) + else: + seq2 = _merge_sorted_binary_key(L2, key) + + try: + val2 = next(seq2) + except StopIteration: + for val1 in seq1: + yield val1 + return + key2 = key(val2) + + for val1 in seq1: + key1 = key(val1) + if key2 < key1: + yield val2 + for val2 in seq2: + key2 = key(val2) + if key2 < key1: + yield val2 + else: + yield val1 + break + else: + break + else: + yield val1 + else: + yield val2 + for val2 in seq2: + yield val2 + return + yield val1 + for val1 in seq1: + yield val1 + + +def interleave(seqs): + """ Interleave a sequence of sequences + + >>> list(interleave([[1, 2], [3, 4]])) + [1, 3, 2, 4] + + >>> ''.join(interleave(('ABC', 'XY'))) + 'AXBYC' + + Both the individual sequences and the sequence of sequences may be infinite + + Returns a lazy iterator + """ + iters = itertools.cycle(map(iter, seqs)) + while True: + try: + for itr in iters: + yield next(itr) + return + except StopIteration: + predicate = partial(operator.is_not, itr) + iters = itertools.cycle(itertools.takewhile(predicate, iters)) + + +def unique(seq, key=None): + """ Return only unique elements of a sequence + + >>> tuple(unique((1, 2, 3))) + (1, 2, 3) + >>> tuple(unique((1, 2, 1, 3))) + (1, 2, 3) + + Uniqueness can be defined by key keyword + + >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len)) + ('cat', 'mouse') + """ + seen = set() + seen_add = seen.add + if key is None: + for item in seq: + if item not in seen: + seen_add(item) + yield item + else: # calculate key + for item in seq: + val = key(item) + if val not in seen: + seen_add(val) + yield item + + +def isiterable(x): + """ Is x iterable? + + >>> isiterable([1, 2, 3]) + True + >>> isiterable('abc') + True + >>> isiterable(5) + False + """ + try: + iter(x) + return True + except TypeError: + return False + + +def isdistinct(seq): + """ All values in sequence are distinct + + >>> isdistinct([1, 2, 3]) + True + >>> isdistinct([1, 2, 1]) + False + + >>> isdistinct("Hello") + False + >>> isdistinct("World") + True + """ + if iter(seq) is seq: + seen = set() + seen_add = seen.add + for item in seq: + if item in seen: + return False + seen_add(item) + return True + else: + return len(seq) == len(set(seq)) + + +def take(n, seq): + """ The first n elements of a sequence + + >>> list(take(2, [10, 20, 30, 40, 50])) + [10, 20] + + See Also: + drop + tail + """ + return itertools.islice(seq, n) + + +def tail(n, seq): + """ The last n elements of a sequence + + >>> tail(2, [10, 20, 30, 40, 50]) + [40, 50] + + See Also: + drop + take + """ + try: + return seq[-n:] + except (TypeError, KeyError): + return tuple(collections.deque(seq, n)) + + +def drop(n, seq): + """ The sequence following the first n elements + + >>> list(drop(2, [10, 20, 30, 40, 50])) + [30, 40, 50] + + See Also: + take + tail + """ + return itertools.islice(seq, n, None) + + +def take_nth(n, seq): + """ Every nth item in seq + + >>> list(take_nth(2, [10, 20, 30, 40, 50])) + [10, 30, 50] + """ + return itertools.islice(seq, 0, None, n) + + +def first(seq): + """ The first element in a sequence + + >>> first('ABC') + 'A' + """ + return next(iter(seq)) + + +def second(seq): + """ The second element in a sequence + + >>> second('ABC') + 'B' + """ + seq = iter(seq) + next(seq) + return next(seq) + + +def nth(n, seq): + """ The nth element in a sequence + + >>> nth(1, 'ABC') + 'B' + """ + if isinstance(seq, (tuple, list, Sequence)): + return seq[n] + else: + return next(itertools.islice(seq, n, None)) + + +def last(seq): + """ The last element in a sequence + + >>> last('ABC') + 'C' + """ + return tail(1, seq)[0] + + +rest = partial(drop, 1) + + +def _get(ind, seq, default): + try: + return seq[ind] + except (KeyError, IndexError): + return default + + +def get(ind, seq, default=no_default): + """ Get element in a sequence or dict + + Provides standard indexing + + >>> get(1, 'ABC') # Same as 'ABC'[1] + 'B' + + Pass a list to get multiple values + + >>> get([1, 2], 'ABC') # ('ABC'[1], 'ABC'[2]) + ('B', 'C') + + Works on any value that supports indexing/getitem + For example here we see that it works with dictionaries + + >>> phonebook = {'Alice': '555-1234', + ... 'Bob': '555-5678', + ... 'Charlie':'555-9999'} + >>> get('Alice', phonebook) + '555-1234' + + >>> get(['Alice', 'Bob'], phonebook) + ('555-1234', '555-5678') + + Provide a default for missing values + + >>> get(['Alice', 'Dennis'], phonebook, None) + ('555-1234', None) + + See Also: + pluck + """ + try: + return seq[ind] + except TypeError: # `ind` may be a list + if isinstance(ind, list): + if default == no_default: + if len(ind) > 1: + return operator.itemgetter(*ind)(seq) + elif ind: + return seq[ind[0]], + else: + return () + else: + return tuple(_get(i, seq, default) for i in ind) + elif default != no_default: + return default + else: + raise + except (KeyError, IndexError): # we know `ind` is not a list + if default == no_default: + raise + else: + return default + + +def concat(seqs): + """ Concatenate zero or more iterables, any of which may be infinite. + + An infinite sequence will prevent the rest of the arguments from + being included. + + We use chain.from_iterable rather than ``chain(*seqs)`` so that seqs + can be a generator. + + >>> list(concat([[], [1], [2, 3]])) + [1, 2, 3] + + See also: + itertools.chain.from_iterable equivalent + """ + return itertools.chain.from_iterable(seqs) + + +def concatv(*seqs): + """ Variadic version of concat + + >>> list(concatv([], ["a"], ["b", "c"])) + ['a', 'b', 'c'] + + See also: + itertools.chain + """ + return concat(seqs) + + +def mapcat(func, seqs): + """ Apply func to each sequence in seqs, concatenating results. + + >>> list(mapcat(lambda s: [c.upper() for c in s], + ... [["a", "b"], ["c", "d", "e"]])) + ['A', 'B', 'C', 'D', 'E'] + """ + return concat(map(func, seqs)) + + +def cons(el, seq): + """ Add el to beginning of (possibly infinite) sequence seq. + + >>> list(cons(1, [2, 3])) + [1, 2, 3] + """ + return itertools.chain([el], seq) + + +def interpose(el, seq): + """ Introduce element between each pair of elements in seq + + >>> list(interpose("a", [1, 2, 3])) + [1, 'a', 2, 'a', 3] + """ + inposed = concat(zip(itertools.repeat(el), seq)) + next(inposed) + return inposed + + +def frequencies(seq): + """ Find number of occurrences of each value in seq + + >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat']) #doctest: +SKIP + {'cat': 3, 'ox': 1, 'pig': 2} + + See Also: + countby + groupby + """ + d = collections.defaultdict(int) + for item in seq: + d[item] += 1 + return dict(d) + + +def reduceby(key, binop, seq, init=no_default): + """ Perform a simultaneous groupby and reduction + + The computation: + + >>> result = reduceby(key, binop, seq, init) # doctest: +SKIP + + is equivalent to the following: + + >>> def reduction(group): # doctest: +SKIP + ... return reduce(binop, group, init) # doctest: +SKIP + + >>> groups = groupby(key, seq) # doctest: +SKIP + >>> result = valmap(reduction, groups) # doctest: +SKIP + + But the former does not build the intermediate groups, allowing it to + operate in much less space. This makes it suitable for larger datasets + that do not fit comfortably in memory + + The ``init`` keyword argument is the default initialization of the + reduction. This can be either a constant value like ``0`` or a callable + like ``lambda : 0`` as might be used in ``defaultdict``. + + Simple Examples + --------------- + + >>> from operator import add, mul + >>> iseven = lambda x: x % 2 == 0 + + >>> data = [1, 2, 3, 4, 5] + + >>> reduceby(iseven, add, data) # doctest: +SKIP + {False: 9, True: 6} + + >>> reduceby(iseven, mul, data) # doctest: +SKIP + {False: 15, True: 8} + + Complex Example + --------------- + + >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000}, + ... {'name': 'fight crime', 'state': 'IL', 'cost': 100000}, + ... {'name': 'help farmers', 'state': 'IL', 'cost': 2000000}, + ... {'name': 'help farmers', 'state': 'CA', 'cost': 200000}] + + >>> reduceby('state', # doctest: +SKIP + ... lambda acc, x: acc + x['cost'], + ... projects, 0) + {'CA': 1200000, 'IL': 2100000} + + Example Using ``init`` + ---------------------- + + >>> def set_add(s, i): + ... s.add(i) + ... return s + + >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set) # doctest: +SKIP + {True: set([2, 4]), + False: set([1, 3])} + """ + is_no_default = init == no_default + if not is_no_default and not callable(init): + _init = init + init = lambda: _init + if not callable(key): + key = getter(key) + d = {} + for item in seq: + k = key(item) + if k not in d: + if is_no_default: + d[k] = item + continue + else: + d[k] = init() + d[k] = binop(d[k], item) + return d + + +def iterate(func, x): + """ Repeatedly apply a function func onto an original input + + Yields x, then func(x), then func(func(x)), then func(func(func(x))), etc.. + + >>> def inc(x): return x + 1 + >>> counter = iterate(inc, 0) + >>> next(counter) + 0 + >>> next(counter) + 1 + >>> next(counter) + 2 + + >>> double = lambda x: x * 2 + >>> powers_of_two = iterate(double, 1) + >>> next(powers_of_two) + 1 + >>> next(powers_of_two) + 2 + >>> next(powers_of_two) + 4 + >>> next(powers_of_two) + 8 + """ + while True: + yield x + x = func(x) + + +def sliding_window(n, seq): + """ A sequence of overlapping subsequences + + >>> list(sliding_window(2, [1, 2, 3, 4])) + [(1, 2), (2, 3), (3, 4)] + + This function creates a sliding window suitable for transformations like + sliding means / smoothing + + >>> mean = lambda seq: float(sum(seq)) / len(seq) + >>> list(map(mean, sliding_window(2, [1, 2, 3, 4]))) + [1.5, 2.5, 3.5] + """ + return zip(*(collections.deque(itertools.islice(it, i), 0) or it + for i, it in enumerate(itertools.tee(seq, n)))) + + +no_pad = '__no__pad__' + + +def partition(n, seq, pad=no_pad): + """ Partition sequence into tuples of length n + + >>> list(partition(2, [1, 2, 3, 4])) + [(1, 2), (3, 4)] + + If the length of ``seq`` is not evenly divisible by ``n``, the final tuple + is dropped if ``pad`` is not specified, or filled to length ``n`` by pad: + + >>> list(partition(2, [1, 2, 3, 4, 5])) + [(1, 2), (3, 4)] + + >>> list(partition(2, [1, 2, 3, 4, 5], pad=None)) + [(1, 2), (3, 4), (5, None)] + + See Also: + partition_all + """ + args = [iter(seq)] * n + if pad is no_pad: + return zip(*args) + else: + return zip_longest(*args, fillvalue=pad) + + +def partition_all(n, seq): + """ Partition all elements of sequence into tuples of length at most n + + The final tuple may be shorter to accommodate extra elements. + + >>> list(partition_all(2, [1, 2, 3, 4])) + [(1, 2), (3, 4)] + + >>> list(partition_all(2, [1, 2, 3, 4, 5])) + [(1, 2), (3, 4), (5,)] + + See Also: + partition + """ + args = [iter(seq)] * n + it = zip_longest(*args, fillvalue=no_pad) + try: + prev = next(it) + except StopIteration: + return + for item in it: + yield prev + prev = item + if prev[-1] is no_pad: + try: + # If seq defines __len__, then + # we can quickly calculate where no_pad starts + yield prev[:len(seq) % n] + except TypeError: + # Get first index of no_pad without using .index() + # https://github.com/pytoolz/toolz/issues/387 + # Binary search from CPython's bisect module, + # modified for identity testing. + lo, hi = 0, n + while lo < hi: + mid = (lo + hi) // 2 + if prev[mid] is no_pad: + hi = mid + else: + lo = mid + 1 + yield prev[:lo] + else: + yield prev + + +def count(seq): + """ Count the number of items in seq + + Like the builtin ``len`` but works on lazy sequences. + + Not to be confused with ``itertools.count`` + + See also: + len + """ + if hasattr(seq, '__len__'): + return len(seq) + return sum(1 for i in seq) + + +def pluck(ind, seqs, default=no_default): + """ plucks an element or several elements from each item in a sequence. + + ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more + elements of each item in the sequence. + + This is equivalent to running `map(curried.get(ind), seqs)` + + ``ind`` can be either a single string/index or a list of strings/indices. + ``seqs`` should be sequence containing sequences or dicts. + + e.g. + + >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}] + >>> list(pluck('name', data)) + ['Cheese', 'Pies'] + >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]])) + [(1, 2), (4, 5)] + + See Also: + get + map + """ + if default == no_default: + get = getter(ind) + return map(get, seqs) + elif isinstance(ind, list): + return (tuple(_get(item, seq, default) for item in ind) + for seq in seqs) + return (_get(ind, seq, default) for seq in seqs) + + +def getter(index): + if isinstance(index, list): + if len(index) == 1: + index = index[0] + return lambda x: (x[index],) + elif index: + return operator.itemgetter(*index) + else: + return lambda x: () + else: + return operator.itemgetter(index) + + +def join(leftkey, leftseq, rightkey, rightseq, + left_default=no_default, right_default=no_default): + """ Join two sequences on common attributes + + This is a semi-streaming operation. The LEFT sequence is fully evaluated + and placed into memory. The RIGHT sequence is evaluated lazily and so can + be arbitrarily large. + (Note: If right_default is defined, then unique keys of rightseq + will also be stored in memory.) + + >>> friends = [('Alice', 'Edith'), + ... ('Alice', 'Zhao'), + ... ('Edith', 'Alice'), + ... ('Zhao', 'Alice'), + ... ('Zhao', 'Edith')] + + >>> cities = [('Alice', 'NYC'), + ... ('Alice', 'Chicago'), + ... ('Dan', 'Syndey'), + ... ('Edith', 'Paris'), + ... ('Edith', 'Berlin'), + ... ('Zhao', 'Shanghai')] + + >>> # Vacation opportunities + >>> # In what cities do people have friends? + >>> result = join(second, friends, + ... first, cities) + >>> for ((a, b), (c, d)) in sorted(unique(result)): + ... print((a, d)) + ('Alice', 'Berlin') + ('Alice', 'Paris') + ('Alice', 'Shanghai') + ('Edith', 'Chicago') + ('Edith', 'NYC') + ('Zhao', 'Chicago') + ('Zhao', 'NYC') + ('Zhao', 'Berlin') + ('Zhao', 'Paris') + + Specify outer joins with keyword arguments ``left_default`` and/or + ``right_default``. Here is a full outer join in which unmatched elements + are paired with None. + + >>> identity = lambda x: x + >>> list(join(identity, [1, 2, 3], + ... identity, [2, 3, 4], + ... left_default=None, right_default=None)) + [(2, 2), (3, 3), (None, 4), (1, None)] + + Usually the key arguments are callables to be applied to the sequences. If + the keys are not obviously callable then it is assumed that indexing was + intended, e.g. the following is a legal change. + The join is implemented as a hash join and the keys of leftseq must be + hashable. Additionally, if right_default is defined, then keys of rightseq + must also be hashable. + + >>> # result = join(second, friends, first, cities) + >>> result = join(1, friends, 0, cities) # doctest: +SKIP + """ + if not callable(leftkey): + leftkey = getter(leftkey) + if not callable(rightkey): + rightkey = getter(rightkey) + + d = groupby(leftkey, leftseq) + + if left_default == no_default and right_default == no_default: + # Inner Join + for item in rightseq: + key = rightkey(item) + if key in d: + for left_match in d[key]: + yield (left_match, item) + elif left_default != no_default and right_default == no_default: + # Right Join + for item in rightseq: + key = rightkey(item) + if key in d: + for left_match in d[key]: + yield (left_match, item) + else: + yield (left_default, item) + elif right_default != no_default: + seen_keys = set() + seen = seen_keys.add + + if left_default == no_default: + # Left Join + for item in rightseq: + key = rightkey(item) + seen(key) + if key in d: + for left_match in d[key]: + yield (left_match, item) + else: + # Full Join + for item in rightseq: + key = rightkey(item) + seen(key) + if key in d: + for left_match in d[key]: + yield (left_match, item) + else: + yield (left_default, item) + + for key, matches in d.items(): + if key not in seen_keys: + for match in matches: + yield (match, right_default) + + +def diff(*seqs, **kwargs): + """ Return those items that differ between sequences + + >>> list(diff([1, 2, 3], [1, 2, 10, 100])) + [(3, 10)] + + Shorter sequences may be padded with a ``default`` value: + + >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None)) + [(3, 10), (None, 100)] + + A ``key`` function may also be applied to each item to use during + comparisons: + + >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower)) + [('bananas', 'Oranges')] + """ + N = len(seqs) + if N == 1 and isinstance(seqs[0], list): + seqs = seqs[0] + N = len(seqs) + if N < 2: + raise TypeError('Too few sequences given (min 2 required)') + default = kwargs.get('default', no_default) + if default == no_default: + iters = zip(*seqs) + else: + iters = zip_longest(*seqs, fillvalue=default) + key = kwargs.get('key', None) + if key is None: + for items in iters: + if items.count(items[0]) != N: + yield items + else: + for items in iters: + vals = tuple(map(key, items)) + if vals.count(vals[0]) != N: + yield items + + +def topk(k, seq, key=None): + """ Find the k largest elements of a sequence + + Operates lazily in ``n*log(k)`` time + + >>> topk(2, [1, 100, 10, 1000]) + (1000, 100) + + Use a key function to change sorted order + + >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len) + ('Charlie', 'Alice') + + See also: + heapq.nlargest + """ + if key is not None and not callable(key): + key = getter(key) + return tuple(heapq.nlargest(k, seq, key=key)) + + +def peek(seq): + """ Retrieve the next element of a sequence + + Returns the first element and an iterable equivalent to the original + sequence, still having the element retrieved. + + >>> seq = [0, 1, 2, 3, 4] + >>> first, seq = peek(seq) + >>> first + 0 + >>> list(seq) + [0, 1, 2, 3, 4] + """ + iterator = iter(seq) + item = next(iterator) + return item, itertools.chain((item,), iterator) + + +def peekn(n, seq): + """ Retrieve the next n elements of a sequence + + Returns a tuple of the first n elements and an iterable equivalent + to the original, still having the elements retrieved. + + >>> seq = [0, 1, 2, 3, 4] + >>> first_two, seq = peekn(2, seq) + >>> first_two + (0, 1) + >>> list(seq) + [0, 1, 2, 3, 4] + """ + iterator = iter(seq) + peeked = tuple(take(n, iterator)) + return peeked, itertools.chain(iter(peeked), iterator) + + +def random_sample(prob, seq, random_state=None): + """ Return elements from a sequence with probability of prob + + Returns a lazy iterator of random items from seq. + + ``random_sample`` considers each item independently and without + replacement. See below how the first time it returned 13 items and the + next time it returned 6 items. + + >>> seq = list(range(100)) + >>> list(random_sample(0.1, seq)) # doctest: +SKIP + [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95] + >>> list(random_sample(0.1, seq)) # doctest: +SKIP + [6, 44, 54, 61, 69, 94] + + Providing an integer seed for ``random_state`` will result in + deterministic sampling. Given the same seed it will return the same sample + every time. + + >>> list(random_sample(0.1, seq, random_state=2016)) + [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98] + >>> list(random_sample(0.1, seq, random_state=2016)) + [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98] + + ``random_state`` can also be any object with a method ``random`` that + returns floats between 0.0 and 1.0 (exclusive). + + >>> from random import Random + >>> randobj = Random(2016) + >>> list(random_sample(0.1, seq, random_state=randobj)) + [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98] + """ + if not hasattr(random_state, 'random'): + from random import Random + + random_state = Random(random_state) + return filter(lambda _: random_state.random() < prob, seq) -- cgit v1.2.3