summaryrefslogtreecommitdiffstats
path: root/venv/lib/python3.9/site-packages/toolz/itertoolz.py
diff options
context:
space:
mode:
Diffstat (limited to 'venv/lib/python3.9/site-packages/toolz/itertoolz.py')
-rw-r--r--venv/lib/python3.9/site-packages/toolz/itertoolz.py1057
1 files changed, 1057 insertions, 0 deletions
diff --git a/venv/lib/python3.9/site-packages/toolz/itertoolz.py b/venv/lib/python3.9/site-packages/toolz/itertoolz.py
new file mode 100644
index 00000000..5049e5eb
--- /dev/null
+++ b/venv/lib/python3.9/site-packages/toolz/itertoolz.py
@@ -0,0 +1,1057 @@
+import itertools
+import heapq
+import collections
+import operator
+from functools import partial
+from itertools import filterfalse, zip_longest
+from collections.abc import Sequence
+from toolz.utils import no_default
+
+
+__all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave',
+ 'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth',
+ 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv',
+ 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate',
+ 'sliding_window', 'partition', 'partition_all', 'count', 'pluck',
+ 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample')
+
+
+def remove(predicate, seq):
+ """ Return those items of sequence for which predicate(item) is False
+
+ >>> def iseven(x):
+ ... return x % 2 == 0
+ >>> list(remove(iseven, [1, 2, 3, 4]))
+ [1, 3]
+ """
+ return filterfalse(predicate, seq)
+
+
+def accumulate(binop, seq, initial=no_default):
+ """ Repeatedly apply binary function to a sequence, accumulating results
+
+ >>> from operator import add, mul
+ >>> list(accumulate(add, [1, 2, 3, 4, 5]))
+ [1, 3, 6, 10, 15]
+ >>> list(accumulate(mul, [1, 2, 3, 4, 5]))
+ [1, 2, 6, 24, 120]
+
+ Accumulate is similar to ``reduce`` and is good for making functions like
+ cumulative sum:
+
+ >>> from functools import partial, reduce
+ >>> sum = partial(reduce, add)
+ >>> cumsum = partial(accumulate, add)
+
+ Accumulate also takes an optional argument that will be used as the first
+ value. This is similar to reduce.
+
+ >>> list(accumulate(add, [1, 2, 3], -1))
+ [-1, 0, 2, 5]
+ >>> list(accumulate(add, [], 1))
+ [1]
+
+ See Also:
+ itertools.accumulate : In standard itertools for Python 3.2+
+ """
+ seq = iter(seq)
+ if initial == no_default:
+ try:
+ result = next(seq)
+ except StopIteration:
+ return
+ else:
+ result = initial
+ yield result
+ for elem in seq:
+ result = binop(result, elem)
+ yield result
+
+
+def groupby(key, seq):
+ """ Group a collection by a key function
+
+ >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
+ >>> groupby(len, names) # doctest: +SKIP
+ {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}
+
+ >>> iseven = lambda x: x % 2 == 0
+ >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) # doctest: +SKIP
+ {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}
+
+ Non-callable keys imply grouping on a member.
+
+ >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'},
+ ... {'name': 'Bob', 'gender': 'M'},
+ ... {'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP
+ {'F': [{'gender': 'F', 'name': 'Alice'}],
+ 'M': [{'gender': 'M', 'name': 'Bob'},
+ {'gender': 'M', 'name': 'Charlie'}]}
+
+ Not to be confused with ``itertools.groupby``
+
+ See Also:
+ countby
+ """
+ if not callable(key):
+ key = getter(key)
+ d = collections.defaultdict(lambda: [].append)
+ for item in seq:
+ d[key(item)](item)
+ rv = {}
+ for k, v in d.items():
+ rv[k] = v.__self__
+ return rv
+
+
+def merge_sorted(*seqs, **kwargs):
+ """ Merge and sort a collection of sorted collections
+
+ This works lazily and only keeps one value from each iterable in memory.
+
+ >>> list(merge_sorted([1, 3, 5], [2, 4, 6]))
+ [1, 2, 3, 4, 5, 6]
+
+ >>> ''.join(merge_sorted('abc', 'abc', 'abc'))
+ 'aaabbbccc'
+
+ The "key" function used to sort the input may be passed as a keyword.
+
+ >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3))
+ [2, 1, 3, 3]
+ """
+ if len(seqs) == 0:
+ return iter([])
+ elif len(seqs) == 1:
+ return iter(seqs[0])
+
+ key = kwargs.get('key', None)
+ if key is None:
+ return _merge_sorted_binary(seqs)
+ else:
+ return _merge_sorted_binary_key(seqs, key)
+
+
+def _merge_sorted_binary(seqs):
+ mid = len(seqs) // 2
+ L1 = seqs[:mid]
+ if len(L1) == 1:
+ seq1 = iter(L1[0])
+ else:
+ seq1 = _merge_sorted_binary(L1)
+ L2 = seqs[mid:]
+ if len(L2) == 1:
+ seq2 = iter(L2[0])
+ else:
+ seq2 = _merge_sorted_binary(L2)
+
+ try:
+ val2 = next(seq2)
+ except StopIteration:
+ for val1 in seq1:
+ yield val1
+ return
+
+ for val1 in seq1:
+ if val2 < val1:
+ yield val2
+ for val2 in seq2:
+ if val2 < val1:
+ yield val2
+ else:
+ yield val1
+ break
+ else:
+ break
+ else:
+ yield val1
+ else:
+ yield val2
+ for val2 in seq2:
+ yield val2
+ return
+ yield val1
+ for val1 in seq1:
+ yield val1
+
+
+def _merge_sorted_binary_key(seqs, key):
+ mid = len(seqs) // 2
+ L1 = seqs[:mid]
+ if len(L1) == 1:
+ seq1 = iter(L1[0])
+ else:
+ seq1 = _merge_sorted_binary_key(L1, key)
+ L2 = seqs[mid:]
+ if len(L2) == 1:
+ seq2 = iter(L2[0])
+ else:
+ seq2 = _merge_sorted_binary_key(L2, key)
+
+ try:
+ val2 = next(seq2)
+ except StopIteration:
+ for val1 in seq1:
+ yield val1
+ return
+ key2 = key(val2)
+
+ for val1 in seq1:
+ key1 = key(val1)
+ if key2 < key1:
+ yield val2
+ for val2 in seq2:
+ key2 = key(val2)
+ if key2 < key1:
+ yield val2
+ else:
+ yield val1
+ break
+ else:
+ break
+ else:
+ yield val1
+ else:
+ yield val2
+ for val2 in seq2:
+ yield val2
+ return
+ yield val1
+ for val1 in seq1:
+ yield val1
+
+
+def interleave(seqs):
+ """ Interleave a sequence of sequences
+
+ >>> list(interleave([[1, 2], [3, 4]]))
+ [1, 3, 2, 4]
+
+ >>> ''.join(interleave(('ABC', 'XY')))
+ 'AXBYC'
+
+ Both the individual sequences and the sequence of sequences may be infinite
+
+ Returns a lazy iterator
+ """
+ iters = itertools.cycle(map(iter, seqs))
+ while True:
+ try:
+ for itr in iters:
+ yield next(itr)
+ return
+ except StopIteration:
+ predicate = partial(operator.is_not, itr)
+ iters = itertools.cycle(itertools.takewhile(predicate, iters))
+
+
+def unique(seq, key=None):
+ """ Return only unique elements of a sequence
+
+ >>> tuple(unique((1, 2, 3)))
+ (1, 2, 3)
+ >>> tuple(unique((1, 2, 1, 3)))
+ (1, 2, 3)
+
+ Uniqueness can be defined by key keyword
+
+ >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len))
+ ('cat', 'mouse')
+ """
+ seen = set()
+ seen_add = seen.add
+ if key is None:
+ for item in seq:
+ if item not in seen:
+ seen_add(item)
+ yield item
+ else: # calculate key
+ for item in seq:
+ val = key(item)
+ if val not in seen:
+ seen_add(val)
+ yield item
+
+
+def isiterable(x):
+ """ Is x iterable?
+
+ >>> isiterable([1, 2, 3])
+ True
+ >>> isiterable('abc')
+ True
+ >>> isiterable(5)
+ False
+ """
+ try:
+ iter(x)
+ return True
+ except TypeError:
+ return False
+
+
+def isdistinct(seq):
+ """ All values in sequence are distinct
+
+ >>> isdistinct([1, 2, 3])
+ True
+ >>> isdistinct([1, 2, 1])
+ False
+
+ >>> isdistinct("Hello")
+ False
+ >>> isdistinct("World")
+ True
+ """
+ if iter(seq) is seq:
+ seen = set()
+ seen_add = seen.add
+ for item in seq:
+ if item in seen:
+ return False
+ seen_add(item)
+ return True
+ else:
+ return len(seq) == len(set(seq))
+
+
+def take(n, seq):
+ """ The first n elements of a sequence
+
+ >>> list(take(2, [10, 20, 30, 40, 50]))
+ [10, 20]
+
+ See Also:
+ drop
+ tail
+ """
+ return itertools.islice(seq, n)
+
+
+def tail(n, seq):
+ """ The last n elements of a sequence
+
+ >>> tail(2, [10, 20, 30, 40, 50])
+ [40, 50]
+
+ See Also:
+ drop
+ take
+ """
+ try:
+ return seq[-n:]
+ except (TypeError, KeyError):
+ return tuple(collections.deque(seq, n))
+
+
+def drop(n, seq):
+ """ The sequence following the first n elements
+
+ >>> list(drop(2, [10, 20, 30, 40, 50]))
+ [30, 40, 50]
+
+ See Also:
+ take
+ tail
+ """
+ return itertools.islice(seq, n, None)
+
+
+def take_nth(n, seq):
+ """ Every nth item in seq
+
+ >>> list(take_nth(2, [10, 20, 30, 40, 50]))
+ [10, 30, 50]
+ """
+ return itertools.islice(seq, 0, None, n)
+
+
+def first(seq):
+ """ The first element in a sequence
+
+ >>> first('ABC')
+ 'A'
+ """
+ return next(iter(seq))
+
+
+def second(seq):
+ """ The second element in a sequence
+
+ >>> second('ABC')
+ 'B'
+ """
+ seq = iter(seq)
+ next(seq)
+ return next(seq)
+
+
+def nth(n, seq):
+ """ The nth element in a sequence
+
+ >>> nth(1, 'ABC')
+ 'B'
+ """
+ if isinstance(seq, (tuple, list, Sequence)):
+ return seq[n]
+ else:
+ return next(itertools.islice(seq, n, None))
+
+
+def last(seq):
+ """ The last element in a sequence
+
+ >>> last('ABC')
+ 'C'
+ """
+ return tail(1, seq)[0]
+
+
+rest = partial(drop, 1)
+
+
+def _get(ind, seq, default):
+ try:
+ return seq[ind]
+ except (KeyError, IndexError):
+ return default
+
+
+def get(ind, seq, default=no_default):
+ """ Get element in a sequence or dict
+
+ Provides standard indexing
+
+ >>> get(1, 'ABC') # Same as 'ABC'[1]
+ 'B'
+
+ Pass a list to get multiple values
+
+ >>> get([1, 2], 'ABC') # ('ABC'[1], 'ABC'[2])
+ ('B', 'C')
+
+ Works on any value that supports indexing/getitem
+ For example here we see that it works with dictionaries
+
+ >>> phonebook = {'Alice': '555-1234',
+ ... 'Bob': '555-5678',
+ ... 'Charlie':'555-9999'}
+ >>> get('Alice', phonebook)
+ '555-1234'
+
+ >>> get(['Alice', 'Bob'], phonebook)
+ ('555-1234', '555-5678')
+
+ Provide a default for missing values
+
+ >>> get(['Alice', 'Dennis'], phonebook, None)
+ ('555-1234', None)
+
+ See Also:
+ pluck
+ """
+ try:
+ return seq[ind]
+ except TypeError: # `ind` may be a list
+ if isinstance(ind, list):
+ if default == no_default:
+ if len(ind) > 1:
+ return operator.itemgetter(*ind)(seq)
+ elif ind:
+ return seq[ind[0]],
+ else:
+ return ()
+ else:
+ return tuple(_get(i, seq, default) for i in ind)
+ elif default != no_default:
+ return default
+ else:
+ raise
+ except (KeyError, IndexError): # we know `ind` is not a list
+ if default == no_default:
+ raise
+ else:
+ return default
+
+
+def concat(seqs):
+ """ Concatenate zero or more iterables, any of which may be infinite.
+
+ An infinite sequence will prevent the rest of the arguments from
+ being included.
+
+ We use chain.from_iterable rather than ``chain(*seqs)`` so that seqs
+ can be a generator.
+
+ >>> list(concat([[], [1], [2, 3]]))
+ [1, 2, 3]
+
+ See also:
+ itertools.chain.from_iterable equivalent
+ """
+ return itertools.chain.from_iterable(seqs)
+
+
+def concatv(*seqs):
+ """ Variadic version of concat
+
+ >>> list(concatv([], ["a"], ["b", "c"]))
+ ['a', 'b', 'c']
+
+ See also:
+ itertools.chain
+ """
+ return concat(seqs)
+
+
+def mapcat(func, seqs):
+ """ Apply func to each sequence in seqs, concatenating results.
+
+ >>> list(mapcat(lambda s: [c.upper() for c in s],
+ ... [["a", "b"], ["c", "d", "e"]]))
+ ['A', 'B', 'C', 'D', 'E']
+ """
+ return concat(map(func, seqs))
+
+
+def cons(el, seq):
+ """ Add el to beginning of (possibly infinite) sequence seq.
+
+ >>> list(cons(1, [2, 3]))
+ [1, 2, 3]
+ """
+ return itertools.chain([el], seq)
+
+
+def interpose(el, seq):
+ """ Introduce element between each pair of elements in seq
+
+ >>> list(interpose("a", [1, 2, 3]))
+ [1, 'a', 2, 'a', 3]
+ """
+ inposed = concat(zip(itertools.repeat(el), seq))
+ next(inposed)
+ return inposed
+
+
+def frequencies(seq):
+ """ Find number of occurrences of each value in seq
+
+ >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat']) #doctest: +SKIP
+ {'cat': 3, 'ox': 1, 'pig': 2}
+
+ See Also:
+ countby
+ groupby
+ """
+ d = collections.defaultdict(int)
+ for item in seq:
+ d[item] += 1
+ return dict(d)
+
+
+def reduceby(key, binop, seq, init=no_default):
+ """ Perform a simultaneous groupby and reduction
+
+ The computation:
+
+ >>> result = reduceby(key, binop, seq, init) # doctest: +SKIP
+
+ is equivalent to the following:
+
+ >>> def reduction(group): # doctest: +SKIP
+ ... return reduce(binop, group, init) # doctest: +SKIP
+
+ >>> groups = groupby(key, seq) # doctest: +SKIP
+ >>> result = valmap(reduction, groups) # doctest: +SKIP
+
+ But the former does not build the intermediate groups, allowing it to
+ operate in much less space. This makes it suitable for larger datasets
+ that do not fit comfortably in memory
+
+ The ``init`` keyword argument is the default initialization of the
+ reduction. This can be either a constant value like ``0`` or a callable
+ like ``lambda : 0`` as might be used in ``defaultdict``.
+
+ Simple Examples
+ ---------------
+
+ >>> from operator import add, mul
+ >>> iseven = lambda x: x % 2 == 0
+
+ >>> data = [1, 2, 3, 4, 5]
+
+ >>> reduceby(iseven, add, data) # doctest: +SKIP
+ {False: 9, True: 6}
+
+ >>> reduceby(iseven, mul, data) # doctest: +SKIP
+ {False: 15, True: 8}
+
+ Complex Example
+ ---------------
+
+ >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000},
+ ... {'name': 'fight crime', 'state': 'IL', 'cost': 100000},
+ ... {'name': 'help farmers', 'state': 'IL', 'cost': 2000000},
+ ... {'name': 'help farmers', 'state': 'CA', 'cost': 200000}]
+
+ >>> reduceby('state', # doctest: +SKIP
+ ... lambda acc, x: acc + x['cost'],
+ ... projects, 0)
+ {'CA': 1200000, 'IL': 2100000}
+
+ Example Using ``init``
+ ----------------------
+
+ >>> def set_add(s, i):
+ ... s.add(i)
+ ... return s
+
+ >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set) # doctest: +SKIP
+ {True: set([2, 4]),
+ False: set([1, 3])}
+ """
+ is_no_default = init == no_default
+ if not is_no_default and not callable(init):
+ _init = init
+ init = lambda: _init
+ if not callable(key):
+ key = getter(key)
+ d = {}
+ for item in seq:
+ k = key(item)
+ if k not in d:
+ if is_no_default:
+ d[k] = item
+ continue
+ else:
+ d[k] = init()
+ d[k] = binop(d[k], item)
+ return d
+
+
+def iterate(func, x):
+ """ Repeatedly apply a function func onto an original input
+
+ Yields x, then func(x), then func(func(x)), then func(func(func(x))), etc..
+
+ >>> def inc(x): return x + 1
+ >>> counter = iterate(inc, 0)
+ >>> next(counter)
+ 0
+ >>> next(counter)
+ 1
+ >>> next(counter)
+ 2
+
+ >>> double = lambda x: x * 2
+ >>> powers_of_two = iterate(double, 1)
+ >>> next(powers_of_two)
+ 1
+ >>> next(powers_of_two)
+ 2
+ >>> next(powers_of_two)
+ 4
+ >>> next(powers_of_two)
+ 8
+ """
+ while True:
+ yield x
+ x = func(x)
+
+
+def sliding_window(n, seq):
+ """ A sequence of overlapping subsequences
+
+ >>> list(sliding_window(2, [1, 2, 3, 4]))
+ [(1, 2), (2, 3), (3, 4)]
+
+ This function creates a sliding window suitable for transformations like
+ sliding means / smoothing
+
+ >>> mean = lambda seq: float(sum(seq)) / len(seq)
+ >>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))
+ [1.5, 2.5, 3.5]
+ """
+ return zip(*(collections.deque(itertools.islice(it, i), 0) or it
+ for i, it in enumerate(itertools.tee(seq, n))))
+
+
+no_pad = '__no__pad__'
+
+
+def partition(n, seq, pad=no_pad):
+ """ Partition sequence into tuples of length n
+
+ >>> list(partition(2, [1, 2, 3, 4]))
+ [(1, 2), (3, 4)]
+
+ If the length of ``seq`` is not evenly divisible by ``n``, the final tuple
+ is dropped if ``pad`` is not specified, or filled to length ``n`` by pad:
+
+ >>> list(partition(2, [1, 2, 3, 4, 5]))
+ [(1, 2), (3, 4)]
+
+ >>> list(partition(2, [1, 2, 3, 4, 5], pad=None))
+ [(1, 2), (3, 4), (5, None)]
+
+ See Also:
+ partition_all
+ """
+ args = [iter(seq)] * n
+ if pad is no_pad:
+ return zip(*args)
+ else:
+ return zip_longest(*args, fillvalue=pad)
+
+
+def partition_all(n, seq):
+ """ Partition all elements of sequence into tuples of length at most n
+
+ The final tuple may be shorter to accommodate extra elements.
+
+ >>> list(partition_all(2, [1, 2, 3, 4]))
+ [(1, 2), (3, 4)]
+
+ >>> list(partition_all(2, [1, 2, 3, 4, 5]))
+ [(1, 2), (3, 4), (5,)]
+
+ See Also:
+ partition
+ """
+ args = [iter(seq)] * n
+ it = zip_longest(*args, fillvalue=no_pad)
+ try:
+ prev = next(it)
+ except StopIteration:
+ return
+ for item in it:
+ yield prev
+ prev = item
+ if prev[-1] is no_pad:
+ try:
+ # If seq defines __len__, then
+ # we can quickly calculate where no_pad starts
+ yield prev[:len(seq) % n]
+ except TypeError:
+ # Get first index of no_pad without using .index()
+ # https://github.com/pytoolz/toolz/issues/387
+ # Binary search from CPython's bisect module,
+ # modified for identity testing.
+ lo, hi = 0, n
+ while lo < hi:
+ mid = (lo + hi) // 2
+ if prev[mid] is no_pad:
+ hi = mid
+ else:
+ lo = mid + 1
+ yield prev[:lo]
+ else:
+ yield prev
+
+
+def count(seq):
+ """ Count the number of items in seq
+
+ Like the builtin ``len`` but works on lazy sequences.
+
+ Not to be confused with ``itertools.count``
+
+ See also:
+ len
+ """
+ if hasattr(seq, '__len__'):
+ return len(seq)
+ return sum(1 for i in seq)
+
+
+def pluck(ind, seqs, default=no_default):
+ """ plucks an element or several elements from each item in a sequence.
+
+ ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more
+ elements of each item in the sequence.
+
+ This is equivalent to running `map(curried.get(ind), seqs)`
+
+ ``ind`` can be either a single string/index or a list of strings/indices.
+ ``seqs`` should be sequence containing sequences or dicts.
+
+ e.g.
+
+ >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}]
+ >>> list(pluck('name', data))
+ ['Cheese', 'Pies']
+ >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]]))
+ [(1, 2), (4, 5)]
+
+ See Also:
+ get
+ map
+ """
+ if default == no_default:
+ get = getter(ind)
+ return map(get, seqs)
+ elif isinstance(ind, list):
+ return (tuple(_get(item, seq, default) for item in ind)
+ for seq in seqs)
+ return (_get(ind, seq, default) for seq in seqs)
+
+
+def getter(index):
+ if isinstance(index, list):
+ if len(index) == 1:
+ index = index[0]
+ return lambda x: (x[index],)
+ elif index:
+ return operator.itemgetter(*index)
+ else:
+ return lambda x: ()
+ else:
+ return operator.itemgetter(index)
+
+
+def join(leftkey, leftseq, rightkey, rightseq,
+ left_default=no_default, right_default=no_default):
+ """ Join two sequences on common attributes
+
+ This is a semi-streaming operation. The LEFT sequence is fully evaluated
+ and placed into memory. The RIGHT sequence is evaluated lazily and so can
+ be arbitrarily large.
+ (Note: If right_default is defined, then unique keys of rightseq
+ will also be stored in memory.)
+
+ >>> friends = [('Alice', 'Edith'),
+ ... ('Alice', 'Zhao'),
+ ... ('Edith', 'Alice'),
+ ... ('Zhao', 'Alice'),
+ ... ('Zhao', 'Edith')]
+
+ >>> cities = [('Alice', 'NYC'),
+ ... ('Alice', 'Chicago'),
+ ... ('Dan', 'Syndey'),
+ ... ('Edith', 'Paris'),
+ ... ('Edith', 'Berlin'),
+ ... ('Zhao', 'Shanghai')]
+
+ >>> # Vacation opportunities
+ >>> # In what cities do people have friends?
+ >>> result = join(second, friends,
+ ... first, cities)
+ >>> for ((a, b), (c, d)) in sorted(unique(result)):
+ ... print((a, d))
+ ('Alice', 'Berlin')
+ ('Alice', 'Paris')
+ ('Alice', 'Shanghai')
+ ('Edith', 'Chicago')
+ ('Edith', 'NYC')
+ ('Zhao', 'Chicago')
+ ('Zhao', 'NYC')
+ ('Zhao', 'Berlin')
+ ('Zhao', 'Paris')
+
+ Specify outer joins with keyword arguments ``left_default`` and/or
+ ``right_default``. Here is a full outer join in which unmatched elements
+ are paired with None.
+
+ >>> identity = lambda x: x
+ >>> list(join(identity, [1, 2, 3],
+ ... identity, [2, 3, 4],
+ ... left_default=None, right_default=None))
+ [(2, 2), (3, 3), (None, 4), (1, None)]
+
+ Usually the key arguments are callables to be applied to the sequences. If
+ the keys are not obviously callable then it is assumed that indexing was
+ intended, e.g. the following is a legal change.
+ The join is implemented as a hash join and the keys of leftseq must be
+ hashable. Additionally, if right_default is defined, then keys of rightseq
+ must also be hashable.
+
+ >>> # result = join(second, friends, first, cities)
+ >>> result = join(1, friends, 0, cities) # doctest: +SKIP
+ """
+ if not callable(leftkey):
+ leftkey = getter(leftkey)
+ if not callable(rightkey):
+ rightkey = getter(rightkey)
+
+ d = groupby(leftkey, leftseq)
+
+ if left_default == no_default and right_default == no_default:
+ # Inner Join
+ for item in rightseq:
+ key = rightkey(item)
+ if key in d:
+ for left_match in d[key]:
+ yield (left_match, item)
+ elif left_default != no_default and right_default == no_default:
+ # Right Join
+ for item in rightseq:
+ key = rightkey(item)
+ if key in d:
+ for left_match in d[key]:
+ yield (left_match, item)
+ else:
+ yield (left_default, item)
+ elif right_default != no_default:
+ seen_keys = set()
+ seen = seen_keys.add
+
+ if left_default == no_default:
+ # Left Join
+ for item in rightseq:
+ key = rightkey(item)
+ seen(key)
+ if key in d:
+ for left_match in d[key]:
+ yield (left_match, item)
+ else:
+ # Full Join
+ for item in rightseq:
+ key = rightkey(item)
+ seen(key)
+ if key in d:
+ for left_match in d[key]:
+ yield (left_match, item)
+ else:
+ yield (left_default, item)
+
+ for key, matches in d.items():
+ if key not in seen_keys:
+ for match in matches:
+ yield (match, right_default)
+
+
+def diff(*seqs, **kwargs):
+ """ Return those items that differ between sequences
+
+ >>> list(diff([1, 2, 3], [1, 2, 10, 100]))
+ [(3, 10)]
+
+ Shorter sequences may be padded with a ``default`` value:
+
+ >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None))
+ [(3, 10), (None, 100)]
+
+ A ``key`` function may also be applied to each item to use during
+ comparisons:
+
+ >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower))
+ [('bananas', 'Oranges')]
+ """
+ N = len(seqs)
+ if N == 1 and isinstance(seqs[0], list):
+ seqs = seqs[0]
+ N = len(seqs)
+ if N < 2:
+ raise TypeError('Too few sequences given (min 2 required)')
+ default = kwargs.get('default', no_default)
+ if default == no_default:
+ iters = zip(*seqs)
+ else:
+ iters = zip_longest(*seqs, fillvalue=default)
+ key = kwargs.get('key', None)
+ if key is None:
+ for items in iters:
+ if items.count(items[0]) != N:
+ yield items
+ else:
+ for items in iters:
+ vals = tuple(map(key, items))
+ if vals.count(vals[0]) != N:
+ yield items
+
+
+def topk(k, seq, key=None):
+ """ Find the k largest elements of a sequence
+
+ Operates lazily in ``n*log(k)`` time
+
+ >>> topk(2, [1, 100, 10, 1000])
+ (1000, 100)
+
+ Use a key function to change sorted order
+
+ >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len)
+ ('Charlie', 'Alice')
+
+ See also:
+ heapq.nlargest
+ """
+ if key is not None and not callable(key):
+ key = getter(key)
+ return tuple(heapq.nlargest(k, seq, key=key))
+
+
+def peek(seq):
+ """ Retrieve the next element of a sequence
+
+ Returns the first element and an iterable equivalent to the original
+ sequence, still having the element retrieved.
+
+ >>> seq = [0, 1, 2, 3, 4]
+ >>> first, seq = peek(seq)
+ >>> first
+ 0
+ >>> list(seq)
+ [0, 1, 2, 3, 4]
+ """
+ iterator = iter(seq)
+ item = next(iterator)
+ return item, itertools.chain((item,), iterator)
+
+
+def peekn(n, seq):
+ """ Retrieve the next n elements of a sequence
+
+ Returns a tuple of the first n elements and an iterable equivalent
+ to the original, still having the elements retrieved.
+
+ >>> seq = [0, 1, 2, 3, 4]
+ >>> first_two, seq = peekn(2, seq)
+ >>> first_two
+ (0, 1)
+ >>> list(seq)
+ [0, 1, 2, 3, 4]
+ """
+ iterator = iter(seq)
+ peeked = tuple(take(n, iterator))
+ return peeked, itertools.chain(iter(peeked), iterator)
+
+
+def random_sample(prob, seq, random_state=None):
+ """ Return elements from a sequence with probability of prob
+
+ Returns a lazy iterator of random items from seq.
+
+ ``random_sample`` considers each item independently and without
+ replacement. See below how the first time it returned 13 items and the
+ next time it returned 6 items.
+
+ >>> seq = list(range(100))
+ >>> list(random_sample(0.1, seq)) # doctest: +SKIP
+ [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95]
+ >>> list(random_sample(0.1, seq)) # doctest: +SKIP
+ [6, 44, 54, 61, 69, 94]
+
+ Providing an integer seed for ``random_state`` will result in
+ deterministic sampling. Given the same seed it will return the same sample
+ every time.
+
+ >>> list(random_sample(0.1, seq, random_state=2016))
+ [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
+ >>> list(random_sample(0.1, seq, random_state=2016))
+ [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
+
+ ``random_state`` can also be any object with a method ``random`` that
+ returns floats between 0.0 and 1.0 (exclusive).
+
+ >>> from random import Random
+ >>> randobj = Random(2016)
+ >>> list(random_sample(0.1, seq, random_state=randobj))
+ [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
+ """
+ if not hasattr(random_state, 'random'):
+ from random import Random
+
+ random_state = Random(random_state)
+ return filter(lambda _: random_state.random() < prob, seq)