mirror of https://github.com/mahmoud/boltons.git
iterutils refresh. argname change: keyfunc -> key. docs reviewed and updated.
This commit is contained in:
parent
4e35cb8fbb
commit
cc364279da
|
@ -1,6 +1,5 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""\
|
""":mod:`itertools` is full of great examples of Python generator
|
||||||
:mod:`itertools` is full of great examples of Python generator
|
|
||||||
usage. However, there are still some critical gaps. ``iterutils``
|
usage. However, there are still some critical gaps. ``iterutils``
|
||||||
fills many of those gaps with featureful, tested, and Pythonic
|
fills many of those gaps with featureful, tested, and Pythonic
|
||||||
solutions.
|
solutions.
|
||||||
|
@ -12,12 +11,7 @@ following are based on examples in itertools docs.
|
||||||
"""
|
"""
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from compat import basestring
|
from compat import basestring # TODO
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['is_iterable', 'is_scalar', 'split', 'split_iter',
|
|
||||||
'chunked', 'chunked_iter', 'windowed', 'windowed_iter',
|
|
||||||
'bucketize', 'partition', 'unique', 'unique_iter']
|
|
||||||
|
|
||||||
|
|
||||||
def is_iterable(obj):
|
def is_iterable(obj):
|
||||||
|
@ -39,7 +33,7 @@ def is_iterable(obj):
|
||||||
|
|
||||||
def is_scalar(obj):
|
def is_scalar(obj):
|
||||||
"""\
|
"""\
|
||||||
A near-mirror of :func:`is_iterable`, returns ``False`` if an
|
A near-mirror of :func:`is_iterable`. Returns ``False`` if an
|
||||||
object is an iterable container type. Strings are considered
|
object is an iterable container type. Strings are considered
|
||||||
scalar as well, because strings are more often treated as whole
|
scalar as well, because strings are more often treated as whole
|
||||||
values as opposed to iterables of 1-character substrings.
|
values as opposed to iterables of 1-character substrings.
|
||||||
|
@ -56,25 +50,25 @@ def is_scalar(obj):
|
||||||
|
|
||||||
def split(src, sep=None, maxsplit=None):
|
def split(src, sep=None, maxsplit=None):
|
||||||
"""\
|
"""\
|
||||||
Splits an iterable based on a separator, like :func:`str.split`,
|
Splits an iterable based on a separator. Like :meth:`str.split`,
|
||||||
but for all iterables. Returns a list of lists.
|
but for all iterables. Returns a list of lists.
|
||||||
|
|
||||||
>>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])
|
>>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])
|
||||||
[['hi', 'hello'], ['sup'], ['soap']]
|
[['hi', 'hello'], ['sup'], ['soap']]
|
||||||
|
|
||||||
See :func:`split_iter` docs below for more info.
|
See :func:`split_iter` docs for more info.
|
||||||
"""
|
"""
|
||||||
return list(split_iter(src, sep, maxsplit))
|
return list(split_iter(src, sep, maxsplit))
|
||||||
|
|
||||||
|
|
||||||
def split_iter(src, sep=None, maxsplit=None):
|
def split_iter(src, sep=None, maxsplit=None):
|
||||||
"""\
|
"""Splits an iterable based on a separator, *sep*, a max of
|
||||||
Splits an iterable based on a separator, ``sep``. ``sep`` can be:
|
*maxsplit* times (no max by default). *sep* can be:
|
||||||
|
|
||||||
* a single value
|
* a single value
|
||||||
* an iterable of separators
|
* an iterable of separators
|
||||||
* a single-argument callable that returns True when a separator is
|
* a single-argument callable that returns True when a separator is
|
||||||
encountered
|
encountered
|
||||||
|
|
||||||
``split_iter()`` yields lists of non-separator values. A separator will
|
``split_iter()`` yields lists of non-separator values. A separator will
|
||||||
never appear in the output.
|
never appear in the output.
|
||||||
|
@ -83,7 +77,7 @@ def split_iter(src, sep=None, maxsplit=None):
|
||||||
[['hi', 'hello'], ['sup'], ['soap']]
|
[['hi', 'hello'], ['sup'], ['soap']]
|
||||||
|
|
||||||
Note that ``split_iter`` is based on :func:`str.split`, so if
|
Note that ``split_iter`` is based on :meth:`str.split`, so if
|
||||||
``sep`` is None, ``split()`` "groups" separators. If empty lists
|
*sep* is ``None``, ``split()`` **groups** separators. If empty lists
|
||||||
are desired between two contiguous ``None`` values, simply use
|
are desired between two contiguous ``None`` values, simply use
|
||||||
``sep=[None]``:
|
``sep=[None]``:
|
||||||
|
|
||||||
|
@ -99,6 +93,7 @@ def split_iter(src, sep=None, maxsplit=None):
|
||||||
[['hi', 'hello'], [], ['sup'], []]
|
[['hi', 'hello'], [], ['sup'], []]
|
||||||
|
|
||||||
See :func:`split` for a list-returning version.
|
See :func:`split` for a list-returning version.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if not is_iterable(src):
|
if not is_iterable(src):
|
||||||
raise TypeError('expected an iterable')
|
raise TypeError('expected an iterable')
|
||||||
|
@ -139,13 +134,11 @@ def split_iter(src, sep=None, maxsplit=None):
|
||||||
|
|
||||||
|
|
||||||
def chunked(src, size, count=None, **kw):
|
def chunked(src, size, count=None, **kw):
|
||||||
"""\
|
"""Returns a list of *count* chunks, each with *size* elements,
|
||||||
Returns a list of ``count`` chunks, each with ``size`` elements,
|
generated from iterable *src*. If *src* is not evenly divisible by
|
||||||
generated from iterable ``src``. If ``src`` is not evenly
|
*size*, the final chunk will have fewer than *size* elements.
|
||||||
divisible by ``size``, the final chunk will have fewer than
|
Pass the *fill* keyword argument to supply a pad value and
|
||||||
``size`` elements. Provide the ``fill`` keyword argument to turn
|
enable padding; otherwise, no padding will take place.
|
||||||
on padding and provide a pad value, otherwise no padding will take
|
|
||||||
place.
|
|
||||||
|
|
||||||
>>> chunked(range(10), 3)
|
>>> chunked(range(10), 3)
|
||||||
[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
|
[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
|
||||||
|
@ -168,19 +161,19 @@ def chunked(src, size, count=None, **kw):
|
||||||
|
|
||||||
|
|
||||||
def chunked_iter(src, size, **kw):
|
def chunked_iter(src, size, **kw):
|
||||||
"""
|
"""Generates *size*-sized chunks from *src* iterable. Unless the
|
||||||
Generates 'size'-sized chunks from 'src' iterable. Unless
|
optional *fill* keyword argument is provided, iterables not evenly
|
||||||
the optional 'fill' keyword argument is provided, iterables
|
divisible by *size* will have a final chunk that is smaller than
|
||||||
not even divisible by 'size' will have a final chunk that is
|
*size*.
|
||||||
smaller than 'size'.
|
|
||||||
|
|
||||||
Note that ``fill=None`` will in fact use ``None`` as the fill value.
|
|
||||||
|
|
||||||
>>> list(chunked_iter(range(10), 3))
|
>>> list(chunked_iter(range(10), 3))
|
||||||
[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
|
[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
|
||||||
>>> list(chunked_iter(range(10), 3, fill=None))
|
>>> list(chunked_iter(range(10), 3, fill=None))
|
||||||
[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
|
[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
|
||||||
|
|
||||||
|
Note that ``fill=None`` in fact uses ``None`` as the fill value.
|
||||||
"""
|
"""
|
||||||
|
# TODO: add count kwarg?
|
||||||
if not is_iterable(src):
|
if not is_iterable(src):
|
||||||
raise TypeError('expected an iterable')
|
raise TypeError('expected an iterable')
|
||||||
size = int(size)
|
size = int(size)
|
||||||
|
@ -216,23 +209,21 @@ def chunked_iter(src, size, **kw):
|
||||||
|
|
||||||
|
|
||||||
def windowed(src, size):
|
def windowed(src, size):
|
||||||
"""\
|
"""Returns tuples with exactly length *size*. If the iterable is
|
||||||
Returns tuples with exactly length ``size``. If the iterable is
|
too short to make a window of length *size*, no tuples are
|
||||||
too short to make a window of length ``size``, no tuples are
|
|
||||||
returned. See :func:`windowed_iter` for more.
|
returned. See :func:`windowed_iter` for more.
|
||||||
"""
|
"""
|
||||||
return list(windowed_iter(src, size))
|
return list(windowed_iter(src, size))
|
||||||
|
|
||||||
|
|
||||||
def windowed_iter(src, size):
|
def windowed_iter(src, size):
|
||||||
"""\
|
"""Returns tuples with length *size* which represent a sliding
|
||||||
Returns tuples with length `size` which represent a sliding
|
window over iterable *src*.
|
||||||
window over iterable `src`.
|
|
||||||
|
|
||||||
>>> list(windowed_iter(range(7), 3))
|
>>> list(windowed_iter(range(7), 3))
|
||||||
[(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]
|
[(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]
|
||||||
|
|
||||||
If the iterable is too short to make a window of length `size`,
|
If the iterable is too short to make a window of length *size*,
|
||||||
then no window tuples are returned.
|
then no window tuples are returned.
|
||||||
|
|
||||||
>>> list(windowed_iter(range(3), 5))
|
>>> list(windowed_iter(range(3), 5))
|
||||||
|
@ -249,13 +240,10 @@ def windowed_iter(src, size):
|
||||||
return itertools.izip(*tees)
|
return itertools.izip(*tees)
|
||||||
|
|
||||||
|
|
||||||
def bucketize(src, keyfunc=None):
|
def bucketize(src, key=None):
|
||||||
"""\
|
"""Group values in the *src* iterable by the value returned by *key*,
|
||||||
Group values in the ``src`` iterable by value returned by
|
which defaults to :class:`bool`, grouping values by
|
||||||
``keyfunc``. keyfunc defaults to :class:`bool`, which will group
|
truthiness.
|
||||||
the values by truthiness. This means there will be at most two
|
|
||||||
keys, ``True`` and ``False``, and each key present will have a
|
|
||||||
list with at least one item.
|
|
||||||
|
|
||||||
>>> bucketize(range(5))
|
>>> bucketize(range(5))
|
||||||
{False: [0], True: [1, 2, 3, 4]}
|
{False: [0], True: [1, 2, 3, 4]}
|
||||||
|
@ -268,27 +256,27 @@ def bucketize(src, keyfunc=None):
|
||||||
>>> bucketize([None, None, None, 'hello'])
|
>>> bucketize([None, None, None, 'hello'])
|
||||||
{False: [None, None, None], True: ['hello']}
|
{False: [None, None, None], True: ['hello']}
|
||||||
|
|
||||||
See :func:`partition` for a version specialized for binary use
|
Note that in these examples there are at most two keys, ``True`` and
|
||||||
cases.
|
``False``, and each key present has a list with at least one
|
||||||
|
item. See :func:`partition` for a version specialized for binary
|
||||||
|
use cases.
|
||||||
"""
|
"""
|
||||||
if not is_iterable(src):
|
if not is_iterable(src):
|
||||||
raise TypeError('expected an iterable')
|
raise TypeError('expected an iterable')
|
||||||
if keyfunc is None:
|
if key is None:
|
||||||
keyfunc = bool
|
key = bool
|
||||||
if not callable(keyfunc):
|
if not callable(key):
|
||||||
raise TypeError('expected callable key function')
|
raise TypeError('expected callable key function')
|
||||||
|
|
||||||
ret = {}
|
ret = {}
|
||||||
for val in src:
|
for val in src:
|
||||||
key = keyfunc(val)
|
keyval = key(val)
|
||||||
ret.setdefault(key, []).append(val)
|
ret.setdefault(keyval, []).append(val)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
def partition(src, keyfunc=None):
|
def partition(src, key=None):
|
||||||
"""\
|
"""No relation to :meth:`str.partition`, ``partition`` is like
|
||||||
No relation to :meth:`str.partition`, ``partition`` is like
|
|
||||||
:func:`bucketize`, but for added convenience returns a tuple of
|
:func:`bucketize`, but for added convenience returns a tuple of
|
||||||
``(truthy_values, falsy_values)``.
|
``(truthy_values, falsy_values)``.
|
||||||
|
|
||||||
|
@ -296,7 +284,7 @@ def partition(src, keyfunc=None):
|
||||||
>>> nonempty
|
>>> nonempty
|
||||||
['hi', 'bye']
|
['hi', 'bye']
|
||||||
|
|
||||||
``keyfunc`` defaults to :class:`bool`, but can be carefully overridden to
|
*key* defaults to :class:`bool`, but can be carefully overridden to
|
||||||
use any function that returns either ``True`` or ``False``.
|
use any function that returns either ``True`` or ``False``.
|
||||||
|
|
||||||
>>> import string
|
>>> import string
|
||||||
|
@ -305,38 +293,35 @@ def partition(src, keyfunc=None):
|
||||||
>>> ''.join(decimal_digits), ''.join(hexletters)
|
>>> ''.join(decimal_digits), ''.join(hexletters)
|
||||||
('0123456789', 'abcdefABCDEF')
|
('0123456789', 'abcdefABCDEF')
|
||||||
"""
|
"""
|
||||||
bucketized = bucketize(src, keyfunc)
|
bucketized = bucketize(src, key)
|
||||||
return bucketized.get(True, []), bucketized.get(False, [])
|
return bucketized.get(True, []), bucketized.get(False, [])
|
||||||
|
|
||||||
|
|
||||||
def unique(src, key=None):
|
def unique(src, key=None):
|
||||||
"""\
|
"""``unique()`` returns a list of unique values, as determined by
|
||||||
``unique()`` returns a list of unique values, as determined by
|
*key*, in the order they first appeared in the input iterable,
|
||||||
``key``, in the order they first appeared in the input iterable,
|
*src*.
|
||||||
``src``.
|
|
||||||
|
|
||||||
>>> ones_n_zeros = '11010110001010010101010'
|
>>> ones_n_zeros = '11010110001010010101010'
|
||||||
>>> ''.join(unique(ones_n_zeros))
|
>>> ''.join(unique(ones_n_zeros))
|
||||||
'10'
|
'10'
|
||||||
|
|
||||||
See :func:`unique_iter` docs for more info.
|
See :func:`unique_iter` docs for more details.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
return list(unique_iter(src, key))
|
return list(unique_iter(src, key))
|
||||||
|
|
||||||
|
|
||||||
def unique_iter(src, key=None):
|
def unique_iter(src, key=None):
|
||||||
"""\
|
"""Yield unique elements from the iterable, *src*, based on *key*,
|
||||||
Yield unique elements from the iterable ``src``, based on ``key``,
|
in the order in which they first appeared in *src*.
|
||||||
in the order in which they first appeared in ``src``.
|
|
||||||
|
|
||||||
>>> repetitious = [1, 2, 3] * 10
|
>>> repetitious = [1, 2, 3] * 10
|
||||||
>>> list(unique_iter(repetitious))
|
>>> list(unique_iter(repetitious))
|
||||||
[1, 2, 3]
|
[1, 2, 3]
|
||||||
|
|
||||||
By default, ``key`` is the object itself, but ``key`` can either
|
By default, *key* is the object itself, but *key* can either be a
|
||||||
be a callable or, for convenience, a string name of the attribute
|
callable or, for convenience, a string name of the attribute on
|
||||||
on which to uniqueify objects, falling back on identity when the
|
which to uniqueify objects, falling back on identity when the
|
||||||
attribute is not present.
|
attribute is not present.
|
||||||
|
|
||||||
>>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes']
|
>>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes']
|
||||||
|
|
|
@ -2,5 +2,33 @@
|
||||||
==========================================
|
==========================================
|
||||||
|
|
||||||
.. automodule:: boltons.iterutils
|
.. automodule:: boltons.iterutils
|
||||||
:members:
|
|
||||||
:undoc-members:
|
Iteration
|
||||||
|
---------
|
||||||
|
|
||||||
|
These are generators and convenient :class:`list`-producing
|
||||||
|
counterparts comprising several common patterns of iteration missing
|
||||||
|
from the standard library.
|
||||||
|
|
||||||
|
.. autofunction:: split
|
||||||
|
.. autofunction:: split_iter
|
||||||
|
.. autofunction:: chunked
|
||||||
|
.. autofunction:: chunked_iter
|
||||||
|
.. autofunction:: windowed
|
||||||
|
.. autofunction:: windowed_iter
|
||||||
|
|
||||||
|
Categorization
|
||||||
|
--------------
|
||||||
|
|
||||||
|
These functions operate on iterables, dividing them into groups based on a given condition.
|
||||||
|
|
||||||
|
.. autofunction:: bucketize
|
||||||
|
.. autofunction:: partition
|
||||||
|
|
||||||
|
Type Checks
|
||||||
|
-----------
|
||||||
|
|
||||||
|
These checks are in the same vein as the feature-checking builtin, :func:`callable`.
|
||||||
|
|
||||||
|
.. autofunction:: is_iterable
|
||||||
|
.. autofunction:: is_scalar
|
||||||
|
|
Loading…
Reference in New Issue