mirror of https://github.com/mahmoud/boltons.git
split and split_iter work
This commit is contained in:
parent
2c4595e327
commit
0ee86d9827
97
iterutils.py
97
iterutils.py
|
@ -1,47 +1,86 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
|
||||||
|
def is_iterable(obj):
|
||||||
|
return callable(getattr(obj, '__iter__', None))
|
||||||
|
|
||||||
|
|
||||||
def is_scalar(obj):
|
def is_scalar(obj):
|
||||||
return not hasattr(obj, '__iter__') or isinstance(obj, basestring)
|
return not is_iterable(obj) or isinstance(obj, basestring)
|
||||||
|
|
||||||
|
|
||||||
def split(src, key=None, maxsplit=None):
|
def split(src, sep=None, maxsplit=None):
|
||||||
"""
|
"""
|
||||||
Splits an iterable based on a separator, iterable of separators, or
|
Splits an iterable based on a separator; see the split_iter
|
||||||
function that evaluates to True when a separator is encountered.
|
docs below for more info.
|
||||||
|
|
||||||
Frankly, this feature should be part of the list builtin.
|
|
||||||
|
|
||||||
TODO: This works with iterators but could itself be an generator.
|
|
||||||
"""
|
"""
|
||||||
|
return list(split_iter(src, sep, maxsplit))
|
||||||
|
|
||||||
|
|
||||||
|
def split_iter(src, sep=None, maxsplit=None):
|
||||||
|
"""
|
||||||
|
Splits an iterable based on a separator, 'sep'. 'sep' can be a
|
||||||
|
single value, an iterable of separators, or a single-argument
|
||||||
|
callable that returns True when a separator is encountered.
|
||||||
|
|
||||||
|
split_iter yields lists of non-separator values. A separator will
|
||||||
|
never appear in the output.
|
||||||
|
|
||||||
|
Note that split_iter is based on str.split(), so if sep is None,
|
||||||
|
consecutive separators are "grouped"; check the str.split() docs
|
||||||
|
for more info.
|
||||||
|
|
||||||
|
>>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))
|
||||||
|
[['hi', 'hello'], ['sup'], ['soap']]
|
||||||
|
|
||||||
|
>>> falsy_sep = lambda x: not x
|
||||||
|
>>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
|
||||||
|
[['hi', 'hello'], [], ['sup'], []]
|
||||||
|
"""
|
||||||
|
if not is_iterable(src):
|
||||||
|
raise TypeError('expected an iterable')
|
||||||
|
|
||||||
if maxsplit is not None:
|
if maxsplit is not None:
|
||||||
maxsplit = int(maxsplit)
|
maxsplit = int(maxsplit)
|
||||||
if maxsplit == 0:
|
if maxsplit == 0:
|
||||||
return [src]
|
yield [src]
|
||||||
|
return
|
||||||
|
|
||||||
if callable(key):
|
if callable(sep):
|
||||||
key_func = key
|
sep_func = sep
|
||||||
elif not is_scalar(key):
|
elif not is_scalar(sep):
|
||||||
key = set(key)
|
sep = frozenset(sep)
|
||||||
key_func = lambda x: x in key
|
sep_func = lambda x: x in sep
|
||||||
else:
|
else:
|
||||||
key_func = lambda x: x == key
|
sep_func = lambda x: x == sep
|
||||||
|
|
||||||
ret = []
|
cur_group = []
|
||||||
cur_list = []
|
split_count = 0
|
||||||
for s in src:
|
for s in src:
|
||||||
if key_func(s):
|
if maxsplit is not None and split_count >= maxsplit:
|
||||||
ret.append(cur_list)
|
sep_func = lambda x: False
|
||||||
cur_list = []
|
if sep_func(s):
|
||||||
if maxsplit is not None and len(ret) >= maxsplit:
|
if sep is None and not cur_group:
|
||||||
key_func = lambda x: False
|
|
||||||
else:
|
|
||||||
cur_list.append(s)
|
|
||||||
ret.append(cur_list)
|
|
||||||
|
|
||||||
if key is None:
|
|
||||||
# If sep is none, str.split() "groups" separators
|
# If sep is none, str.split() "groups" separators
|
||||||
# check the str.split() docs for more info
|
# check the str.split() docs for more info
|
||||||
return [x for x in ret if x]
|
continue
|
||||||
|
split_count += 1
|
||||||
|
yield cur_group
|
||||||
|
cur_group = []
|
||||||
else:
|
else:
|
||||||
return ret
|
cur_group.append(s)
|
||||||
|
|
||||||
|
if cur_group or sep is not None:
|
||||||
|
yield cur_group
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
vals = ['hi', 'hello', None, None, 'sup', None, 'soap', None]
|
||||||
|
falsy_sep = lambda x: not x
|
||||||
|
print list(split(vals, falsy_sep))
|
||||||
|
print list(split(vals, [None]))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
|
|
Loading…
Reference in New Issue