mirror of https://github.com/mahmoud/boltons.git
split and split_iter work
This commit is contained in:
parent
2c4595e327
commit
0ee86d9827
99
iterutils.py
99
iterutils.py
|
@ -1,47 +1,86 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
def is_iterable(obj):
|
||||
return callable(getattr(obj, '__iter__', None))
|
||||
|
||||
|
||||
def is_scalar(obj):
|
||||
return not hasattr(obj, '__iter__') or isinstance(obj, basestring)
|
||||
return not is_iterable(obj) or isinstance(obj, basestring)
|
||||
|
||||
|
||||
def split(src, key=None, maxsplit=None):
|
||||
def split(src, sep=None, maxsplit=None):
|
||||
"""
|
||||
Splits an iterable based on a separator, iterable of separators, or
|
||||
function that evaluates to True when a separator is encountered.
|
||||
|
||||
Frankly, this feature should be part of the list builtin.
|
||||
|
||||
TODO: This works with iterators but could itself be an generator.
|
||||
Splits an iterable based on a separator, see split_iter
|
||||
docs below for more info.
|
||||
"""
|
||||
return list(split_iter(src, sep, maxsplit))
|
||||
|
||||
|
||||
def split_iter(src, sep=None, maxsplit=None):
|
||||
"""
|
||||
Splits an iterable based on a separator, 'sep'. 'sep' can be a
|
||||
single value, an iterable of separators, or a single-argument
|
||||
callable that returns True when a separator is encountered.
|
||||
|
||||
split_iter yields lists of non-separator values. A separator will
|
||||
never appear in the output.
|
||||
|
||||
Note that split_iter is based on str.split(), so if sep is None,
|
||||
str.split() "groups" separators check the str.split() docs
|
||||
for more info.
|
||||
|
||||
>>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None])
|
||||
[['hi', 'hello'], ['sup'], ['soap']]
|
||||
|
||||
>>> falsy_sep = lambda x: not x
|
||||
>>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
|
||||
[['hi', 'hello'], [], ['sup'], []]
|
||||
"""
|
||||
if not is_iterable(src):
|
||||
raise TypeError('expected an iterable')
|
||||
|
||||
if maxsplit is not None:
|
||||
maxsplit = int(maxsplit)
|
||||
if maxsplit == 0:
|
||||
return [src]
|
||||
yield [src]
|
||||
return
|
||||
|
||||
if callable(key):
|
||||
key_func = key
|
||||
elif not is_scalar(key):
|
||||
key = set(key)
|
||||
key_func = lambda x: x in key
|
||||
if callable(sep):
|
||||
sep_func = sep
|
||||
elif not is_scalar(sep):
|
||||
sep = frozenset(sep)
|
||||
sep_func = lambda x: x in sep
|
||||
else:
|
||||
key_func = lambda x: x == key
|
||||
sep_func = lambda x: x == sep
|
||||
|
||||
ret = []
|
||||
cur_list = []
|
||||
cur_group = []
|
||||
split_count = 0
|
||||
for s in src:
|
||||
if key_func(s):
|
||||
ret.append(cur_list)
|
||||
cur_list = []
|
||||
if maxsplit is not None and len(ret) >= maxsplit:
|
||||
key_func = lambda x: False
|
||||
if maxsplit is not None and split_count >= maxsplit:
|
||||
sep_func = lambda x: False
|
||||
if sep_func(s):
|
||||
if sep is None and not cur_group:
|
||||
# If sep is none, str.split() "groups" separators
|
||||
# check the str.split() docs for more info
|
||||
continue
|
||||
split_count += 1
|
||||
yield cur_group
|
||||
cur_group = []
|
||||
else:
|
||||
cur_list.append(s)
|
||||
ret.append(cur_list)
|
||||
cur_group.append(s)
|
||||
|
||||
if key is None:
|
||||
# If sep is none, str.split() "groups" separators
|
||||
# check the str.split() docs for more info
|
||||
return [x for x in ret if x]
|
||||
else:
|
||||
return ret
|
||||
if cur_group or sep is not None:
|
||||
yield cur_group
|
||||
return
|
||||
|
||||
|
||||
def main():
|
||||
vals = ['hi', 'hello', None, None, 'sup', None, 'soap', None]
|
||||
falsy_sep = lambda x: not x
|
||||
print list(split(vals, falsy_sep))
|
||||
print list(split(vals, [None]))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
Loading…
Reference in New Issue