split and split_iter work

This commit is contained in:
Mahmoud Hashemi 2013-02-19 20:25:28 -08:00
parent 2c4595e327
commit 0ee86d9827
1 changed file with 69 additions and 30 deletions

View File

@@ -1,47 +1,86 @@
# -*- coding: utf-8 -*-
def is_iterable(obj):
    """
    Returns True if 'obj' exposes a callable '__iter__' attribute,
    False otherwise.

    Only the '__iter__' attribute is consulted; objects that can be
    looped over solely through '__getitem__' are reported as not
    iterable.
    """
    return callable(getattr(obj, '__iter__', None))
def is_scalar(obj):
    """
    Returns True if 'obj' should be treated as a single value rather
    than as a collection: either it is not iterable (per is_iterable)
    or it is a string.
    """
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        # Python 3 has no basestring; str and bytes are its counterparts.
        string_types = (str, bytes)
    return not is_iterable(obj) or isinstance(obj, string_types)
def split(src, sep=None, maxsplit=None):
    """
    Splits an iterable based on a separator and returns the groups as
    a list of lists.

    This is the eager counterpart of split_iter: the arguments are
    passed through unchanged and the generator is consumed into a
    list. See the split_iter docs for the meaning of 'sep' and
    'maxsplit'.
    """
    return list(split_iter(src, sep, maxsplit))
def split_iter(src, sep=None, maxsplit=None):
    """
    Lazily splits an iterable into groups, yielding one list per group.

    'sep' can be a single value, an iterable of separator values, or a
    single-argument callable that returns True when handed a
    separator. A separator will never appear in the output unless the
    'maxsplit' limit has already been reached.

    The behavior is modeled on str.split(): if sep is None, runs of
    consecutive None values act as one separator and no empty groups
    are yielded; with any other sep, adjacent, leading and trailing
    separators produce empty lists.

    'maxsplit', if not None, is passed through int() and caps the
    number of splits. Once the cap is reached, every remaining item
    (separators included) is collected into the final group, so a
    maxsplit of 0 yields a non-empty input as one single group.

    Because this is a generator, the TypeError for a non-iterable
    'src' is raised when iteration starts, not when split_iter() is
    called.

    >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))
    [['hi', 'hello'], ['sup'], ['soap']]
    >>> falsy_sep = lambda x: not x
    >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
    [['hi', 'hello'], [], ['sup'], []]
    """
    if not is_iterable(src):
        raise TypeError('expected an iterable')

    if maxsplit is not None:
        maxsplit = int(maxsplit)

    # Normalize every accepted form of 'sep' into a predicate.
    if callable(sep):
        sep_func = sep
    elif not is_scalar(sep):
        sep_set = frozenset(sep)
        sep_func = lambda x: x in sep_set
    else:
        sep_func = lambda x: x == sep

    cur_group = []
    split_count = 0
    for item in src:
        # After the split limit is hit nothing counts as a separator
        # any more; this also covers maxsplit == 0 without a special case.
        limit_reached = maxsplit is not None and split_count >= maxsplit
        if limit_reached or not sep_func(item):
            cur_group.append(item)
            continue
        if sep is None and not cur_group:
            # If sep is None, str.split() "groups" separators; check
            # the str.split() docs for more info.
            continue
        split_count += 1
        yield cur_group
        cur_group = []

    # With an explicit sep a trailing (possibly empty) group is always
    # emitted, mirroring 'a,'.split(',') == ['a', ''].
    if cur_group or sep is not None:
        yield cur_group
def main():
    """
    Prints the result of splitting a small sample list twice: once
    with a callable separator and once with an iterable of separators.
    """
    vals = ['hi', 'hello', None, None, 'sup', None, 'soap', None]
    falsy_sep = lambda x: not x
    # split() already returns a list, so no extra list() call is needed.
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(split(vals, falsy_sep))
    print(split(vals, [None]))
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()