# -*- coding: utf-8 -*-
# Post-patch contents of iterutils.py from commit
# 0ee86d9827d390f816dca98321ed228bf0ea0f3b ("split and split_iter work",
# Mahmoud Hashemi, 2013-02-19), reformatted from the collapsed diff.

try:
    _string_types = basestring  # Python 2: covers both str and unicode
except NameError:
    # Python 3 has no basestring; str and bytes are its string types.
    _string_types = (str, bytes)


def is_iterable(obj):
    """Return True if 'obj' has a callable ``__iter__`` attribute."""
    return callable(getattr(obj, '__iter__', None))


def is_scalar(obj):
    """
    Return True if 'obj' should be treated as a single value: either
    it is not iterable, or it is a string (strings are iterable on
    some Python versions but are never treated as collections here).
    """
    return not is_iterable(obj) or isinstance(obj, _string_types)


def split(src, sep=None, maxsplit=None):
    """
    Splits an iterable based on a separator and returns a list of
    lists. This is the eager counterpart of split_iter and accepts
    the same arguments; see split_iter's docstring for details.

    >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])
    [['hi', 'hello'], ['sup'], ['soap']]
    """
    return list(split_iter(src, sep, maxsplit))


def split_iter(src, sep=None, maxsplit=None):
    """
    Splits an iterable based on a separator, 'sep'. 'sep' can be a
    single value, an iterable of separators, or a single-argument
    callable that returns True when a separator is encountered.

    split_iter yields lists of non-separator values. A separator will
    never appear in the output, except after 'maxsplit' splits have
    been made: from then on nothing is treated as a separator and all
    remaining items land in the final group.

    'maxsplit' is converted with int(). If it is 0, no splitting is
    done and a single group holding every item of 'src' is yielded.

    split_iter is modeled on str.split(): when 'sep' is None,
    consecutive separators are "grouped", i.e. empty groups are
    dropped from the output, just as str.split() with no separator
    collapses runs of whitespace.

    Because this is a generator, the TypeError for a non-iterable
    'src' is raised when iteration starts, not when split_iter is
    called.

    >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))
    [['hi', 'hello'], ['sup'], ['soap']]

    >>> falsy_sep = lambda x: not x
    >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
    [['hi', 'hello'], [], ['sup'], []]
    """
    if not is_iterable(src):
        raise TypeError('expected an iterable')

    if maxsplit is not None:
        maxsplit = int(maxsplit)
        if maxsplit == 0:
            # One group containing all items (not the source object
            # wrapped in an extra list).
            yield list(src)
            return

    if callable(sep):
        sep_func = sep
    elif not is_scalar(sep):
        sep = frozenset(sep)
        sep_func = lambda x: x in sep
    else:
        sep_func = lambda x: x == sep

    cur_group = []
    split_count = 0
    for s in src:
        # Once maxsplit splits have happened, nothing is a separator.
        can_split = maxsplit is None or split_count < maxsplit
        if can_split and sep_func(s):
            if sep is None and not cur_group:
                # If sep is None, str.split() "groups" separators:
                # skip empty groups instead of yielding them.
                continue
            split_count += 1
            yield cur_group
            cur_group = []
        else:
            cur_group.append(s)

    # With an explicit separator a trailing (possibly empty) group is
    # always emitted; with sep=None an empty trailing group is dropped.
    if cur_group or sep is not None:
        yield cur_group
    return


def main():
    """Print two sample splits as a quick manual smoke test."""
    vals = ['hi', 'hello', None, None, 'sup', None, 'soap', None]
    falsy_sep = lambda x: not x
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(list(split(vals, falsy_sep)))
    print(list(split(vals, [None])))


if __name__ == '__main__':
    main()