From a106359085e7f542686320c1d4407c72bc457b2b Mon Sep 17 00:00:00 2001
From: Mahmoud Hashemi
Date: Sat, 11 Mar 2017 19:34:58 -0800
Subject: [PATCH] iterutils: add research function

---
 boltons/iterutils.py    | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 tests/test_iterutils.py | 22 ++++++++++++++++++++++
 2 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/boltons/iterutils.py b/boltons/iterutils.py
index 3254e04..8c40406 100644
--- a/boltons/iterutils.py
+++ b/boltons/iterutils.py
@@ -979,12 +979,80 @@ def get_path(root, path, default=_UNSET):
         return default
     return cur
 
-# TODO: get_path/set_path
+
+def research(root, query=lambda p, k, v: True, reraise=False):
+    """The :func:`research` function uses :func:`remap` to recurse over
+    any data nested in *root*, and find values which match a given
+    criterion, specified by the *query* callable.
+
+    Results are returned as a list of ``(path, value)`` pairs. The
+    paths are tuples in the same format accepted by
+    :func:`get_path`. This can be useful for comparing values nested
+    in two or more different structures.
+
+    Here's a simple example that finds all integers:
+
+    >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
+    >>> res = research(root, query=lambda p, k, v: isinstance(v, int))
+    >>> print(sorted(res))
+    [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)]
+
+    Note how *query* follows the same, familiar ``path, key, value``
+    signature as the ``visit`` and ``enter`` functions on
+    :func:`remap`, and returns a :class:`bool`.
+
+    Args:
+       root: The target object to search. Supports the same types of
+          objects as :func:`remap`, including :class:`list`,
+          :class:`tuple`, :class:`dict`, and :class:`set`.
+       query (callable): The function called on every object to
+          determine whether to include it in the search results. The
+          callable must accept three arguments, *path*, *key*, and
+          *value*, commonly abbreviated *p*, *k*, and *v*, same as
+          *enter* and *visit* from :func:`remap`.
+       reraise (bool): Whether to reraise exceptions raised by *query*
+          or to simply drop the result that caused the error.
+
+    Raises:
+       TypeError: If *query* is not callable.
+
+
+    With :func:`research` it's easy to inspect the details of a data
+    structure, like finding values that are at a certain depth (using
+    ``len(p)``) and much more. If more advanced functionality is
+    needed, check out the code and make your own :func:`remap`
+    wrapper, and consider `submitting a patch`_!
+
+    .. _submitting a patch: https://github.com/mahmoud/boltons/pulls
+    """
+    ret = []
+
+    if not callable(query):
+        # wrap in a 1-tuple so a tuple *query* is shown via %r instead
+        # of being unpacked as multiple format arguments
+        raise TypeError('query expected callable, not: %r' % (query,))
+
+    def enter(path, key, value):
+        try:
+            if query(path, key, value):
+                ret.append((path + (key,), value))
+        except Exception:
+            # best-effort by default: a failing query just drops this
+            # candidate unless the caller asked for errors via *reraise*
+            if reraise:
+                raise
+        return default_enter(path, key, value)
+
+    remap(root, enter=enter)
+    return ret
+
+
 # TODO: recollect()
+# TODO: refilter()
 # TODO: reiter()
 
 
 
-# GUID iterators: 10x faster and somewhat more compact than uuid.
+# GUID iterators: 10x faster and somewhat more compact than uuid.
 
class GUIDerator(object): """The GUIDerator is an iterator that yields a globally-unique diff --git a/tests/test_iterutils.py b/tests/test_iterutils.py index d47ae71..405ed55 100644 --- a/tests/test_iterutils.py +++ b/tests/test_iterutils.py @@ -4,6 +4,7 @@ import pytest from boltons.dictutils import OMD from boltons.iterutils import (first, remap, + research, default_enter, default_exit, get_path) @@ -356,6 +357,27 @@ class TestGetPath(object): assert get_path(root, 'key.0') == 'test' +def test_research(): + root = {} + + with pytest.raises(TypeError): + research(root, query=None) + + root = {'a': 'a'} + res = research(root, query=lambda p, k, v: v == 'a') + assert len(res) == 1 + assert res[0] == (('a',), 'a') + + def broken_query(p, k, v): + raise RuntimeError() + + with pytest.raises(RuntimeError): + research(root, broken_query, reraise=True) + + # empty results with default, reraise=False + assert research(root, broken_query) == [] + + def test_backoff_basic(): from boltons.iterutils import backoff