From a106359085e7f542686320c1d4407c72bc457b2b Mon Sep 17 00:00:00 2001
From: Mahmoud Hashemi <mahmoud@hatnote.com>
Date: Sat, 11 Mar 2017 19:34:58 -0800
Subject: [PATCH] iterutils: add research function

---
 boltons/iterutils.py    | 65 +++++++++++++++++++++++++++++++++++++++--
 tests/test_iterutils.py | 22 ++++++++++++++
 2 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/boltons/iterutils.py b/boltons/iterutils.py
index 3254e04..8c40406 100644
--- a/boltons/iterutils.py
+++ b/boltons/iterutils.py
@@ -979,12 +979,73 @@ def get_path(root, path, default=_UNSET):
         return default
     return cur
 
-# TODO: get_path/set_path
+
+def research(root, query=lambda p, k, v: True, reraise=False):
+    """The :func:`research` function uses :func:`remap` to recurse over
+    any data nested in *root*, and find values which match a given
+    criterion, specified by the *query* callable.
+
+    Results are returned as a list of ``(path, value)`` pairs. The
+    paths are tuples in the same format accepted by
+    :func:`get_path`. This can be useful for comparing values nested
+    in two or more different structures.
+
+    Here's a simple example that finds all integers:
+
+    >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
+    >>> res = research(root, query=lambda p, k, v: isinstance(v, int))
+    >>> print(sorted(res))
+    [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)]
+
+    Note how *query* follows the same, familiar ``path, key, value``
+    signature as the ``visit`` and ``enter`` functions on
+    :func:`remap`, and returns a :class:`bool`.
+
+    Args:
+       root: The target object to search. Supports the same types of
+          objects as :func:`remap`, including :class:`list`,
+          :class:`tuple`, :class:`dict`, and :class:`set`.
+       query (callable): The function called on every object to
+          determine whether to include it in the search results. It
+          must accept three arguments, *path*, *key*, and *value*
+          (commonly *p*, *k*, and *v*), same as *enter* and *visit*
+          from :func:`remap`. A non-callable raises :exc:`TypeError`.
+       reraise (bool): Whether to reraise exceptions raised by *query*
+          or to swallow them and leave that item out of the results.
+
+    With :func:`research` it's easy to inspect the details of a data
+    structure, like finding values that are at a certain depth (using
+    ``len(p)``) and much more. If more advanced functionality is
+    needed, check out the code and make your own :func:`remap`
+    wrapper, and consider `submitting a patch`_!
+
+    .. _submitting a patch: https://github.com/mahmoud/boltons/pulls
+    """
+    ret = []
+    if not callable(query):
+        raise TypeError('query expected callable, not: %r' % query)
+
+    # NOTE(review): if remap also calls enter for *root* itself, its key
+    # ends up in the recorded path -- confirm that matches get_path.
+    def enter(path, key, value):
+        try:
+            if query(path, key, value):
+                ret.append((path + (key,), value))
+        except Exception:  # query failed: re-raise or skip this item
+            if reraise:
+                raise
+        return default_enter(path, key, value)  # defer to the stock enter
+
+    remap(root, enter=enter)
+    return ret
+
+
 # TODO: recollect()
+# TODO: refilter()
 # TODO: reiter()
 
-# GUID iterators: 10x faster and somewhat more compact than uuid.
 
+# GUID iterators: 10x faster and somewhat more compact than uuid.
 
 class GUIDerator(object):
     """The GUIDerator is an iterator that yields a globally-unique
diff --git a/tests/test_iterutils.py b/tests/test_iterutils.py
index d47ae71..405ed55 100644
--- a/tests/test_iterutils.py
+++ b/tests/test_iterutils.py
@@ -4,6 +4,7 @@ import pytest
 from boltons.dictutils import OMD
 from boltons.iterutils import (first,
                                remap,
+                               research,
                                default_enter,
                                default_exit,
                                get_path)
@@ -356,6 +357,27 @@ class TestGetPath(object):
         assert get_path(root, 'key.0') == 'test'
 
 
+def test_research():
+    root = {}
+
+    with pytest.raises(TypeError):  # a non-callable query is rejected
+        research(root, query=None)
+
+    root = {'a': 'a'}
+    res = research(root, query=lambda p, k, v: v == 'a')
+    assert len(res) == 1  # exactly one value equals 'a'
+    assert res[0] == (('a',), 'a')  # (path, value): path is a key tuple
+
+    def broken_query(p, k, v):
+        raise RuntimeError()
+
+    with pytest.raises(RuntimeError):  # reraise=True propagates the error
+        research(root, broken_query, reraise=True)
+
+    # with the default reraise=False the error is swallowed: no results
+    assert research(root, broken_query) == []
+
+
 def test_backoff_basic():
     from boltons.iterutils import backoff