automated constructing and destructing of tqdm_pandas, neater tests and documentation

2015-11-29 16:46:32 +00:00 · 2015-11-29 16:46:32 +00:00 · ec5f157df8
parent 5c12158dd8
commit ec5f157df8
2 changed files with 52 additions and 85 deletions
--- a/tqdm/_pandas.py
+++ b/tqdm/_pandas.py
@ -2,38 +2,39 @@
 # a result precise floating numbers (instead of truncated int)
 from __future__ import division, absolute_import

+from tqdm import tqdm

-__author__ = {"github.com/": ["casperdcl", "hadim"]}
+
+__author__ = "github.com/casperdcl"
 __all__ = ['tqdm_pandas']


-def tqdm_pandas(t):
+def tqdm_pandas(**kwargs):
    """
-    Adds given `tqdm` instance to `DataFrameGroupBy.progress_apply()`.
-    Don't forget to close the `tqdm` instance afterwards
-    (or just use `with` syntax):
+    Creates a `tqdm` instance with the given kwargs and registers it with
+    `pandas.core.groupby.DataFrameGroupBy.progress_apply`.
+    It will even close() the `tqdm` instance upon completion.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> from tqdm import tqdm, tqdm_pandas
-    >>> form time import time
    >>>
    >>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6)))
-    >>> with tqdm(...) as t:
-    ...     tqdm_pandas(t)
-    ...     # Now you can use `progress_apply` instead of `apply`
-    ...     df.groupby(0).progress_apply(lambda x: time.sleep(0.01))
+    >>> tqdm_pandas()  # can specify any optional kwargs (same as for `tqdm`)
+    >>> # Now you can use `progress_apply` instead of `apply`
+    >>> df.groupby(0).progress_apply(lambda x: x**2)

    References
    ----------
    https://stackoverflow.com/questions/18603270/
    progress-indicator-during-pandas-operations-python
    """
+    t = tqdm(**kwargs)
    from pandas.core.groupby import DataFrameGroupBy

-    def inner(groups, func, progress_kwargs={}, *args, **kwargs):
+    def inner(groups, func, *args, **kwargs):
        """
        Parameters
        ----------
@ -41,47 +42,10 @@ def tqdm_pandas(t):
            Grouped data.
        func  : function
            To be applied on the grouped data.
-        progress_kwargs  : dict
-            Parameters for the progress bar (same as for `tqdm`).

        *args and *kwargs are transmitted to DataFrameGroupBy.apply()
        """
-        for key, val in progress_kwargs.items():
-            # TODO: do we need this?
-            if getattr(t, key, None) is not None:
-                setattr(t, key, val)
-
-        t.total = len(groups)
-
-        # def progress_decorator(func):
-        #     def wrapper(*args, **kwargs):
-        #         start_t = wrapper.start_t
-        #         last_print_t = wrapper.last_print_t
-        #         last_print_n = wrapper.last_print_n
-        #         n = wrapper.n
-        #
-        #         if n - last_print_n >= miniters:
-        #             # We check the counter first, to reduce the overhead of
-        #             # time.time()
-        #             cur_t = time.time()
-        #             if cur_t - last_print_t >= mininterval:
-        #                 fmeter = format_meter(n, total, cur_t - start_t)
-        #                 sp.print_status(prefix + fmeter)
-        #                 last_print_n = n
-        #                 last_print_t = cur_t
-        #
-        #         wrapper.n += 1
-        #
-        #         return func(*args, **kwargs)
-        #
-        #     wrapper.start_t = time.time()
-        #     wrapper.last_print_t = wrapper.start_t
-        #     wrapper.last_print_n = 0
-        #     wrapper.n = 0
-        #
-        #     return wrapper
-        # progress_func = progress_decorator(func)
-        # result = groups.apply(progress_func, *args, **kwargs)
+        t.total = len(groups) + 1  # pandas calls update once too many

        def wrapper(*args, **kwargs):
            t.update()
@ -89,10 +53,6 @@ def tqdm_pandas(t):

        result = groups.apply(wrapper, *args, **kwargs)

-        # if not leave:
-        #     sp.print_status('')
-        #     sys.stdout.write('\r')
-        # TODO: check if above can be replaced by:
        t.close()

        return result
--- a/tqdm/tests/tests_pandas.py
+++ b/tqdm/tests/tests_pandas.py
@ -1,54 +1,61 @@
 from __future__ import unicode_literals
+from nose.plugins.skip import SkipTest

 try:
    from StringIO import StringIO
 except:
    from io import StringIO
-
-import time
-
-from nose.plugins.skip import SkipTest
-from nose.tools import with_setup
+# Ensure we can use `with closing(...) as ... :` syntax
+if getattr(StringIO, '__exit__', False) and \
+   getattr(StringIO, '__enter__', False):
+    def closing(arg):
+        return arg
+else:
+    from contextlib import closing


-def setup_pandas():
+def test_pandas():
+    import pandas as pd
+    import numpy as np
    try:
-        from tqdm import enable_progress_apply
-        enable_progress_apply()
+        from tqdm import tqdm_pandas
    except:
        raise SkipTest

+    with closing(StringIO()) as our_file:
+        df = pd.DataFrame(np.random.randint(0, 100, (1000, 6)))
+        tqdm_pandas(file=our_file, leave=False)
+        df.groupby(0).progress_apply(lambda x: None)

-@with_setup(setup_pandas)
-def test_pandas():
+        our_file.seek(0)

-    import pandas as pd
-    import numpy as np
-
-    our_file = StringIO()
-
-    df = pd.DataFrame(np.random.randint(0, 100, (1000, 6)))
-    df.groupby(0).progress_apply(lambda x: time.sleep(0.01),
-                                 progress_kwargs=dict(file=our_file,
-                                                      leave=False))
-
-    our_file.seek(0)
-
-    assert "|##########| 100/100 100%" in our_file.read()
+        try:
+            # don't expect final output since no `leave` and
+            # high dynamic `miniters`
+            assert '100%|##########| 101/101' not in our_file.read()
+        except:
+            raise AssertionError('Did not expect:\n\t100%|##########| 101/101')


-@with_setup(setup_pandas)
 def test_pandas_leave():
-
    import pandas as pd
    import numpy as np
+    try:
+        from tqdm import tqdm_pandas
+    except:
+        raise SkipTest

-    our_file = StringIO()
+    with closing(StringIO()) as our_file:
+        df = pd.DataFrame(np.random.randint(0, 100, (1000, 6)))
+        tqdm_pandas(file=our_file, leave=True)
+        df.groupby(0).progress_apply(lambda x: None)

-    df = pd.DataFrame(np.random.randint(0, 100, (1000, 6)))
-    df.groupby(0).progress_apply(lambda x: time.sleep(0.01),
-                                 progress_kwargs=dict(file=our_file,
-                                                      leave=True))
-    our_file.seek(0)
+        our_file.seek(0)

-    assert "|##########| 100/100 100%" in our_file.read()
+        try:
+            assert '100%|##########| 101/101' in our_file.read()
+        except:
+            our_file.seek(0)
+            raise AssertionError('\n'.join(('Expected:',
+                                            '100%|##########| 101/101', 'Got:',
+                                            our_file.read())))