From ec5f157df8eddf3ceff0cff28257d02a2a5ad528 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 29 Nov 2015 16:46:32 +0000 Subject: [PATCH] automated constructing and destructing of tqdm_pandas, neater tests and documentation --- tqdm/_pandas.py | 66 +++++++---------------------------- tqdm/tests/tests_pandas.py | 71 +++++++++++++++++++++----------------- 2 files changed, 52 insertions(+), 85 deletions(-) diff --git a/tqdm/_pandas.py b/tqdm/_pandas.py index 480990cd..dec17aee 100644 --- a/tqdm/_pandas.py +++ b/tqdm/_pandas.py @@ -2,38 +2,39 @@ # a result precise floating numbers (instead of truncated int) from __future__ import division, absolute_import +from tqdm import tqdm -__author__ = {"github.com/": ["casperdcl", "hadim"]} + +__author__ = "github.com/casperdcl" __all__ = ['tqdm_pandas'] -def tqdm_pandas(t): +def tqdm_pandas(**kwargs): """ - Adds given `tqdm` instance to `DataFrameGroupBy.progress_apply()`. - Don't forget to close the `tqdm` instance afterwards - (or just use `with` syntax): + Creates a `tqdm` instance with the given kwargs and registers it with + `pandas.core.groupby.DataFrameGroupBy.progress_apply`. + It will even close() the `tqdm` instance upon completion. Examples -------- >>> import pandas as pd >>> import numpy as np >>> from tqdm import tqdm, tqdm_pandas - >>> form time import time >>> >>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6))) - >>> with tqdm(...) as t: - ... tqdm_pandas(t) - ... # Now you can use `progress_apply` instead of `apply` - ... 
df.groupby(0).progress_apply(lambda x: time.sleep(0.01)) + >>> tqdm_pandas() # can specify any optional kwargs (same as for `tqdm`) + >>> # Now you can use `progress_apply` instead of `apply` + >>> df.groupby(0).progress_apply(lambda x: x**2) References ---------- https://stackoverflow.com/questions/18603270/ progress-indicator-during-pandas-operations-python """ + t = tqdm(**kwargs) from pandas.core.groupby import DataFrameGroupBy - def inner(groups, func, progress_kwargs={}, *args, **kwargs): + def inner(groups, func, *args, **kwargs): """ Parameters ---------- @@ -41,47 +42,10 @@ def tqdm_pandas(t): Grouped data. func : function To be applied on the grouped data. - progress_kwargs : dict - Parameters for the progress bar (same as for `tqdm`). *args and *kwargs are transmitted to DataFrameGroupBy.apply() """ - for key, val in progress_kwargs.items(): - # TODO: do we need this? - if getattr(t, key, None) is not None: - setattr(t, key, val) - - t.total = len(groups) - - # def progress_decorator(func): - # def wrapper(*args, **kwargs): - # start_t = wrapper.start_t - # last_print_t = wrapper.last_print_t - # last_print_n = wrapper.last_print_n - # n = wrapper.n - # - # if n - last_print_n >= miniters: - # # We check the counter first, to reduce the overhead of - # # time.time() - # cur_t = time.time() - # if cur_t - last_print_t >= mininterval: - # fmeter = format_meter(n, total, cur_t - start_t) - # sp.print_status(prefix + fmeter) - # last_print_n = n - # last_print_t = cur_t - # - # wrapper.n += 1 - # - # return func(*args, **kwargs) - # - # wrapper.start_t = time.time() - # wrapper.last_print_t = wrapper.start_t - # wrapper.last_print_n = 0 - # wrapper.n = 0 - # - # return wrapper - # progress_func = progress_decorator(func) - # result = groups.apply(progress_func, *args, **kwargs) + t.total = len(groups) + 1 # pandas calls update once too many def wrapper(*args, **kwargs): t.update() @@ -89,10 +53,6 @@ def tqdm_pandas(t): result = groups.apply(wrapper, *args, 
**kwargs) - # if not leave: - # sp.print_status('') - # sys.stdout.write('\r') - # TODO: check if above can be replaced by: t.close() return result diff --git a/tqdm/tests/tests_pandas.py b/tqdm/tests/tests_pandas.py index 3077c03e..05629513 100644 --- a/tqdm/tests/tests_pandas.py +++ b/tqdm/tests/tests_pandas.py @@ -1,54 +1,61 @@ from __future__ import unicode_literals +from nose.plugins.skip import SkipTest try: from StringIO import StringIO except: from io import StringIO - -import time - -from nose.plugins.skip import SkipTest -from nose.tools import with_setup +# Ensure we can use `with closing(...) as ... :` syntax +if getattr(StringIO, '__exit__', False) and \ + getattr(StringIO, '__enter__', False): + def closing(arg): + return arg +else: + from contextlib import closing -def setup_pandas(): +def test_pandas(): + import pandas as pd + import numpy as np try: - from tqdm import enable_progress_apply - enable_progress_apply() + from tqdm import tqdm_pandas except: raise SkipTest + with closing(StringIO()) as our_file: + df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) + tqdm_pandas(file=our_file, leave=False) + df.groupby(0).progress_apply(lambda x: None) -@with_setup(setup_pandas) -def test_pandas(): + our_file.seek(0) - import pandas as pd - import numpy as np - - our_file = StringIO() - - df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) - df.groupby(0).progress_apply(lambda x: time.sleep(0.01), - progress_kwargs=dict(file=our_file, - leave=False)) - - our_file.seek(0) - - assert "|##########| 100/100 100%" in our_file.read() + try: + # don't expect final output since no `leave` and + # high dynamic `miniters` + assert '100%|##########| 101/101' not in our_file.read() + except: + raise AssertionError('Did not expect:\n\t100%|##########| 101/101') -@with_setup(setup_pandas) def test_pandas_leave(): - import pandas as pd import numpy as np + try: + from tqdm import tqdm_pandas + except: + raise SkipTest - our_file = StringIO() + with 
closing(StringIO()) as our_file: + df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) + tqdm_pandas(file=our_file, leave=True) + df.groupby(0).progress_apply(lambda x: None) - df = pd.DataFrame(np.random.randint(0, 100, (1000, 6))) - df.groupby(0).progress_apply(lambda x: time.sleep(0.01), - progress_kwargs=dict(file=our_file, - leave=True)) - our_file.seek(0) + our_file.seek(0) - assert "|##########| 100/100 100%" in our_file.read() + try: + assert '100%|##########| 101/101' in our_file.read() + except: + our_file.seek(0) + raise AssertionError('\n'.join(('Expected:', + '100%|##########| 101/101', 'Got:', + our_file.read())))