diff --git a/tqdm/__init__.py b/tqdm/__init__.py index a2b6b4e2..57b86abc 100644 --- a/tqdm/__init__.py +++ b/tqdm/__init__.py @@ -4,8 +4,8 @@ from ._tqdm import format_interval from ._tqdm import format_meter from ._tqdm_gui import tqdm_gui from ._tqdm_gui import tgrange -from ._pandas import enable_progress_apply +from ._pandas import tqdm_pandas from ._version import __version__ # NOQA __all__ = ['tqdm', 'tqdm_gui', 'trange', 'tgrange', 'format_interval', - 'format_meter', 'enable_progress_apply', '__version__'] + 'format_meter', 'tqdm_pandas', '__version__'] diff --git a/tqdm/_pandas.py b/tqdm/_pandas.py index e0224a71..480990cd 100644 --- a/tqdm/_pandas.py +++ b/tqdm/_pandas.py @@ -1,106 +1,101 @@ -import sys -import time - -from tqdm._tqdm import StatusPrinter -from tqdm._tqdm import format_meter - -__all__ = ['enable_progress_apply'] +# future division is important to divide integers and get as +# a result precise floating numbers (instead of truncated int) +from __future__ import division, absolute_import -def enable_progress_apply(): - try: - from pandas.core.groupby import DataFrameGroupBy - DataFrameGroupBy.progress_apply = _progress_apply - except ImportError: - raise("You can't enable Pandas progress apply ", - "because Pandas is not installed") +__author__ = {"github.com/": ["casperdcl", "hadim"]} +__all__ = ['tqdm_pandas'] -def _progress_apply(groups, func, progress_kwargs={}, *args, **kwargs): - """Add a progress bar during DataFrameGroupBy.apply(). Largely inspired from - https://stackoverflow.com/questions/18603270/progress-indicator-during-pandas-operations-python. - - Parameters - ---------- - groups : DataFrameGroupBy - Grouped data. - func : function - To be applied on the grouped data. - progress_kwargs : dict - Parameters for the progress bar (same as for `tqdm.tqdm`). - - *args and *kwargs are transmitted to DataFrameGroupBy.apply() +def tqdm_pandas(t): + """ + Adds given `tqdm` instance to `DataFrameGroupBy.progress_apply()`. 
+ Don't forget to close the `tqdm` instance afterwards + (or just use `with` syntax): Examples -------- - >>> import time >>> import pandas as pd >>> import numpy as np + >>> from tqdm import tqdm, tqdm_pandas + >>> import time >>> - >>> from tqdm import enable_progress_apply - >>> enable_progress_apply() - >>> - >>> # Now you can use `progress_apply` instead of `apply` >>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6))) - >>> df.groupby(0).progress_apply(lambda x: time.sleep(0.01)) + >>> with tqdm(...) as t: + ... tqdm_pandas(t) + ... # Now you can use `progress_apply` instead of `apply` + ... df.groupby(0).progress_apply(lambda x: time.sleep(0.01)) + References + ---------- + https://stackoverflow.com/questions/18603270/ + progress-indicator-during-pandas-operations-python """ + from pandas.core.groupby import DataFrameGroupBy - mininterval = progress_kwargs['mininterval'] if 'mininterval' \ - in progress_kwargs.keys() else 0.5 - miniters = progress_kwargs['miniters'] if 'miniters' \ - in progress_kwargs.keys() else 1 - file = progress_kwargs['file'] if 'file' \ - in progress_kwargs.keys() else sys.stderr - desc = progress_kwargs['desc'] if 'desc' \ - in progress_kwargs.keys() else '' - leave = progress_kwargs['leave'] if 'leave' \ - in progress_kwargs.keys() else False + def inner(groups, func, progress_kwargs={}, *args, **kwargs): + """ + Parameters + ---------- + groups : DataFrameGroupBy + Grouped data. + func : function + To be applied on the grouped data. + progress_kwargs : dict + Parameters for the progress bar (same as for `tqdm`). - for key, value in progress_kwargs.items(): - locals()[key] = value + *args and **kwargs are transmitted to DataFrameGroupBy.apply() + """ + for key, val in progress_kwargs.items(): + # TODO: do we need this? 
+ if getattr(t, key, None) is not None: + setattr(t, key, val) - prefix = desc + ': ' if desc else '' + t.total = len(groups) - total = len(groups) - - sp = StatusPrinter(file) - sp.print_status(prefix + format_meter(0, total, 0)) - - def progress_decorator(func): + # def progress_decorator(func): + # def wrapper(*args, **kwargs): + # start_t = wrapper.start_t + # last_print_t = wrapper.last_print_t + # last_print_n = wrapper.last_print_n + # n = wrapper.n + # + # if n - last_print_n >= miniters: + # # We check the counter first, to reduce the overhead of + # # time.time() + # cur_t = time.time() + # if cur_t - last_print_t >= mininterval: + # fmeter = format_meter(n, total, cur_t - start_t) + # sp.print_status(prefix + fmeter) + # last_print_n = n + # last_print_t = cur_t + # + # wrapper.n += 1 + # + # return func(*args, **kwargs) + # + # wrapper.start_t = time.time() + # wrapper.last_print_t = wrapper.start_t + # wrapper.last_print_n = 0 + # wrapper.n = 0 + # + # return wrapper + # progress_func = progress_decorator(func) + # result = groups.apply(progress_func, *args, **kwargs) def wrapper(*args, **kwargs): - - start_t = wrapper.start_t - last_print_t = wrapper.last_print_t - last_print_n = wrapper.last_print_n - n = wrapper.n - - if n - last_print_n >= miniters: - # We check the counter first, to reduce the overhead of - # time.time() - cur_t = time.time() - if cur_t - last_print_t >= mininterval: - fmeter = format_meter(n, total, cur_t - start_t) - sp.print_status(prefix + fmeter) - last_print_n = n - last_print_t = cur_t - - wrapper.n += 1 + t.update() return func(*args, **kwargs) - wrapper.start_t = time.time() - wrapper.last_print_t = wrapper.start_t - wrapper.last_print_n = 0 - wrapper.n = 0 + result = groups.apply(wrapper, *args, **kwargs) - return wrapper + # if not leave: + # sp.print_status('') + # sys.stdout.write('\r') + # TODO: check if above can be replaced by: + t.close() - progress_func = progress_decorator(func) - result = 
groups.apply(progress_func, *args, **kwargs) + return result - if not leave: - sp.print_status('') - sys.stdout.write('\r') - - return result + # Enable custom tqdm progress in pandas! + DataFrameGroupBy.progress_apply = inner diff --git a/tqdm/_tqdm_gui.py b/tqdm/_tqdm_gui.py index bd7e111e..dd951da8 100644 --- a/tqdm/_tqdm_gui.py +++ b/tqdm/_tqdm_gui.py @@ -11,8 +11,8 @@ Usage: # a result precise floating numbers (instead of truncated int) from __future__ import division, absolute_import # import compatibility functions and utilities -from ._utils import _range from time import time +from ._utils import _range # to inherit from the tqdm class from ._tqdm import tqdm, format_meter