mirror of https://github.com/tqdm/tqdm.git
untested pandas support
This commit is contained in:
parent
53a275679e
commit
5c12158dd8
|
@ -4,8 +4,8 @@ from ._tqdm import format_interval
|
|||
from ._tqdm import format_meter
|
||||
from ._tqdm_gui import tqdm_gui
|
||||
from ._tqdm_gui import tgrange
|
||||
from ._pandas import enable_progress_apply
|
||||
from ._pandas import tqdm_pandas
|
||||
from ._version import __version__ # NOQA
|
||||
|
||||
__all__ = ['tqdm', 'tqdm_gui', 'trange', 'tgrange', 'format_interval',
|
||||
'format_meter', 'enable_progress_apply', '__version__']
|
||||
'format_meter', 'tqdm_pandas', '__version__']
|
||||
|
|
159
tqdm/_pandas.py
159
tqdm/_pandas.py
|
@ -1,106 +1,101 @@
|
|||
import sys
|
||||
import time
|
||||
|
||||
from tqdm._tqdm import StatusPrinter
|
||||
from tqdm._tqdm import format_meter
|
||||
|
||||
__all__ = ['enable_progress_apply']
|
||||
# future division is important to divide integers and get as
|
||||
# a result precise floating numbers (instead of truncated int)
|
||||
from __future__ import division, absolute_import
|
||||
|
||||
|
||||
def enable_progress_apply():
    """Register `progress_apply` on pandas' `DataFrameGroupBy`.

    Assigns the module-level `_progress_apply` function to
    `DataFrameGroupBy.progress_apply`, so grouped frames gain a
    `progress_apply` method.

    Raises
    ------
    ImportError
        If `pandas.core.groupby.DataFrameGroupBy` cannot be imported.
    """
    try:
        from pandas.core.groupby import DataFrameGroupBy
    except ImportError:
        # The previous `raise(<tuple of str>)` was not a valid exception and
        # itself failed with TypeError; raise a real ImportError carrying the
        # intended message instead.
        raise ImportError("You can't enable Pandas progress apply "
                          "because Pandas is not installed")
    # Only the import is guarded: the assignment cannot raise ImportError.
    DataFrameGroupBy.progress_apply = _progress_apply
|
||||
__author__ = {"github.com/": ["casperdcl", "hadim"]}
|
||||
__all__ = ['tqdm_pandas']
|
||||
|
||||
|
||||
def _progress_apply(groups, func, progress_kwargs={}, *args, **kwargs):
|
||||
"""Add a progress bar during DataFrameGroupBy.apply(). Largely inspired from
|
||||
https://stackoverflow.com/questions/18603270/progress-indicator-during-pandas-operations-python.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
groups : DataFrameGroupBy
|
||||
Grouped data.
|
||||
func : function
|
||||
To be applied on the grouped data.
|
||||
progress_kwargs : dict
|
||||
Parameters for the progress bar (same as for `tqdm.tqdm`).
|
||||
|
||||
*args and **kwargs are passed through to DataFrameGroupBy.apply()
|
||||
def tqdm_pandas(t):
|
||||
"""
|
||||
Adds given `tqdm` instance to `DataFrameGroupBy.progress_apply()`.
|
||||
Don't forget to close the `tqdm` instance afterwards
|
||||
(or just use `with` syntax):
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import time
|
||||
>>> import pandas as pd
|
||||
>>> import numpy as np
|
||||
>>> from tqdm import tqdm, tqdm_pandas
|
||||
>>> from time import time
|
||||
>>>
|
||||
>>> from tqdm import enable_progress_apply
|
||||
>>> enable_progress_apply()
|
||||
>>>
|
||||
>>> # Now you can use `progress_apply` instead of `apply`
|
||||
>>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6)))
|
||||
>>> df.groupby(0).progress_apply(lambda x: time.sleep(0.01))
|
||||
>>> with tqdm(...) as t:
|
||||
... tqdm_pandas(t)
|
||||
... # Now you can use `progress_apply` instead of `apply`
|
||||
... df.groupby(0).progress_apply(lambda x: time.sleep(0.01))
|
||||
|
||||
References
|
||||
----------
|
||||
https://stackoverflow.com/questions/18603270/
|
||||
progress-indicator-during-pandas-operations-python
|
||||
"""
|
||||
from pandas.core.groupby import DataFrameGroupBy
|
||||
|
||||
mininterval = progress_kwargs['mininterval'] if 'mininterval' \
|
||||
in progress_kwargs.keys() else 0.5
|
||||
miniters = progress_kwargs['miniters'] if 'miniters' \
|
||||
in progress_kwargs.keys() else 1
|
||||
file = progress_kwargs['file'] if 'file' \
|
||||
in progress_kwargs.keys() else sys.stderr
|
||||
desc = progress_kwargs['desc'] if 'desc' \
|
||||
in progress_kwargs.keys() else ''
|
||||
leave = progress_kwargs['leave'] if 'leave' \
|
||||
in progress_kwargs.keys() else False
|
||||
def inner(groups, func, progress_kwargs={}, *args, **kwargs):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
groups : DataFrameGroupBy
|
||||
Grouped data.
|
||||
func : function
|
||||
To be applied on the grouped data.
|
||||
progress_kwargs : dict
|
||||
Parameters for the progress bar (same as for `tqdm`).
|
||||
|
||||
for key, value in progress_kwargs.items():
|
||||
locals()[key] = value
|
||||
*args and **kwargs are passed through to DataFrameGroupBy.apply()
|
||||
"""
|
||||
for key, val in progress_kwargs.items():
|
||||
# TODO: do we need this?
|
||||
if getattr(t, key, None) is not None:
|
||||
setattr(t, key, val)
|
||||
|
||||
prefix = desc + ': ' if desc else ''
|
||||
t.total = len(groups)
|
||||
|
||||
total = len(groups)
|
||||
|
||||
sp = StatusPrinter(file)
|
||||
sp.print_status(prefix + format_meter(0, total, 0))
|
||||
|
||||
def progress_decorator(func):
|
||||
# def progress_decorator(func):
|
||||
# def wrapper(*args, **kwargs):
|
||||
# start_t = wrapper.start_t
|
||||
# last_print_t = wrapper.last_print_t
|
||||
# last_print_n = wrapper.last_print_n
|
||||
# n = wrapper.n
|
||||
#
|
||||
# if n - last_print_n >= miniters:
|
||||
# # We check the counter first, to reduce the overhead of
|
||||
# # time.time()
|
||||
# cur_t = time.time()
|
||||
# if cur_t - last_print_t >= mininterval:
|
||||
# fmeter = format_meter(n, total, cur_t - start_t)
|
||||
# sp.print_status(prefix + fmeter)
|
||||
# last_print_n = n
|
||||
# last_print_t = cur_t
|
||||
#
|
||||
# wrapper.n += 1
|
||||
#
|
||||
# return func(*args, **kwargs)
|
||||
#
|
||||
# wrapper.start_t = time.time()
|
||||
# wrapper.last_print_t = wrapper.start_t
|
||||
# wrapper.last_print_n = 0
|
||||
# wrapper.n = 0
|
||||
#
|
||||
# return wrapper
|
||||
# progress_func = progress_decorator(func)
|
||||
# result = groups.apply(progress_func, *args, **kwargs)
|
||||
|
||||
def wrapper(*args, **kwargs):
|
||||
|
||||
start_t = wrapper.start_t
|
||||
last_print_t = wrapper.last_print_t
|
||||
last_print_n = wrapper.last_print_n
|
||||
n = wrapper.n
|
||||
|
||||
if n - last_print_n >= miniters:
|
||||
# We check the counter first, to reduce the overhead of
|
||||
# time.time()
|
||||
cur_t = time.time()
|
||||
if cur_t - last_print_t >= mininterval:
|
||||
fmeter = format_meter(n, total, cur_t - start_t)
|
||||
sp.print_status(prefix + fmeter)
|
||||
last_print_n = n
|
||||
last_print_t = cur_t
|
||||
|
||||
wrapper.n += 1
|
||||
t.update()
|
||||
return func(*args, **kwargs)
|
||||
|
||||
wrapper.start_t = time.time()
|
||||
wrapper.last_print_t = wrapper.start_t
|
||||
wrapper.last_print_n = 0
|
||||
wrapper.n = 0
|
||||
result = groups.apply(wrapper, *args, **kwargs)
|
||||
|
||||
return wrapper
|
||||
# if not leave:
|
||||
# sp.print_status('')
|
||||
# sys.stdout.write('\r')
|
||||
# TODO: check if above can be replaced by:
|
||||
t.close()
|
||||
|
||||
progress_func = progress_decorator(func)
|
||||
result = groups.apply(progress_func, *args, **kwargs)
|
||||
return result
|
||||
|
||||
if not leave:
|
||||
sp.print_status('')
|
||||
sys.stdout.write('\r')
|
||||
|
||||
return result
|
||||
# Enable custom tqdm progress in pandas!
|
||||
DataFrameGroupBy.progress_apply = inner
|
||||
|
|
|
@ -11,8 +11,8 @@ Usage:
|
|||
# a result precise floating numbers (instead of truncated int)
|
||||
from __future__ import division, absolute_import
|
||||
# import compatibility functions and utilities
|
||||
from ._utils import _range
|
||||
from time import time
|
||||
from ._utils import _range
|
||||
# to inherit from the tqdm class
|
||||
from ._tqdm import tqdm, format_meter
|
||||
|
||||
|
|
Loading…
Reference in New Issue