untested pandas support

This commit is contained in:
Casper da Costa-Luis 2015-11-29 16:07:43 +00:00
parent 53a275679e
commit 5c12158dd8
3 changed files with 80 additions and 85 deletions

View File

@ -4,8 +4,8 @@ from ._tqdm import format_interval
from ._tqdm import format_meter
from ._tqdm_gui import tqdm_gui
from ._tqdm_gui import tgrange
from ._pandas import enable_progress_apply
from ._pandas import tqdm_pandas
from ._version import __version__ # NOQA
__all__ = ['tqdm', 'tqdm_gui', 'trange', 'tgrange', 'format_interval',
'format_meter', 'enable_progress_apply', '__version__']
'format_meter', 'tqdm_pandas', '__version__']

View File

@ -1,106 +1,101 @@
import sys
import time
from tqdm._tqdm import StatusPrinter
from tqdm._tqdm import format_meter
__all__ = ['enable_progress_apply']
# `from __future__ import division` makes `/` on two integers return
# a precise float (instead of a truncated int)
from __future__ import division, absolute_import
def enable_progress_apply():
    """Register `progress_apply` on pandas' `DataFrameGroupBy`.

    Assigns the module-level `_progress_apply` function to
    `DataFrameGroupBy.progress_apply`, so that it can be called as a
    method on grouped data in place of `apply`.

    Raises
    ------
    ImportError
        If `pandas.core.groupby.DataFrameGroupBy` cannot be imported.
    """
    # Keep the `try` body minimal: only the import is expected to fail.
    try:
        from pandas.core.groupby import DataFrameGroupBy
    except ImportError:
        # Raise a real exception class: `raise (<str>, <str>)` (a tuple)
        # is itself a TypeError and would hide the intended message.
        raise ImportError("You can't enable Pandas progress apply "
                          "because Pandas is not installed")
    DataFrameGroupBy.progress_apply = _progress_apply
__author__ = {"github.com/": ["casperdcl", "hadim"]}
__all__ = ['tqdm_pandas']
def _progress_apply(groups, func, progress_kwargs={}, *args, **kwargs):
"""Add a progress bar during DataFrameGroupBy.apply(). Largely inspired from
https://stackoverflow.com/questions/18603270/progress-indicator-during-pandas-operations-python.
Parameters
----------
groups : DataFrameGroupBy
Grouped data.
func : function
To be applied on the grouped data.
progress_kwargs : dict
Parameters for the progress bar (same as for `tqdm.tqdm`).
*args and **kwargs are passed through to DataFrameGroupBy.apply()
def tqdm_pandas(t):
"""
Adds given `tqdm` instance to `DataFrameGroupBy.progress_apply()`.
Don't forget to close the `tqdm` instance afterwards
(or just use `with` syntax):
Examples
--------
>>> import time
>>> import pandas as pd
>>> import numpy as np
>>> from tqdm import tqdm, tqdm_pandas
>>> import time
>>>
>>> from tqdm import enable_progress_apply
>>> enable_progress_apply()
>>>
>>> # Now you can use `progress_apply` instead of `apply`
>>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6)))
>>> df.groupby(0).progress_apply(lambda x: time.sleep(0.01))
>>> with tqdm(...) as t:
... tqdm_pandas(t)
... # Now you can use `progress_apply` instead of `apply`
... df.groupby(0).progress_apply(lambda x: time.sleep(0.01))
References
----------
https://stackoverflow.com/questions/18603270/
progress-indicator-during-pandas-operations-python
"""
from pandas.core.groupby import DataFrameGroupBy
mininterval = progress_kwargs['mininterval'] if 'mininterval' \
in progress_kwargs.keys() else 0.5
miniters = progress_kwargs['miniters'] if 'miniters' \
in progress_kwargs.keys() else 1
file = progress_kwargs['file'] if 'file' \
in progress_kwargs.keys() else sys.stderr
desc = progress_kwargs['desc'] if 'desc' \
in progress_kwargs.keys() else ''
leave = progress_kwargs['leave'] if 'leave' \
in progress_kwargs.keys() else False
def inner(groups, func, progress_kwargs={}, *args, **kwargs):
"""
Parameters
----------
groups : DataFrameGroupBy
Grouped data.
func : function
To be applied on the grouped data.
progress_kwargs : dict
Parameters for the progress bar (same as for `tqdm`).
for key, value in progress_kwargs.items():
locals()[key] = value
*args and **kwargs are passed through to DataFrameGroupBy.apply()
"""
for key, val in progress_kwargs.items():
# TODO: do we need this?
if getattr(t, key, None) is not None:
setattr(t, key, val)
prefix = desc + ': ' if desc else ''
t.total = len(groups)
total = len(groups)
sp = StatusPrinter(file)
sp.print_status(prefix + format_meter(0, total, 0))
def progress_decorator(func):
# def progress_decorator(func):
# def wrapper(*args, **kwargs):
# start_t = wrapper.start_t
# last_print_t = wrapper.last_print_t
# last_print_n = wrapper.last_print_n
# n = wrapper.n
#
# if n - last_print_n >= miniters:
# # We check the counter first, to reduce the overhead of
# # time.time()
# cur_t = time.time()
# if cur_t - last_print_t >= mininterval:
# fmeter = format_meter(n, total, cur_t - start_t)
# sp.print_status(prefix + fmeter)
# last_print_n = n
# last_print_t = cur_t
#
# wrapper.n += 1
#
# return func(*args, **kwargs)
#
# wrapper.start_t = time.time()
# wrapper.last_print_t = wrapper.start_t
# wrapper.last_print_n = 0
# wrapper.n = 0
#
# return wrapper
# progress_func = progress_decorator(func)
# result = groups.apply(progress_func, *args, **kwargs)
def wrapper(*args, **kwargs):
start_t = wrapper.start_t
last_print_t = wrapper.last_print_t
last_print_n = wrapper.last_print_n
n = wrapper.n
if n - last_print_n >= miniters:
# We check the counter first, to reduce the overhead of
# time.time()
cur_t = time.time()
if cur_t - last_print_t >= mininterval:
fmeter = format_meter(n, total, cur_t - start_t)
sp.print_status(prefix + fmeter)
last_print_n = n
last_print_t = cur_t
wrapper.n += 1
t.update()
return func(*args, **kwargs)
wrapper.start_t = time.time()
wrapper.last_print_t = wrapper.start_t
wrapper.last_print_n = 0
wrapper.n = 0
result = groups.apply(wrapper, *args, **kwargs)
return wrapper
# if not leave:
# sp.print_status('')
# sys.stdout.write('\r')
# TODO: check if above can be replaced by:
t.close()
progress_func = progress_decorator(func)
result = groups.apply(progress_func, *args, **kwargs)
return result
if not leave:
sp.print_status('')
sys.stdout.write('\r')
return result
# Enable custom tqdm progress in pandas!
DataFrameGroupBy.progress_apply = inner

View File

@ -11,8 +11,8 @@ Usage:
# a result precise floating numbers (instead of truncated int)
from __future__ import division, absolute_import
# import compatibility functions and utilities
from ._utils import _range
from time import time
from ._utils import _range
# to inherit from the tqdm class
from ._tqdm import tqdm, format_meter