automated constructing and destructing of tqdm_pandas, neater tests and documentation

This commit is contained in:
Casper da Costa-Luis 2015-11-29 16:46:32 +00:00
parent 5c12158dd8
commit ec5f157df8
2 changed files with 52 additions and 85 deletions

View File

@ -2,38 +2,39 @@
# a result precise floating numbers (instead of truncated int)
from __future__ import division, absolute_import
from tqdm import tqdm
__author__ = {"github.com/": ["casperdcl", "hadim"]}
__author__ = "github.com/casperdcl"
__all__ = ['tqdm_pandas']
def tqdm_pandas(t):
def tqdm_pandas(**kwargs):
"""
Adds given `tqdm` instance to `DataFrameGroupBy.progress_apply()`.
Don't forget to close the `tqdm` instance afterwards
(or just use `with` syntax):
Creates a `tqdm` instance with the given kwargs and registers it with
`pandas.core.groupby.DataFrameGroupBy.progress_apply`.
It will even close() the `tqdm` instance upon completion.
Examples
--------
>>> import pandas as pd
>>> import numpy as np
>>> from tqdm import tqdm, tqdm_pandas
>>> from time import time
>>>
>>> df = pd.DataFrame(np.random.randint(0, 100, (100000, 6)))
>>> with tqdm(...) as t:
... tqdm_pandas(t)
... # Now you can use `progress_apply` instead of `apply`
... df.groupby(0).progress_apply(lambda x: time.sleep(0.01))
>>> tqdm_pandas() # can specify any optional kwargs (same as for `tqdm`)
>>> # Now you can use `progress_apply` instead of `apply`
>>> df.groupby(0).progress_apply(lambda x: x**2)
References
----------
https://stackoverflow.com/questions/18603270/
progress-indicator-during-pandas-operations-python
"""
t = tqdm(**kwargs)
from pandas.core.groupby import DataFrameGroupBy
def inner(groups, func, progress_kwargs={}, *args, **kwargs):
def inner(groups, func, *args, **kwargs):
"""
Parameters
----------
@ -41,47 +42,10 @@ def tqdm_pandas(t):
Grouped data.
func : function
To be applied on the grouped data.
progress_kwargs : dict
Parameters for the progress bar (same as for `tqdm`).
*args and **kwargs are transmitted to DataFrameGroupBy.apply()
"""
for key, val in progress_kwargs.items():
# TODO: do we need this?
if getattr(t, key, None) is not None:
setattr(t, key, val)
t.total = len(groups)
# def progress_decorator(func):
# def wrapper(*args, **kwargs):
# start_t = wrapper.start_t
# last_print_t = wrapper.last_print_t
# last_print_n = wrapper.last_print_n
# n = wrapper.n
#
# if n - last_print_n >= miniters:
# # We check the counter first, to reduce the overhead of
# # time.time()
# cur_t = time.time()
# if cur_t - last_print_t >= mininterval:
# fmeter = format_meter(n, total, cur_t - start_t)
# sp.print_status(prefix + fmeter)
# last_print_n = n
# last_print_t = cur_t
#
# wrapper.n += 1
#
# return func(*args, **kwargs)
#
# wrapper.start_t = time.time()
# wrapper.last_print_t = wrapper.start_t
# wrapper.last_print_n = 0
# wrapper.n = 0
#
# return wrapper
# progress_func = progress_decorator(func)
# result = groups.apply(progress_func, *args, **kwargs)
t.total = len(groups) + 1 # pandas calls update once too many
def wrapper(*args, **kwargs):
t.update()
@ -89,10 +53,6 @@ def tqdm_pandas(t):
result = groups.apply(wrapper, *args, **kwargs)
# if not leave:
# sp.print_status('')
# sys.stdout.write('\r')
# TODO: check if above can be replaced by:
t.close()
return result

View File

@ -1,54 +1,61 @@
from __future__ import unicode_literals
from nose.plugins.skip import SkipTest
try:
from StringIO import StringIO
except:
from io import StringIO
import time
from nose.plugins.skip import SkipTest
from nose.tools import with_setup
# Ensure we can use `with closing(...) as ... :` syntax
if getattr(StringIO, '__exit__', False) and \
getattr(StringIO, '__enter__', False):
def closing(arg):
return arg
else:
from contextlib import closing
def setup_pandas():
def test_pandas():
import pandas as pd
import numpy as np
try:
from tqdm import enable_progress_apply
enable_progress_apply()
from tqdm import tqdm_pandas
except:
raise SkipTest
with closing(StringIO()) as our_file:
df = pd.DataFrame(np.random.randint(0, 100, (1000, 6)))
tqdm_pandas(file=our_file, leave=False)
df.groupby(0).progress_apply(lambda x: None)
@with_setup(setup_pandas)
def test_pandas():
our_file.seek(0)
import pandas as pd
import numpy as np
our_file = StringIO()
df = pd.DataFrame(np.random.randint(0, 100, (1000, 6)))
df.groupby(0).progress_apply(lambda x: time.sleep(0.01),
progress_kwargs=dict(file=our_file,
leave=False))
our_file.seek(0)
assert "|##########| 100/100 100%" in our_file.read()
try:
# don't expect final output since no `leave` and
# high dynamic `miniters`
assert '100%|##########| 101/101' not in our_file.read()
except:
raise AssertionError('Did not expect:\n\t100%|##########| 101/101')
@with_setup(setup_pandas)
def test_pandas_leave():
import pandas as pd
import numpy as np
try:
from tqdm import tqdm_pandas
except:
raise SkipTest
our_file = StringIO()
with closing(StringIO()) as our_file:
df = pd.DataFrame(np.random.randint(0, 100, (1000, 6)))
tqdm_pandas(file=our_file, leave=True)
df.groupby(0).progress_apply(lambda x: None)
df = pd.DataFrame(np.random.randint(0, 100, (1000, 6)))
df.groupby(0).progress_apply(lambda x: time.sleep(0.01),
progress_kwargs=dict(file=our_file,
leave=True))
our_file.seek(0)
our_file.seek(0)
assert "|##########| 100/100 100%" in our_file.read()
try:
assert '100%|##########| 101/101' in our_file.read()
except:
our_file.seek(0)
raise AssertionError('\n'.join(('Expected:',
'100%|##########| 101/101', 'Got:',
our_file.read())))