Improve profiling

This commit is contained in:
Matthew Honnibal 2017-11-15 13:51:25 +01:00
parent b797dca977
commit 8d692771f6
1 changed file with 7 additions and 2 deletions

View File

@@ -11,6 +11,7 @@ import spacy
import sys import sys
import tqdm import tqdm
import cytoolz import cytoolz
import thinc.extra.datasets
def read_inputs(loc): def read_inputs(loc):
@@ -32,14 +33,18 @@ def profile(cmd, lang, inputs=None):
""" """
Profile a spaCy pipeline, to find out which functions take the most time. Profile a spaCy pipeline, to find out which functions take the most time.
""" """
if inputs is None:
imdb_train, _ = thinc.extra.datasets.imdb()
inputs, _ = zip(*imdb_train)
inputs = inputs[:2000]
nlp = spacy.load(lang) nlp = spacy.load(lang)
texts = list(cytoolz.take(10000, inputs)) texts = list(cytoolz.take(10000, inputs))
cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(), cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(),
"Profile.prof") "Profile.prof")
s = pstats.Stats("Profile.prof") s = pstats.Stats("Profile.prof")
s.strip_dirs().sort_stats("time").print_stats() s.strip_dirs().sort_stats("cumtime").print_stats()
def parse_texts(nlp, texts): def parse_texts(nlp, texts):
for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=128): for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=16):
pass pass