mirror of https://github.com/explosion/spaCy.git
Improve profiling
This commit is contained in:
parent
b797dca977
commit
8d692771f6
|
@@ -11,6 +11,7 @@ import spacy
|
||||||
import sys
|
import sys
|
||||||
import tqdm
|
import tqdm
|
||||||
import cytoolz
|
import cytoolz
|
||||||
|
import thinc.extra.datasets
|
||||||
|
|
||||||
|
|
||||||
def read_inputs(loc):
|
def read_inputs(loc):
|
||||||
|
@@ -32,14 +33,18 @@ def profile(cmd, lang, inputs=None):
|
||||||
"""
|
"""
|
||||||
Profile a spaCy pipeline, to find out which functions take the most time.
|
Profile a spaCy pipeline, to find out which functions take the most time.
|
||||||
"""
|
"""
|
||||||
|
if inputs is None:
|
||||||
|
imdb_train, _ = thinc.extra.datasets.imdb()
|
||||||
|
inputs, _ = zip(*imdb_train)
|
||||||
|
inputs = inputs[:2000]
|
||||||
nlp = spacy.load(lang)
|
nlp = spacy.load(lang)
|
||||||
texts = list(cytoolz.take(10000, inputs))
|
texts = list(cytoolz.take(10000, inputs))
|
||||||
cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(),
|
cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(),
|
||||||
"Profile.prof")
|
"Profile.prof")
|
||||||
s = pstats.Stats("Profile.prof")
|
s = pstats.Stats("Profile.prof")
|
||||||
s.strip_dirs().sort_stats("time").print_stats()
|
s.strip_dirs().sort_stats("cumtime").print_stats()
|
||||||
|
|
||||||
|
|
||||||
def parse_texts(nlp, texts):
|
def parse_texts(nlp, texts):
|
||||||
for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=128):
|
for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=16):
|
||||||
pass
|
pass
|
||||||
|
|
Loading…
Reference in New Issue