mirror of https://github.com/explosion/spaCy.git
Move timing into Language.evaluate (#5836)
Move timing into `Language.evaluate` so that only the processing is timed, not processing + scoring. `Language.evaluate` returns `scores["speed"]` as words per second, which should be identical to how the speed was added to the scores previously. Also add the speed to the evaluate CLI output.
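In effect the reported figure is just the token count divided by processing wall-clock time, measured with `timeit.default_timer` as in the diff below. A minimal sketch of that calculation, assuming a loaded pipeline `nlp` and a list of raw `texts` (placeholder names, not part of this diff):

from timeit import default_timer as timer

def words_per_second(nlp, texts):
    # Time only the processing step, mirroring what Language.evaluate now does;
    # scoring happens outside the timed region.
    start_time = timer()
    docs = list(nlp.pipe(texts))
    end_time = timer()
    n_words = sum(len(doc) for doc in docs)    # tokens processed
    return n_words / (end_time - start_time)   # words per second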
parent 256b24b720
commit 0cddb0dbe9
@@ -67,10 +67,7 @@ def evaluate(
     corpus = Corpus(data_path, data_path)
     nlp = util.load_model(model)
     dev_dataset = list(corpus.dev_dataset(nlp, gold_preproc=gold_preproc))
-    begin = timer()
     scores = nlp.evaluate(dev_dataset, verbose=False)
-    end = timer()
-    nwords = sum(len(ex.predicted) for ex in dev_dataset)
     metrics = {
         "TOK": "token_acc",
         "TAG": "tag_acc",
@@ -82,16 +79,20 @@ def evaluate(
         "NER P": "ents_p",
         "NER R": "ents_r",
         "NER F": "ents_f",
-        "Textcat": "cats_score",
-        "Sent P": "sents_p",
-        "Sent R": "sents_r",
-        "Sent F": "sents_f",
+        "TEXTCAT": "cats_score",
+        "SENT P": "sents_p",
+        "SENT R": "sents_r",
+        "SENT F": "sents_f",
+        "SPEED": "speed",
     }
     results = {}
     for metric, key in metrics.items():
         if key in scores:
             if key == "cats_score":
                 metric = metric + " (" + scores.get("cats_score_desc", "unk") + ")"
-            results[metric] = f"{scores[key]*100:.2f}"
+            if key == "speed":
+                results[metric] = f"{scores[key]:.0f}"
+            else:
+                results[metric] = f"{scores[key]*100:.2f}"
     data = {re.sub(r"[\s/]", "_", k.lower()): v for k, v in results.items()}
 
@@ -1,5 +1,4 @@
 from typing import Optional, Dict, Any, Tuple, Union, Callable, List
-from timeit import default_timer as timer
 import srsly
 import tqdm
 from pathlib import Path
@@ -248,14 +247,11 @@ def create_evaluation_callback(
         dev_examples = list(dev_examples)
         n_words = sum(len(ex.predicted) for ex in dev_examples)
         batch_size = cfg["eval_batch_size"]
-        start_time = timer()
         if optimizer.averages:
             with nlp.use_params(optimizer.averages):
                 scores = nlp.evaluate(dev_examples, batch_size=batch_size)
         else:
             scores = nlp.evaluate(dev_examples, batch_size=batch_size)
-        end_time = timer()
-        wps = n_words / (end_time - start_time)
         # Calculate a weighted sum based on score_weights for the main score
         weights = cfg["score_weights"]
         try:
@@ -264,7 +260,6 @@ def create_evaluation_callback(
             keys = list(scores.keys())
             err = Errors.E983.format(dict="score_weights", key=str(e), keys=keys)
             raise KeyError(err)
-        scores["speed"] = wps
         return weighted_score, scores
 
     return evaluate
@@ -14,6 +14,7 @@ from thinc.api import get_current_ops, Config, require_gpu, Optimizer
 import srsly
 import multiprocessing as mp
 from itertools import chain, cycle
+from timeit import default_timer as timer
 
 from .tokens.underscore import Underscore
 from .vocab import Vocab, create_vocab
@@ -1088,7 +1089,14 @@ class Language:
         kwargs.setdefault("verbose", verbose)
         kwargs.setdefault("nlp", self)
         scorer = Scorer(**kwargs)
-        docs = list(eg.predicted for eg in examples)
+        texts = [eg.reference.text for eg in examples]
+        docs = [eg.predicted for eg in examples]
+        start_time = timer()
+        # tokenize the texts only for timing purposes
+        if not hasattr(self.tokenizer, "pipe"):
+            _ = [self.tokenizer(text) for text in texts]
+        else:
+            _ = list(self.tokenizer.pipe(texts))
         for name, pipe in self.pipeline:
             kwargs = component_cfg.get(name, {})
             kwargs.setdefault("batch_size", batch_size)
@@ -1096,11 +1104,18 @@ class Language:
                 docs = _pipe(docs, pipe, kwargs)
             else:
                 docs = pipe.pipe(docs, **kwargs)
+        # iterate over the final generator
+        if len(self.pipeline):
+            docs = list(docs)
+        end_time = timer()
         for i, (doc, eg) in enumerate(zip(docs, examples)):
             if verbose:
                 print(doc)
             eg.predicted = doc
-        return scorer.score(examples)
+        results = scorer.score(examples)
+        n_words = sum(len(eg.predicted) for eg in examples)
+        results["speed"] = n_words / (end_time - start_time)
+        return results
 
     @contextmanager
     def use_params(self, params: dict):
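With this change the speed comes back directly from `Language.evaluate`. Illustrative usage, assuming `nlp` is a loaded pipeline and `dev_examples` is a list of Example objects (placeholder names):

scores = nlp.evaluate(dev_examples)
print(f"Speed: {scores['speed']:.0f} words per second")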