mirror of https://github.com/explosion/spaCy.git
Move timing into Language.evaluate (#5836)
Move timing into `Language.evaluate` so that only the processing is timed, not processing plus scoring. `Language.evaluate` now returns `scores["speed"]` as words per second, which should be identical to how the speed was previously added to the scores. Also add the speed to the `evaluate` CLI output.
This commit is contained in:
parent 256b24b720
commit 0cddb0dbe9
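In a nutshell: after this change, timing happens inside `Language.evaluate` itself, so `scores["speed"]` (words per second) comes back with the other scores. A minimal usage sketch, not part of the diff — it assumes an installed `en_core_web_sm` pipeline and the released-v3 location of `Example` (`spacy.training`; around the time of this commit it still lived in `spacy.gold`):

import spacy
from spacy.training import Example

nlp = spacy.load("en_core_web_sm")
texts = [
    "Apple is looking at buying a U.K. startup.",
    "The fog rolled into San Francisco.",
]
# Empty reference annotations are enough to exercise evaluate() and read the speed.
examples = [Example.from_dict(nlp.make_doc(text), {}) for text in texts]

scores = nlp.evaluate(examples)  # processing is now timed inside evaluate()
print(f"{scores['speed']:.0f} words per second")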
@@ -67,10 +67,7 @@ def evaluate(
     corpus = Corpus(data_path, data_path)
     nlp = util.load_model(model)
     dev_dataset = list(corpus.dev_dataset(nlp, gold_preproc=gold_preproc))
-    begin = timer()
     scores = nlp.evaluate(dev_dataset, verbose=False)
-    end = timer()
-    nwords = sum(len(ex.predicted) for ex in dev_dataset)
     metrics = {
         "TOK": "token_acc",
         "TAG": "tag_acc",
@@ -82,17 +79,21 @@ def evaluate(
         "NER P": "ents_p",
         "NER R": "ents_r",
         "NER F": "ents_f",
-        "Textcat": "cats_score",
-        "Sent P": "sents_p",
-        "Sent R": "sents_r",
-        "Sent F": "sents_f",
+        "TEXTCAT": "cats_score",
+        "SENT P": "sents_p",
+        "SENT R": "sents_r",
+        "SENT F": "sents_f",
+        "SPEED": "speed",
     }
     results = {}
     for metric, key in metrics.items():
         if key in scores:
             if key == "cats_score":
                 metric = metric + " (" + scores.get("cats_score_desc", "unk") + ")"
-            results[metric] = f"{scores[key]*100:.2f}"
+            if key == "speed":
+                results[metric] = f"{scores[key]:.0f}"
+            else:
+                results[metric] = f"{scores[key]*100:.2f}"
     data = {re.sub(r"[\s/]", "_", k.lower()): v for k, v in results.items()}
 
     msg.table(results, title="Results")
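For reference, the formatting split the CLI now makes: accuracy-style scores are scaled by 100 and printed with two decimals, while the speed is printed as a whole number of words per second. A toy illustration with made-up values (not part of the diff):

scores = {"ents_f": 0.8532, "speed": 14321.7}  # illustrative values only
for metric, key in [("NER F", "ents_f"), ("SPEED", "speed")]:
    if key == "speed":
        print(metric, f"{scores[key]:.0f}")        # SPEED 14322
    else:
        print(metric, f"{scores[key] * 100:.2f}")  # NER F 85.32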
@@ -1,5 +1,4 @@
 from typing import Optional, Dict, Any, Tuple, Union, Callable, List
-from timeit import default_timer as timer
 import srsly
 import tqdm
 from pathlib import Path
@@ -248,14 +247,11 @@ def create_evaluation_callback(
         dev_examples = list(dev_examples)
         n_words = sum(len(ex.predicted) for ex in dev_examples)
         batch_size = cfg["eval_batch_size"]
-        start_time = timer()
         if optimizer.averages:
             with nlp.use_params(optimizer.averages):
                 scores = nlp.evaluate(dev_examples, batch_size=batch_size)
         else:
             scores = nlp.evaluate(dev_examples, batch_size=batch_size)
-        end_time = timer()
-        wps = n_words / (end_time - start_time)
         # Calculate a weighted sum based on score_weights for the main score
         weights = cfg["score_weights"]
         try:
@@ -264,7 +260,6 @@ def create_evaluation_callback(
             keys = list(scores.keys())
             err = Errors.E983.format(dict="score_weights", key=str(e), keys=keys)
             raise KeyError(err)
-        scores["speed"] = wps
         return weighted_score, scores
 
     return evaluate
@@ -14,6 +14,7 @@ from thinc.api import get_current_ops, Config, require_gpu, Optimizer
 import srsly
 import multiprocessing as mp
 from itertools import chain, cycle
+from timeit import default_timer as timer
 
 from .tokens.underscore import Underscore
 from .vocab import Vocab, create_vocab
@@ -1088,7 +1089,14 @@ class Language:
         kwargs.setdefault("verbose", verbose)
         kwargs.setdefault("nlp", self)
         scorer = Scorer(**kwargs)
-        docs = list(eg.predicted for eg in examples)
+        texts = [eg.reference.text for eg in examples]
+        docs = [eg.predicted for eg in examples]
+        start_time = timer()
+        # tokenize the texts only for timing purposes
+        if not hasattr(self.tokenizer, "pipe"):
+            _ = [self.tokenizer(text) for text in texts]
+        else:
+            _ = list(self.tokenizer.pipe(texts))
         for name, pipe in self.pipeline:
             kwargs = component_cfg.get(name, {})
             kwargs.setdefault("batch_size", batch_size)
@@ -1096,11 +1104,18 @@ class Language:
                 docs = _pipe(docs, pipe, kwargs)
             else:
                 docs = pipe.pipe(docs, **kwargs)
+        # iterate over the final generator
+        if len(self.pipeline):
+            docs = list(docs)
+        end_time = timer()
         for i, (doc, eg) in enumerate(zip(docs, examples)):
             if verbose:
                 print(doc)
             eg.predicted = doc
-        return scorer.score(examples)
+        results = scorer.score(examples)
+        n_words = sum(len(eg.predicted) for eg in examples)
+        results["speed"] = n_words / (end_time - start_time)
+        return results
 
     @contextmanager
     def use_params(self, params: dict):
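The pattern the `Language.evaluate` hunks implement, condensed into a standalone sketch (the function name here is illustrative, not taken from the diff): time only the pipeline application, then divide the number of processed tokens by the elapsed seconds.

from timeit import default_timer as timer

def words_per_second(nlp, texts):
    # Time tokenization plus all pipeline components; scoring is deliberately excluded.
    start = timer()
    docs = list(nlp.pipe(texts))
    elapsed = timer() - start
    n_words = sum(len(doc) for doc in docs)
    return n_words / elapsed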