* Update spacy.scorer to use P/R/F, to support tokenization errors

Matthew Honnibal 2015-05-24 20:07:18 +02:00
parent efe7a7d7d6
commit 765b61cac4
1 changed file with 70 additions and 46 deletions

spacy/scorer.py

@@ -1,78 +1,102 @@
 from __future__ import division
 
 
+class PRFScore(object):
+    """A precision / recall / F score"""
+    def __init__(self):
+        self.tp = 0
+        self.fp = 0
+        self.fn = 0
+
+    def score_set(self, cand, gold):
+        self.tp += len(cand.intersection(gold))
+        self.fp += len(cand - gold)
+        self.fn += len(gold - cand)
+
+    @property
+    def precision(self):
+        return self.tp / (self.tp + self.fp + 1e-100)
+
+    @property
+    def recall(self):
+        return self.tp / (self.tp + self.fn + 1e-100)
+
+    @property
+    def fscore(self):
+        p = self.precision
+        r = self.recall
+        return 2 * ((p * r) / (p + r + 1e-100))
+
+
 class Scorer(object):
     def __init__(self, eval_punct=False):
-        self.heads_corr = 0
-        self.labels_corr = 0
-        self.tags_corr = 0
-        self.ents_tp = 0
-        self.ents_fp = 0
-        self.ents_fn = 0
-        self.total = 1e-100
-        self.mistokened = 0
-        self.n_tokens = 0
+        self.tokens = PRFScore()
+        self.sbd = PRFScore()
+        self.unlabelled = PRFScore()
+        self.labelled = PRFScore()
+        self.tags = PRFScore()
+        self.ner = PRFScore()
         self.eval_punct = eval_punct
 
     @property
     def tags_acc(self):
-        return (self.tags_corr / (self.n_tokens - self.mistokened)) * 100
+        return self.tags.fscore * 100
 
     @property
     def token_acc(self):
-        return (self.mistokened / self.n_tokens) * 100
+        return self.tokens.fscore * 100
 
     @property
     def uas(self):
-        return (self.heads_corr / self.total) * 100
+        return self.unlabelled.fscore * 100
 
     @property
     def las(self):
-        return (self.labels_corr / self.total) * 100
+        return self.labelled.fscore * 100
 
     @property
     def ents_p(self):
-        return (self.ents_tp / (self.ents_tp + self.ents_fp + 1e-100)) * 100
+        return self.ner.precision
 
     @property
     def ents_r(self):
-        return (self.ents_tp / (self.ents_tp + self.ents_fn + 1e-100)) * 100
+        return self.ner.recall
 
     @property
     def ents_f(self):
-        return (2 * self.ents_p * self.ents_r) / (self.ents_p + self.ents_r + 1e-100)
+        return self.ner.fscore
 
     def score(self, tokens, gold, verbose=False):
         assert len(tokens) == len(gold)
-        for i, token in enumerate(tokens):
-            if not self.skip_token(i, token, gold):
-                self.total += 1
-                if verbose:
-                    print token.orth_, token.tag_, token.dep_, token.head.orth_, token.head.i == gold.heads[i]
-                if token.head.i == gold.heads[i]:
-                    self.heads_corr += 1
-                    self.labels_corr += token.dep_.lower() == gold.labels[i].lower()
-            if gold.tags[i] != None:
-                self.tags_corr += token.tag_ == gold.tags[i]
-                self.n_tokens += 1
-        gold_ents = set((start, end, label) for (start, end, label) in gold.ents)
-        guess_ents = set((e.start, e.end, e.label_) for e in tokens.ents)
-        if verbose and gold_ents:
-            for start, end, label in guess_ents:
-                mark = 'T' if (start, end, label) in gold_ents else 'F'
-                ent_str = ' '.join(tokens[i].orth_ for i in range(start, end))
-                print mark, label, ent_str
-            for start, end, label in gold_ents:
-                if (start, end, label) not in guess_ents:
-                    ent_str = ' '.join(tokens[i].orth_ for i in range(start, end))
-                    print 'M', label, ent_str
-            print
-        if gold_ents:
-            self.ents_tp += len(gold_ents.intersection(guess_ents))
-            self.ents_fn += len(gold_ents - guess_ents)
-            self.ents_fp += len(guess_ents - gold_ents)
-
-    def skip_token(self, i, token, gold):
-        return gold.labels[i] in ('P', 'punct') or gold.heads[i] == None
+        gold_deps = set()
+        gold_tags = set()
+        for id_, word, tag, head, dep, ner in gold.orig_annot:
+            if dep.lower() not in ('p', 'punct'):
+                gold_deps.add((id_, head, dep))
+                gold_tags.add((id_, tag))
+        cand_deps = set()
+        cand_tags = set()
+        for token in tokens:
+            if token.dep_ not in ('p', 'punct') and token.orth_.strip():
+                gold_i = gold.cand_to_gold[token.i]
+                gold_head = gold.cand_to_gold[token.head.i]
+                # None is indistinct, so we can't just add it to the set
+                # Multiple (None, None) deps are possible
+                if gold_i is None or gold_head is None:
+                    self.unlabelled.fp += 1
+                    self.labelled.fp += 1
+                else:
+                    cand_deps.add((gold_i, gold_head, token.dep_))
+                if gold_i is None:
+                    self.tags.fp += 1
+                else:
+                    cand_tags.add((gold_i, token.tag_))
+        self.tags.score_set(cand_tags, gold_tags)
+        self.labelled.score_set(cand_deps, gold_deps)
+        self.unlabelled.score_set(
+            set(item[:2] for item in cand_deps),
+            set(item[:2] for item in gold_deps),
+        )
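
Below is a minimal usage sketch, not part of the commit, showing how the new PRFScore arithmetic behaves on toy prediction/gold sets. The import path spacy.scorer is assumed from the commit message, and the (token_id, tag) tuples are illustrative only.

# Sketch only: exercises the PRFScore class added above on toy sets.
# Assumption: the class is importable as spacy.scorer.PRFScore.
from spacy.scorer import PRFScore

prf = PRFScore()
cand = set([(0, 'DT'), (1, 'NN'), (2, 'VBZ')])   # predicted annotations
gold = set([(0, 'DT'), (1, 'NN'), (2, 'VBD')])   # reference annotations
prf.score_set(cand, gold)

# tp=2, fp=1, fn=1, so precision == recall == fscore == 2/3
print prf.precision
print prf.recall
print prf.fscore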