spaCy/spacy/scorer.py

# coding: utf8
from __future__ import division, print_function, unicode_literals

from .gold import tags_to_entities, GoldParse
from .errors import Errors


class PRFScore(object):
    """
    Running true-positive / false-positive / false-negative counters, with
    precision, recall and F-score computed from them on demand.
    """
    def __init__(self):
        self.tp = self.fp = self.fn = 0

    def score_set(self, cand, gold):
        """
        Compare a set of candidate items against a set of gold items and add
        the outcome to the running counters.

        cand: set of predicted items.
        gold: set of reference items.
        """
        # Items present on both sides are hits.
        hits = cand.intersection(gold)
        self.tp += len(hits)
        # Predicted but absent from the reference.
        spurious = cand - gold
        self.fp += len(spurious)
        # In the reference but never predicted.
        missed = gold - cand
        self.fn += len(missed)

    @property
    def precision(self):
        """
        tp / (tp + fp). The 1e-100 term keeps the division defined while the
        counters are still zero, in which case the result is 0.0.
        """
        predicted = self.tp + self.fp
        return self.tp / (predicted + 1e-100)

    @property
    def recall(self):
        """
        tp / (tp + fn), guarded against an empty denominator in the same way
        as `precision`.
        """
        expected = self.tp + self.fn
        return self.tp / (expected + 1e-100)

    @property
    def fscore(self):
        """
        Harmonic mean of precision and recall; 0.0 when both are zero.
        """
        prec = self.precision
        rec = self.recall
        half_f = (prec * rec) / (prec + rec + 1e-100)
        return 2 * half_f


class Scorer(object):
    """
    Accumulates evaluation counts across calls to `score()` and reports them
    as percentages: token alignment, tags, unlabelled / labelled dependency
    arcs, and entity precision / recall / F-score.
    """
    def __init__(self, eval_punct=False):
        """
        eval_punct: kept on the instance unchanged; nothing in this class
            reads it (punctuation filtering in `score()` is driven by its
            `punct_labels` argument instead).
        """
        # One independent counter triple per evaluated annotation layer.
        self.tokens = PRFScore()
        # Created for sentence boundaries, but never updated by this class.
        self.sbd = PRFScore()
        self.unlabelled = PRFScore()
        self.labelled = PRFScore()
        self.tags = PRFScore()
        self.ner = PRFScore()
        self.eval_punct = eval_punct

    @property
    def tags_acc(self):
        """Tag F-score, as a percentage."""
        return self.tags.fscore * 100

    @property
    def token_acc(self):
        """Token precision, as a percentage."""
        return self.tokens.precision * 100

    @property
    def uas(self):
        """F-score over (child, head) pairs, as a percentage."""
        return self.unlabelled.fscore * 100

    @property
    def las(self):
        """F-score over (child, head, label) triples, as a percentage."""
        return self.labelled.fscore * 100

    @property
    def ents_p(self):
        """Entity precision, as a percentage."""
        return self.ner.precision * 100

    @property
    def ents_r(self):
        """Entity recall, as a percentage."""
        return self.ner.recall * 100

    @property
    def ents_f(self):
        """Entity F-score, as a percentage."""
        return self.ner.fscore * 100

    @property
    def scores(self):
        """All percentage scores, collected into one dict."""
        return {
            'uas': self.uas,
            'las': self.las,
            'ents_p': self.ents_p,
            'ents_r': self.ents_r,
            'ents_f': self.ents_f,
            'tags_acc': self.tags_acc,
            'token_acc': self.token_acc
        }

    def score(self, tokens, gold, verbose=False, punct_labels=('p', 'punct')):
        """
        Score one predicted analysis against its gold annotation and add the
        result to the running counters.

        tokens: the predicted analysis. It is iterated token by token (each
            token is read for `orth_`, `i`, `tag_`, `dep_` and `head.i`) and
            queried for `ents` (each read for `start`, `end`, `label_`).
            Presumably a spaCy Doc -- confirm against callers.
        gold: the reference annotation. Read for `orig_annot` (rows of
            id, word, tag, head, dep, ner) and for `cand_to_gold`, which maps
            a candidate token index to a gold index or None.
        verbose: if true, print every dependency arc that differs: 'F' for
            arcs only in the prediction, 'M' for arcs only in the gold.
        punct_labels: lower-cased dependency labels excluded from arc
            scoring on both the gold and the candidate side.
        """
        # A length mismatch means the tokenization differs, so rebuild the
        # gold parse against the candidate tokens to get a fresh alignment.
        if len(tokens) != len(gold):
            gold = GoldParse.from_annot_tuples(tokens, zip(*gold.orig_annot))

        gold_deps = set()
        gold_tags = set()
        gold_ner_tags = [row[-1] for row in gold.orig_annot]
        gold_ents = set(tags_to_entities(gold_ner_tags))
        for gold_id, _word, gold_tag, gold_head_id, gold_dep, _ner in gold.orig_annot:
            # Tags are scored for every gold row, punctuation included.
            gold_tags.add((gold_id, gold_tag))
            # Rows without a dependency label contribute no gold arc.
            if gold_dep in (None, ""):
                continue
            gold_label = gold_dep.lower()
            if gold_label in punct_labels:
                continue
            gold_deps.add((gold_id, gold_head_id, gold_label))

        cand_deps = set()
        cand_tags = set()
        for token in tokens:
            text = token.orth_
            # Whitespace-only tokens are not scored at all.
            if text.isspace():
                continue
            aligned_i = gold.cand_to_gold[token.i]
            if aligned_i is not None:
                self.tokens.tp += 1
                cand_tags.add((aligned_i, token.tag_))
            else:
                self.tokens.fp += 1
            cand_label = token.dep_.lower()
            if cand_label in punct_labels or not text.strip():
                continue
            aligned_head = gold.cand_to_gold[token.head.i]
            if aligned_i is None or aligned_head is None:
                # Unaligned arcs would all collapse into the same
                # None-keyed entry in a set, so count each one directly as
                # a false positive instead.
                self.unlabelled.fp += 1
                self.labelled.fp += 1
            else:
                cand_deps.add((aligned_i, aligned_head, cand_label))

        # A '-' NER value in any gold row skips entity scoring for the
        # whole input.
        has_missing_ner = '-' in [row[-1] for row in gold.orig_annot]
        if not has_missing_ner:
            cand_ents = set()
            for ent in tokens.ents:
                first = gold.cand_to_gold[ent.start]
                # `ent.end` is exclusive; look up the entity's last token.
                last = gold.cand_to_gold[ent.end-1]
                if first is not None and last is not None:
                    cand_ents.add((ent.label_, first, last))
                else:
                    self.ner.fp += 1
            self.ner.score_set(cand_ents, gold_ents)

        self.tags.score_set(cand_tags, gold_tags)
        self.labelled.score_set(cand_deps, gold_deps)
        # Unlabelled arcs are the labelled ones with the label dropped.
        self.unlabelled.score_set(
            {arc[:2] for arc in cand_deps},
            {arc[:2] for arc in gold_deps},
        )

        if verbose:
            gold_words = [row[1] for row in gold.orig_annot]
            for child, parent, label in (cand_deps - gold_deps):
                print('F', gold_words[child], label, gold_words[parent])
            for child, parent, label in (gold_deps - cand_deps):
                print('M', gold_words[child], label, gold_words[parent])
Clean up imports, unused code, whitespace, docstrings 2017-04-15 10:05:47 +00:00			`# coding: utf8`
			`from __future__ import division, print_function, unicode_literals`
* Add scorer script 2015-03-11 01:07:03 +00:00
Revert "Merge branch 'develop' of https://github.com/explosion/spaCy into develop" This reverts commit c9ba3d3c2dc7067cf8bd55f878cec45a8c6d73d4, reversing changes made to 92c26a35d425d4e8ca1b805ea776ea10f5ded3df. 2018-03-27 17:23:02 +00:00			`from .gold import tags_to_entities, GoldParse`
💫 New system for error messages and warnings (#2163) * Add spacy.errors module * Update deprecation and user warnings * Replace errors and asserts with new error message system * Remove redundant asserts * Fix whitespace * Add messages for print/util.prints statements * Fix typo * Fix typos * Move CLI messages to spacy.cli._messages * Add decorator to display error code with message An implementation like this is nice because it only modifies the string when it's retrieved from the containing class – so we don't have to worry about manipulating tracebacks etc. * Remove unused link in spacy.about * Update errors for invalid pipeline components * Improve error for unknown factories * Add displaCy warnings * Update formatting consistency * Move error message to spacy.errors * Update errors and check if doc returned by component is None 2018-04-03 13:50:31 +00:00			`from .errors import Errors`
* Fix evaluation of NER in scorer.py 2015-05-27 01:18:16 +00:00
* Print parse if verbose in scorer 2015-04-05 20:29:30 +00:00
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 18:07:18 +00:00			`class PRFScore(object):`
Use consistent formatting for docstrings 2017-04-15 09:59:21 +00:00			`"""`
			`A precision / recall / F score`
			`"""`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 18:07:18 +00:00			`def __init__(self):`
			`self.tp = 0`
			`self.fp = 0`
			`self.fn = 0`

			`def score_set(self, cand, gold):`
			`self.tp += len(cand.intersection(gold))`
			`self.fp += len(cand - gold)`
			`self.fn += len(gold - cand)`

			`@property`
			`def precision(self):`
			`return self.tp / (self.tp + self.fp + 1e-100)`

			`@property`
			`def recall(self):`
			`return self.tp / (self.tp + self.fn + 1e-100)`

			`@property`
			`def fscore(self):`
			`p = self.precision`
			`r = self.recall`
			`return 2 * ((p * r) / (p + r + 1e-100))`


* Add scorer script 2015-03-11 01:07:03 +00:00			`class Scorer(object):`
			`def __init__(self, eval_punct=False):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 18:07:18 +00:00			`self.tokens = PRFScore()`
			`self.sbd = PRFScore()`
			`self.unlabelled = PRFScore()`
			`self.labelled = PRFScore()`
			`self.tags = PRFScore()`
			`self.ner = PRFScore()`
* Add scorer script 2015-03-11 01:07:03 +00:00			`self.eval_punct = eval_punct`

			`@property`
			`def tags_acc(self):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 18:07:18 +00:00			`return self.tags.fscore * 100`
* Tmp commit. Working on whole document parsing 2015-05-24 00:49:56 +00:00
			`@property`
			`def token_acc(self):`
* Start scoring tokens 2015-06-28 04:21:38 +00:00			`return self.tokens.precision * 100`
* Add scorer script 2015-03-11 01:07:03 +00:00
			`@property`
			`def uas(self):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 18:07:18 +00:00			`return self.unlabelled.fscore * 100`
* Add scorer script 2015-03-11 01:07:03 +00:00
			`@property`
			`def las(self):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 18:07:18 +00:00			`return self.labelled.fscore * 100`
* Add scorer script 2015-03-11 01:07:03 +00:00
			`@property`
			`def ents_p(self):`
* Fix evaluation of NER in scorer.py 2015-05-27 01:18:16 +00:00			`return self.ner.precision * 100`
* Add scorer script 2015-03-11 01:07:03 +00:00
			`@property`
			`def ents_r(self):`
* Fix evaluation of NER in scorer.py 2015-05-27 01:18:16 +00:00			`return self.ner.recall * 100`
Remove trailing whitespace 2015-04-19 08:31:31 +00:00
* Add scorer script 2015-03-11 01:07:03 +00:00			`@property`
			`def ents_f(self):`
* Fix evaluation of NER in scorer.py 2015-05-27 01:18:16 +00:00			`return self.ner.fscore * 100`
* Add scorer script 2015-03-11 01:07:03 +00:00
Refactor training, with new spacy.train module. Defaults still a little awkward. 2016-10-09 10:24:24 +00:00			`@property`
			`def scores(self):`
			`return {`
Tidy up language, lemmatizer and scorer 2017-10-27 12:40:14 +00:00			`'uas': self.uas,`
			`'las': self.las,`
			`'ents_p': self.ents_p,`
			`'ents_r': self.ents_r,`
			`'ents_f': self.ents_f,`
Refactor training, with new spacy.train module. Defaults still a little awkward. 2016-10-09 10:24:24 +00:00			`'tags_acc': self.tags_acc,`
			`'token_acc': self.token_acc`
			`}`

* Accept punct_labels as an argument to the scorer 2016-02-02 21:59:06 +00:00			`def score(self, tokens, gold, verbose=False, punct_labels=('p', 'punct')):`
Revert "Merge branch 'develop' of https://github.com/explosion/spaCy into develop" This reverts commit c9ba3d3c2dc7067cf8bd55f878cec45a8c6d73d4, reversing changes made to 92c26a35d425d4e8ca1b805ea776ea10f5ded3df. 2018-03-27 17:23:02 +00:00			`if len(tokens) != len(gold):`
Fix scoring if tokenization changes 2018-04-30 23:33:20 +00:00			`gold = GoldParse.from_annot_tuples(tokens, zip(*gold.orig_annot))`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 18:07:18 +00:00			`gold_deps = set()`
			`gold_tags = set()`
Tidy up language, lemmatizer and scorer 2017-10-27 12:40:14 +00:00			`gold_ents = set(tags_to_entities([annot[-1]`
			`for annot in gold.orig_annot]))`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 18:07:18 +00:00			`for id_, word, tag, head, dep, ner in gold.orig_annot:`
* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags 2015-05-30 16:24:32 +00:00			`gold_tags.add((id_, tag))`
Fix scorer bug for NER, related to ambiguity between missing annotations and misaligned tokens 2017-03-16 14:38:28 +00:00			`if dep not in (None, "") and dep.lower() not in punct_labels:`
* Fix evaluation of NER in scorer.py 2015-05-27 01:18:16 +00:00			`gold_deps.add((id_, head, dep.lower()))`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 18:07:18 +00:00			`cand_deps = set()`
			`cand_tags = set()`
			`for token in tokens:`
* Don't score whitespace tokens 2015-06-07 17:10:32 +00:00			`if token.orth_.isspace():`
			`continue`
* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags 2015-05-30 16:24:32 +00:00			`gold_i = gold.cand_to_gold[token.i]`
			`if gold_i is None:`
Revert "Merge branch 'develop' of https://github.com/explosion/spaCy into develop" This reverts commit c9ba3d3c2dc7067cf8bd55f878cec45a8c6d73d4, reversing changes made to 92c26a35d425d4e8ca1b805ea776ea10f5ded3df. 2018-03-27 17:23:02 +00:00			`self.tokens.fp += 1`
* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags 2015-05-30 16:24:32 +00:00			`else:`
* Start scoring tokens 2015-06-28 04:21:38 +00:00			`self.tokens.tp += 1`
* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags 2015-05-30 16:24:32 +00:00			`cand_tags.add((gold_i, token.tag_))`
* Accept punct_labels as an argument to the scorer 2016-02-02 21:59:06 +00:00			`if token.dep_.lower() not in punct_labels and token.orth_.strip():`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 18:07:18 +00:00			`gold_head = gold.cand_to_gold[token.head.i]`
			`# None is indistinct, so we can't just add it to the set`
			`# Multiple (None, None) deps are possible`
			`if gold_i is None or gold_head is None:`
			`self.unlabelled.fp += 1`
			`self.labelled.fp += 1`
			`else:`
* Fix evaluation of NER in scorer.py 2015-05-27 01:18:16 +00:00			`cand_deps.add((gold_i, gold_head, token.dep_.lower()))`
* Avoid NER scoring for sentences with some missing NER values. 2015-05-28 20:39:08 +00:00			`if '-' not in [token[-1] for token in gold.orig_annot]:`
			`cand_ents = set()`
			`for ent in tokens.ents:`
			`first = gold.cand_to_gold[ent.start]`
			`last = gold.cand_to_gold[ent.end-1]`
			`if first is None or last is None:`
			`self.ner.fp += 1`
			`else:`
			`cand_ents.add((ent.label_, first, last))`
			`self.ner.score_set(cand_ents, gold_ents)`
* Fix evaluation of NER in scorer.py 2015-05-27 01:18:16 +00:00			`self.tags.score_set(cand_tags, gold_tags)`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 18:07:18 +00:00			`self.labelled.score_set(cand_deps, gold_deps)`
			`self.unlabelled.score_set(`
			`set(item[:2] for item in cand_deps),`
			`set(item[:2] for item in gold_deps),`
			`)`
* Add verbose printing to scorer 2015-06-14 15:45:50 +00:00			`if verbose:`
			`gold_words = [item[1] for item in gold.orig_annot]`
			`for w_id, h_id, dep in (cand_deps - gold_deps):`
* Fix training under python3 2015-07-28 12:09:30 +00:00			`print('F', gold_words[w_id], dep, gold_words[h_id])`
* Add verbose printing to scorer 2015-06-14 15:45:50 +00:00			`for w_id, h_id, dep in (gold_deps - cand_deps):`
* Fix training under python3 2015-07-28 12:09:30 +00:00			`print('M', gold_words[w_id], dep, gold_words[h_id])`