spaCy/spacy/scorer.py

import numpy as np

from .gold import tags_to_entities, GoldParse, DocAnnotation
from .errors import Errors


class PRFScore(object):
    """
    A precision / recall / F score
    """

    def __init__(self):
        self.tp = 0
        self.fp = 0
        self.fn = 0

    def score_set(self, cand, gold):
        self.tp += len(cand.intersection(gold))
        self.fp += len(cand - gold)
        self.fn += len(gold - cand)

    @property
    def precision(self):
        return self.tp / (self.tp + self.fp + 1e-100)

    @property
    def recall(self):
        return self.tp / (self.tp + self.fn + 1e-100)

    @property
    def fscore(self):
        p = self.precision
        r = self.recall
        return 2 * ((p * r) / (p + r + 1e-100))


class ROCAUCScore(object):
    """
    An AUC ROC score.
    """

    def __init__(self):
        self.golds = []
        self.cands = []
        self.saved_score = 0.0
        self.saved_score_at_len = 0

    def score_set(self, cand, gold):
        self.cands.append(cand)
        self.golds.append(gold)

    @property
    def score(self):
        if len(self.golds) == self.saved_score_at_len:
            return self.saved_score
        try:
            self.saved_score = _roc_auc_score(self.golds, self.cands)
        # catch ValueError: Only one class present in y_true.
        # ROC AUC score is not defined in that case.
        except ValueError:
            self.saved_score = -float("inf")
        self.saved_score_at_len = len(self.golds)
        return self.saved_score


class Scorer(object):
    """Compute evaluation scores."""

    def __init__(self, eval_punct=False, pipeline=None):
        """Initialize the Scorer.

        eval_punct (bool): Evaluate the dependency attachments to and from
            punctuation.
        RETURNS (Scorer): The newly created object.

        DOCS: https://spacy.io/api/scorer#init
        """
        self.tokens = PRFScore()
        self.sbd = PRFScore()
        self.unlabelled = PRFScore()
        self.labelled = PRFScore()
        self.labelled_per_dep = dict()
        self.tags = PRFScore()
        self.pos = PRFScore()
        self.morphs = PRFScore()
        self.morphs_per_feat = dict()
        self.sent_starts = PRFScore()
        self.ner = PRFScore()
        self.ner_per_ents = dict()
        self.eval_punct = eval_punct
        self.textcat = PRFScore()
        self.textcat_f_per_cat = dict()
        self.textcat_auc_per_cat = dict()
        self.textcat_positive_label = None
        self.textcat_multilabel = False

        if pipeline:
            for name, component in pipeline:
                if name == "textcat":
                    self.textcat_multilabel = component.model.attrs["multi_label"]
                    self.textcat_positive_label = component.cfg.get(
                        "positive_label", None
                    )
                    for label in component.cfg.get("labels", []):
                        self.textcat_auc_per_cat[label] = ROCAUCScore()
                        self.textcat_f_per_cat[label] = PRFScore()

    @property
    def tags_acc(self):
        """RETURNS (float): Part-of-speech tag accuracy (fine grained tags,
            i.e. `Token.tag`).
        """
        return self.tags.fscore * 100

    @property
    def pos_acc(self):
        """RETURNS (float): Part-of-speech tag accuracy (coarse grained pos,
            i.e. `Token.pos`).
        """
        return self.pos.fscore * 100

    @property
    def morphs_acc(self):
        """RETURNS (float): Morph tag accuracy (morphological features,
           i.e. `Token.morph`).
       """
        return self.morphs.fscore * 100

    @property
    def morphs_per_type(self):
        """RETURNS (dict): Scores per dependency label.
       """
        return {
            k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
            for k, v in self.morphs_per_feat.items()
        }

    @property
    def sent_p(self):
        """RETURNS (float): F-score for identification of sentence starts.
            i.e. `Token.is_sent_start`).
        """
        return self.sent_starts.precision * 100

    @property
    def sent_r(self):
        """RETURNS (float): F-score for identification of sentence starts.
            i.e. `Token.is_sent_start`).
        """
        return self.sent_starts.recall * 100

    @property
    def sent_f(self):
        """RETURNS (float): F-score for identification of sentence starts.
            i.e. `Token.is_sent_start`).
        """
        return self.sent_starts.fscore * 100

    @property
    def token_acc(self):
        """RETURNS (float): Tokenization accuracy."""
        return self.tokens.precision * 100

    @property
    def uas(self):
        """RETURNS (float): Unlabelled dependency score."""
        return self.unlabelled.fscore * 100

    @property
    def las(self):
        """RETURNS (float): Labelled dependency score."""
        return self.labelled.fscore * 100

    @property
    def las_per_type(self):
        """RETURNS (dict): Scores per dependency label.
        """
        return {
            k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
            for k, v in self.labelled_per_dep.items()
        }

    @property
    def ents_p(self):
        """RETURNS (float): Named entity accuracy (precision)."""
        return self.ner.precision * 100

    @property
    def ents_r(self):
        """RETURNS (float): Named entity accuracy (recall)."""
        return self.ner.recall * 100

    @property
    def ents_f(self):
        """RETURNS (float): Named entity accuracy (F-score)."""
        return self.ner.fscore * 100

    @property
    def ents_per_type(self):
        """RETURNS (dict): Scores per entity label.
        """
        return {
            k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
            for k, v in self.ner_per_ents.items()
        }

    @property
    def textcat_f(self):
        """RETURNS (float): f-score on positive label for binary classification,
        macro-averaged f-score for multilabel classification
        """
        if not self.textcat_multilabel:
            if self.textcat_positive_label:
                # binary classification
                return self.textcat.fscore * 100
        # multi-class and/or multi-label
        return (
            sum([score.fscore for label, score in self.textcat_f_per_cat.items()])
            / (len(self.textcat_f_per_cat) + 1e-100)
            * 100
        )

    @property
    def textcat_auc(self):
        """RETURNS (float): macro-averaged AUC ROC score for multilabel classification (-1 if undefined)
        """
        return max(
            sum([score.score for label, score in self.textcat_auc_per_cat.items()])
            / (len(self.textcat_auc_per_cat) + 1e-100),
            -1,
        )

    @property
    def textcats_auc_per_cat(self):
        """RETURNS (dict): AUC ROC Scores per textcat label.
        """
        return {
            k: {"roc_auc_score": max(v.score, -1)}
            for k, v in self.textcat_auc_per_cat.items()
        }

    @property
    def textcats_f_per_cat(self):
        """RETURNS (dict): F-scores per textcat label.
        """
        return {
            k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
            for k, v in self.textcat_f_per_cat.items()
        }

    @property
    def scores(self):
        """RETURNS (dict): All scores mapped by key.
        """
        return {
            "uas": self.uas,
            "las": self.las,
            "las_per_type": self.las_per_type,
            "ents_p": self.ents_p,
            "ents_r": self.ents_r,
            "ents_f": self.ents_f,
            "ents_per_type": self.ents_per_type,
            "tags_acc": self.tags_acc,
            "pos_acc": self.pos_acc,
            "morphs_acc": self.morphs_acc,
            "morphs_per_type": self.morphs_per_type,
            "sent_p": self.sent_p,
            "sent_r": self.sent_r,
            "sent_f": self.sent_f,
            "token_acc": self.token_acc,
            "textcat_f": self.textcat_f,
            "textcat_auc": self.textcat_auc,
            "textcats_f_per_cat": self.textcats_f_per_cat,
            "textcats_auc_per_cat": self.textcats_auc_per_cat,
        }

    def score(self, example, verbose=False, punct_labels=("p", "punct")):
        """Update the evaluation scores from a single Doc / GoldParse pair.

        example (Example): The predicted annotations + correct annotations.
        verbose (bool): Print debugging information.
        punct_labels (tuple): Dependency labels for punctuation. Used to
            evaluate dependency attachments to punctuation if `eval_punct` is
            `True`.

        DOCS: https://spacy.io/api/scorer#score
        """
        if isinstance(example, tuple) and len(example) == 2:
            doc, gold = example
        else:
            gold = example.gold
            doc = example.doc

        if len(doc) != len(gold):
            doc_annotation = DocAnnotation(cats=gold.cats)
            token_annotation = gold.orig
            gold = GoldParse.from_annotation(doc, doc_annotation, token_annotation)
        orig = gold.orig
        gold_deps = set()
        gold_deps_per_dep = {}
        gold_tags = set()
        gold_pos = set()
        gold_morphs = set()
        gold_morphs_per_feat = {}
        gold_sent_starts = set()
        gold_ents = set(tags_to_entities(orig.entities))
        for id_, tag, pos, morph, head, dep, sent_start in zip(
            orig.ids,
            orig.tags,
            orig.pos,
            orig.morphs,
            orig.heads,
            orig.deps,
            orig.sent_starts,
        ):
            gold_tags.add((id_, tag))
            gold_pos.add((id_, pos))
            gold_morphs.add((id_, morph))
            if morph:
                for feat in morph.split("|"):
                    field, values = feat.split("=")
                    if field not in self.morphs_per_feat:
                        self.morphs_per_feat[field] = PRFScore()
                    if field not in gold_morphs_per_feat:
                        gold_morphs_per_feat[field] = set()
                    gold_morphs_per_feat[field].add((id_, feat))
            if sent_start:
                gold_sent_starts.add(id_)
            if dep not in (None, "") and dep.lower() not in punct_labels:
                gold_deps.add((id_, head, dep.lower()))
                if dep.lower() not in self.labelled_per_dep:
                    self.labelled_per_dep[dep.lower()] = PRFScore()
                if dep.lower() not in gold_deps_per_dep:
                    gold_deps_per_dep[dep.lower()] = set()
                gold_deps_per_dep[dep.lower()].add((id_, head, dep.lower()))
        cand_deps = set()
        cand_deps_per_dep = {}
        cand_tags = set()
        cand_pos = set()
        cand_morphs = set()
        cand_morphs_per_feat = {}
        cand_sent_starts = set()
        for token in doc:
            if token.orth_.isspace():
                continue
            gold_i = gold.cand_to_gold[token.i]
            if gold_i is None:
                self.tokens.fp += 1
            else:
                self.tokens.tp += 1
                cand_tags.add((gold_i, token.tag_))
                cand_pos.add((gold_i, token.pos_))
                cand_morphs.add((gold_i, token.morph_))
                if token.morph_:
                    for feat in token.morph_.split("|"):
                        field, values = feat.split("=")
                        if field not in self.morphs_per_feat:
                            self.morphs_per_feat[field] = PRFScore()
                        if field not in cand_morphs_per_feat:
                            cand_morphs_per_feat[field] = set()
                        cand_morphs_per_feat[field].add((gold_i, feat))
                if token.is_sent_start:
                    cand_sent_starts.add(gold_i)
            if token.dep_.lower() not in punct_labels and token.orth_.strip():
                gold_head = gold.cand_to_gold[token.head.i]
                # None is indistinct, so we can't just add it to the set
                # Multiple (None, None) deps are possible
                if gold_i is None or gold_head is None:
                    self.unlabelled.fp += 1
                    self.labelled.fp += 1
                else:
                    cand_deps.add((gold_i, gold_head, token.dep_.lower()))
                    if token.dep_.lower() not in self.labelled_per_dep:
                        self.labelled_per_dep[token.dep_.lower()] = PRFScore()
                    if token.dep_.lower() not in cand_deps_per_dep:
                        cand_deps_per_dep[token.dep_.lower()] = set()
                    cand_deps_per_dep[token.dep_.lower()].add(
                        (gold_i, gold_head, token.dep_.lower())
                    )
        if "-" not in [token[-1] for token in orig.entities]:
            # Find all NER labels in gold and doc
            ent_labels = set([x[0] for x in gold_ents] + [k.label_ for k in doc.ents])
            # Set up all labels for per type scoring and prepare gold per type
            gold_per_ents = {ent_label: set() for ent_label in ent_labels}
            for ent_label in ent_labels:
                if ent_label not in self.ner_per_ents:
                    self.ner_per_ents[ent_label] = PRFScore()
                gold_per_ents[ent_label].update(
                    [x for x in gold_ents if x[0] == ent_label]
                )
            # Find all candidate labels, for all and per type
            cand_ents = set()
            cand_per_ents = {ent_label: set() for ent_label in ent_labels}
            for ent in doc.ents:
                first = gold.cand_to_gold[ent.start]
                last = gold.cand_to_gold[ent.end - 1]
                if first is None or last is None:
                    self.ner.fp += 1
                    self.ner_per_ents[ent.label_].fp += 1
                else:
                    cand_ents.add((ent.label_, first, last))
                    cand_per_ents[ent.label_].add((ent.label_, first, last))
            # Scores per ent
            for k, v in self.ner_per_ents.items():
                if k in cand_per_ents:
                    v.score_set(cand_per_ents[k], gold_per_ents[k])
            # Score for all ents
            self.ner.score_set(cand_ents, gold_ents)
        self.tags.score_set(cand_tags, gold_tags)
        self.pos.score_set(cand_pos, gold_pos)
        self.morphs.score_set(cand_morphs, gold_morphs)
        for field in self.morphs_per_feat:
            self.morphs_per_feat[field].score_set(
                cand_morphs_per_feat.get(field, set()),
                gold_morphs_per_feat.get(field, set()),
            )
        self.sent_starts.score_set(cand_sent_starts, gold_sent_starts)
        self.labelled.score_set(cand_deps, gold_deps)
        for dep in self.labelled_per_dep:
            self.labelled_per_dep[dep].score_set(
                cand_deps_per_dep.get(dep, set()), gold_deps_per_dep.get(dep, set())
            )
        self.unlabelled.score_set(
            set(item[:2] for item in cand_deps), set(item[:2] for item in gold_deps)
        )
        if (
            len(gold.cats) > 0
            and set(self.textcat_f_per_cat)
            == set(self.textcat_auc_per_cat)
            == set(gold.cats)
            and set(gold.cats) == set(doc.cats)
        ):
            goldcat = max(gold.cats, key=gold.cats.get)
            candcat = max(doc.cats, key=doc.cats.get)
            if self.textcat_positive_label:
                self.textcat.score_set(
                    set([self.textcat_positive_label]) & set([candcat]),
                    set([self.textcat_positive_label]) & set([goldcat]),
                )
            for label in set(gold.cats):
                self.textcat_auc_per_cat[label].score_set(
                    doc.cats[label], gold.cats[label]
                )
                self.textcat_f_per_cat[label].score_set(
                    set([label]) & set([candcat]), set([label]) & set([goldcat])
                )
        elif len(self.textcat_f_per_cat) > 0:
            model_labels = set(self.textcat_f_per_cat)
            eval_labels = set(gold.cats)
            raise ValueError(
                Errors.E162.format(model_labels=model_labels, eval_labels=eval_labels)
            )
        elif len(self.textcat_auc_per_cat) > 0:
            model_labels = set(self.textcat_auc_per_cat)
            eval_labels = set(gold.cats)
            raise ValueError(
                Errors.E162.format(model_labels=model_labels, eval_labels=eval_labels)
            )
        if verbose:
            gold_words = orig.words
            for w_id, h_id, dep in cand_deps - gold_deps:
                print("F", gold_words[w_id], dep, gold_words[h_id])
            for w_id, h_id, dep in gold_deps - cand_deps:
                print("M", gold_words[w_id], dep, gold_words[h_id])


#############################################################################
#
# The following implementation of roc_auc_score() is adapted from
# scikit-learn, which is distributed under the following license:
#
# New BSD License
#
# Copyright (c) 2007–2019 The scikit-learn developers.
# All rights reserved.
#
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   a. Redistributions of source code must retain the above copyright notice,
#      this list of conditions and the following disclaimer.
#   b. Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in the
#      documentation and/or other materials provided with the distribution.
#   c. Neither the name of the Scikit-learn Developers  nor the names of
#      its contributors may be used to endorse or promote products
#      derived from this software without specific prior written
#      permission.
#
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.


def _roc_auc_score(y_true, y_score):
    """Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)
    from prediction scores.

    Note: this implementation is restricted to the binary classification task

    Parameters
    ----------
    y_true : array, shape = [n_samples] or [n_samples, n_classes]
        True binary labels or binary label indicators.
        The multiclass case expects shape = [n_samples] and labels
        with values in ``range(n_classes)``.

    y_score : array, shape = [n_samples] or [n_samples, n_classes]
        Target scores, can either be probability estimates of the positive
        class, confidence values, or non-thresholded measure of decisions
        (as returned by "decision_function" on some classifiers). For binary
        y_true, y_score is supposed to be the score of the class with greater
        label. The multiclass case expects shape = [n_samples, n_classes]
        where the scores correspond to probability estimates.

    Returns
    -------
    auc : float

    References
    ----------
    .. [1] `Wikipedia entry for the Receiver operating characteristic
            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_

    .. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition
           Letters, 2006, 27(8):861-874.

    .. [3] `Analyzing a portion of the ROC curve. McClish, 1989
            <https://www.ncbi.nlm.nih.gov/pubmed/2668680>`_
    """
    if len(np.unique(y_true)) != 2:
        raise ValueError(Errors.E165)
    fpr, tpr, _ = _roc_curve(y_true, y_score)
    return _auc(fpr, tpr)


def _roc_curve(y_true, y_score):
    """Compute Receiver operating characteristic (ROC)

    Note: this implementation is restricted to the binary classification task.

    Parameters
    ----------

    y_true : array, shape = [n_samples]
        True binary labels. If labels are not either {-1, 1} or {0, 1}, then
        pos_label should be explicitly given.

    y_score : array, shape = [n_samples]
        Target scores, can either be probability estimates of the positive
        class, confidence values, or non-thresholded measure of decisions
        (as returned by "decision_function" on some classifiers).

    Returns
    -------
    fpr : array, shape = [>2]
        Increasing false positive rates such that element i is the false
        positive rate of predictions with score >= thresholds[i].

    tpr : array, shape = [>2]
        Increasing true positive rates such that element i is the true
        positive rate of predictions with score >= thresholds[i].

    thresholds : array, shape = [n_thresholds]
        Decreasing thresholds on the decision function used to compute
        fpr and tpr. `thresholds[0]` represents no instances being predicted
        and is arbitrarily set to `max(y_score) + 1`.

    Notes
    -----
    Since the thresholds are sorted from low to high values, they
    are reversed upon returning them to ensure they correspond to both ``fpr``
    and ``tpr``, which are sorted in reversed order during their calculation.

    References
    ----------
    .. [1] `Wikipedia entry for the Receiver operating characteristic
            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_

    .. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition
           Letters, 2006, 27(8):861-874.
    """
    fps, tps, thresholds = _binary_clf_curve(y_true, y_score)

    # Add an extra threshold position
    # to make sure that the curve starts at (0, 0)
    tps = np.r_[0, tps]
    fps = np.r_[0, fps]
    thresholds = np.r_[thresholds[0] + 1, thresholds]

    if fps[-1] <= 0:
        fpr = np.repeat(np.nan, fps.shape)
    else:
        fpr = fps / fps[-1]

    if tps[-1] <= 0:
        tpr = np.repeat(np.nan, tps.shape)
    else:
        tpr = tps / tps[-1]

    return fpr, tpr, thresholds


def _binary_clf_curve(y_true, y_score):
    """Calculate true and false positives per binary classification threshold.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        True targets of binary classification

    y_score : array, shape = [n_samples]
        Estimated probabilities or decision function

    Returns
    -------
    fps : array, shape = [n_thresholds]
        A count of false positives, at index i being the number of negative
        samples assigned a score >= thresholds[i]. The total number of
        negative samples is equal to fps[-1] (thus true negatives are given by
        fps[-1] - fps).

    tps : array, shape = [n_thresholds <= len(np.unique(y_score))]
        An increasing count of true positives, at index i being the number
        of positive samples assigned a score >= thresholds[i]. The total
        number of positive samples is equal to tps[-1] (thus false negatives
        are given by tps[-1] - tps).

    thresholds : array, shape = [n_thresholds]
        Decreasing score values.
    """
    pos_label = 1.0

    y_true = np.ravel(y_true)
    y_score = np.ravel(y_score)

    # make y_true a boolean vector
    y_true = y_true == pos_label

    # sort scores and corresponding truth values
    desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1]
    y_score = y_score[desc_score_indices]
    y_true = y_true[desc_score_indices]
    weight = 1.0

    # y_score typically has many tied values. Here we extract
    # the indices associated with the distinct values. We also
    # concatenate a value for the end of the curve.
    distinct_value_indices = np.where(np.diff(y_score))[0]
    threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1]

    # accumulate the true positives with decreasing threshold
    tps = _stable_cumsum(y_true * weight)[threshold_idxs]
    fps = 1 + threshold_idxs - tps
    return fps, tps, y_score[threshold_idxs]


def _stable_cumsum(arr, axis=None, rtol=1e-05, atol=1e-08):
    """Use high precision for cumsum and check that final value matches sum

    Parameters
    ----------
    arr : array-like
        To be cumulatively summed as flat
    axis : int, optional
        Axis along which the cumulative sum is computed.
        The default (None) is to compute the cumsum over the flattened array.
    rtol : float
        Relative tolerance, see ``np.allclose``
    atol : float
        Absolute tolerance, see ``np.allclose``
    """
    out = np.cumsum(arr, axis=axis, dtype=np.float64)
    expected = np.sum(arr, axis=axis, dtype=np.float64)
    if not np.all(
        np.isclose(
            out.take(-1, axis=axis), expected, rtol=rtol, atol=atol, equal_nan=True
        )
    ):
        raise ValueError(Errors.E163)
    return out


def _auc(x, y):
    """Compute Area Under the Curve (AUC) using the trapezoidal rule

    This is a general function, given points on a curve.  For computing the
    area under the ROC-curve, see :func:`roc_auc_score`.

    Parameters
    ----------
    x : array, shape = [n]
        x coordinates. These must be either monotonic increasing or monotonic
        decreasing.
    y : array, shape = [n]
        y coordinates.

    Returns
    -------
    auc : float
    """
    x = np.ravel(x)
    y = np.ravel(y)

    direction = 1
    dx = np.diff(x)
    if np.any(dx < 0):
        if np.all(dx <= 0):
            direction = -1
        else:
            raise ValueError(Errors.E164.format(x))

    area = direction * np.trapz(y, x)
    if isinstance(area, np.memmap):
        # Reductions such as .sum used internally in np.trapz do not return a
        # scalar by default for numpy.memmap instances contrary to
        # regular numpy.ndarray instances.
        area = area.dtype.type(area)
    return area
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								import numpy as np
-												Example class for training data (#4543)

* OrigAnnot class instead of gold.orig_annot list of zipped tuples

* from_orig to replace from_annot_tuples

* rename to RawAnnot

* some unit tests for GoldParse creation and internal format

* removing orig_annot and switching to lists instead of tuple

* rewriting tuples to use RawAnnot (+ debug statements, WIP)

* fix pop() changing the data

* small fixes

* pop-append fixes

* return RawAnnot for existing GoldParse to have uniform interface

* clean up imports

* fix merge_sents

* add unit test for 4402 with new structure (not working yet)

* introduce DocAnnot

* typo fixes

* add unit test for merge_sents

* rename from_orig to from_raw

* fixing unit tests

* fix nn parser

* read_annots to produce text, doc_annot pairs

* _make_golds fix

* rename golds_to_gold_annots

* small fixes

* fix encoding

* have golds_to_gold_annots use DocAnnot

* missed a spot

* merge_sents as function in DocAnnot

* allow specifying only part of the token-level annotations

* refactor with Example class + underlying dicts

* pipeline components to work with Example objects (wip)

* input checking

* fix yielding

* fix calls to update

* small fixes

* fix scorer unit test with new format

* fix kwargs order

* fixes for ud and conllu scripts

* fix reading data for conllu script

* add in proper errors (not fixed numbering yet to avoid merge conflicts)

* fixing few more small bugs

* fix EL script

											
										
										
											2019-11-11 16:35:27 +00:00
+								from .gold import tags_to_entities, GoldParse, DocAnnotation
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								from .errors import Errors
-												* Fix evaluation of NER in scorer.py

											
										
										
											2015-05-27 01:18:16 +00:00
-												* Print parse if verbose in scorer

											
										
										
											2015-04-05 20:29:30 +00:00
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								class PRFScore(object):
-												Use consistent formatting for docstrings

											
										
										
											2017-04-15 09:59:21 +00:00
+								    """
 								    A precision / recall / F score
 								    """
-												💫 Tidy up and auto-format .py files (#2983)

<!--- Provide a general summary of your changes in the title. -->

## Description
- [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files.
- [x] Update flake8 config to exclude very large files (lemmatization tables etc.)
- [x] Update code to be compatible with flake8 rules
- [x] Fix various small bugs, inconsistencies and messy stuff in the language data
- [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means)

Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` actually get meaningful results.

At the moment, the code style and linting isn't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information.

### Types of change
enhancement, code style

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2018-11-30 16:03:03 +00:00
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								    def __init__(self):
 								        self.tp = 0
 								        self.fp = 0
 								        self.fn = 0
 								    def score_set(self, cand, gold):
 								        self.tp += len(cand.intersection(gold))
 								        self.fp += len(cand - gold)
 								        self.fn += len(gold - cand)
 								    @property
 								    def precision(self):
 								        return self.tp / (self.tp + self.fp + 1e-100)
 								    @property
 								    def recall(self):
 								        return self.tp / (self.tp + self.fn + 1e-100)
 								    @property
 								    def fscore(self):
 								        p = self.precision
 								        r = self.recall
 								        return 2 * ((p * r) / (p + r + 1e-100))
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								class ROCAUCScore(object):
 								    """
 								    An AUC ROC score.
 								    """
 								    def __init__(self):
 								        self.golds = []
 								        self.cands = []
 								        self.saved_score = 0.0
 								        self.saved_score_at_len = 0
 								    def score_set(self, cand, gold):
 								        self.cands.append(cand)
 								        self.golds.append(gold)
 								    @property
 								    def score(self):
 								        if len(self.golds) == self.saved_score_at_len:
 								            return self.saved_score
 								        try:
 								            self.saved_score = _roc_auc_score(self.golds, self.cands)
 								        # catch ValueError: Only one class present in y_true.
 								        # ROC AUC score is not defined in that case.
-												Make except more explicit

											
										
										
											2019-09-18 17:57:08 +00:00
+								        except ValueError:
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								            self.saved_score = -float("inf")
 								        self.saved_score_at_len = len(self.golds)
 								        return self.saved_score
-												* Add scorer script

											
										
										
											2015-03-11 01:07:03 +00:00
+								class Scorer(object):
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								    """Compute evaluation scores."""
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								    def __init__(self, eval_punct=False, pipeline=None):
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								        """Initialize the Scorer.
 								        eval_punct (bool): Evaluate the dependency attachments to and from
 								            punctuation.
 								        RETURNS (Scorer): The newly created object.
 								        DOCS: https://spacy.io/api/scorer#init
 								        """
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        self.tokens = PRFScore()
 								        self.sbd = PRFScore()
 								        self.unlabelled = PRFScore()
 								        self.labelled = PRFScore()
-												Add LAS per dependency to Scorer (#4560)


											
										
										
											2019-10-31 20:18:16 +00:00
+								        self.labelled_per_dep = dict()
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        self.tags = PRFScore()
-												Update morphologizer (#5108)

* Add pos and morph scoring to Scorer

Add pos, morph, and morph_per_type to `Scorer`. Report pos and morph
accuracy in `spacy evaluate`.

* Update morphologizer for v3

* switch to tagger-based morphologizer
* use `spacy.HashCharEmbedCNN` for morphologizer defaults
* add `Doc.is_morphed` flag

* Add morphologizer to train CLI

* Add basic morphologizer pipeline tests

* Add simple morphologizer training example

* Remove subword_features from CharEmbed models

Remove `subword_features` argument from `spacy.HashCharEmbedCNN.v1` and
`spacy.HashCharEmbedBiLSTM.v1` since in these cases `subword_features`
is always `False`.

* Rename setting in morphologizer example

Use `with_pos_tags` instead of `without_pos_tags`.

* Fix kwargs for spacy.HashCharEmbedBiLSTM.v1

* Remove defaults for spacy.HashCharEmbedBiLSTM.v1

Remove default `nM/nC` for `spacy.HashCharEmbedBiLSTM.v1`.

* Set random seed for textcat overfitting test
											
										
										
											2020-04-02 12:46:32 +00:00
+								        self.pos = PRFScore()
 								        self.morphs = PRFScore()
 								        self.morphs_per_feat = dict()
-												Add a tagger-based SentenceRecognizer (#4713)

* Add sent_starts to GoldParse

* Add SentTagger pipeline component

Add `SentTagger` pipeline component as a subclass of `Tagger`.

* Model reduces default parameters from `Tagger` to be small and fast
* Hard-coded set of two labels:
  * S (1): token at beginning of sentence
  * I (0): all other sentence positions
* Sets `token.sent_start` values

* Add sentence segmentation to Scorer

Report `sent_p/r/f` for sentence boundaries, which may be provided by
various pipeline components.

* Add sentence segmentation to CLI evaluate

* Add senttagger metrics/scoring to train CLI

* Rename SentTagger to SentenceRecognizer

* Add SentenceRecognizer to spacy.pipes imports

* Add SentenceRecognizer serialization test

* Shorten component name to sentrec

* Remove duplicates from train CLI output metrics

											
										
										
											2019-11-28 10:10:07 +00:00
+								        self.sent_starts = PRFScore()
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        self.ner = PRFScore()
-												Evaluation of NER model per entity type, closes #3490 (#3911)

* Evaluation of NER model per entity type, closes ##3490

Now each ent score is tracked individually in order to have its own Precision, Recall and F1 Score

* Keep track of each entity individually using dicts

* Improving how to compute the scores for each entity

* Fixed bug computing scores for ents

* Formatting with black

* Added key ents_per_type to the scores function

The key `ents_per_type` contains the metrics Precision, Recall and F1-Score for each entity individually

											
										
										
											2019-07-09 18:54:59 +00:00
+								        self.ner_per_ents = dict()
-												* Add scorer script

											
										
										
											2015-03-11 01:07:03 +00:00
+								        self.eval_punct = eval_punct
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								        self.textcat = PRFScore()
 								        self.textcat_f_per_cat = dict()
 								        self.textcat_auc_per_cat = dict()
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								        self.textcat_positive_label = None
 								        self.textcat_multilabel = False
 								        if pipeline:
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								            for name, component in pipeline:
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								                if name == "textcat":
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								                    self.textcat_multilabel = component.model.attrs["multi_label"]
-												Tidy up and auto-format

											
										
										
											2020-06-20 12:15:04 +00:00
+								                    self.textcat_positive_label = component.cfg.get(
 								                        "positive_label", None
 								                    )
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								                    for label in component.cfg.get("labels", []):
 								                        self.textcat_auc_per_cat[label] = ROCAUCScore()
 								                        self.textcat_f_per_cat[label] = PRFScore()
-												* Add scorer script

											
										
										
											2015-03-11 01:07:03 +00:00
 								    @property
 								    def tags_acc(self):
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								        """RETURNS (float): Part-of-speech tag accuracy (fine grained tags,
 								            i.e. `Token.tag`).
 								        """
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        return self.tags.fscore * 100
-												* Tmp commit. Working on whole document parsing

											
										
										
											2015-05-24 00:49:56 +00:00
-												Update morphologizer (#5108)

* Add pos and morph scoring to Scorer

Add pos, morph, and morph_per_type to `Scorer`. Report pos and morph
accuracy in `spacy evaluate`.

* Update morphologizer for v3

* switch to tagger-based morphologizer
* use `spacy.HashCharEmbedCNN` for morphologizer defaults
* add `Doc.is_morphed` flag

* Add morphologizer to train CLI

* Add basic morphologizer pipeline tests

* Add simple morphologizer training example

* Remove subword_features from CharEmbed models

Remove `subword_features` argument from `spacy.HashCharEmbedCNN.v1` and
`spacy.HashCharEmbedBiLSTM.v1` since in these cases `subword_features`
is always `False`.

* Rename setting in morphologizer example

Use `with_pos_tags` instead of `without_pos_tags`.

* Fix kwargs for spacy.HashCharEmbedBiLSTM.v1

* Remove defaults for spacy.HashCharEmbedBiLSTM.v1

Remove default `nM/nC` for `spacy.HashCharEmbedBiLSTM.v1`.

* Set random seed for textcat overfitting test
											
										
										
											2020-04-02 12:46:32 +00:00
+								    @property
 								    def pos_acc(self):
 								        """RETURNS (float): Part-of-speech tag accuracy (coarse grained pos,
 								            i.e. `Token.pos`).
 								        """
 								        return self.pos.fscore * 100
 								    @property
 								    def morphs_acc(self):
-												Tidy up and auto-format

											
										
										
											2020-06-20 12:15:04 +00:00
+								        """RETURNS (float): Morph tag accuracy (morphological features,
-												Update morphologizer (#5108)

* Add pos and morph scoring to Scorer

Add pos, morph, and morph_per_type to `Scorer`. Report pos and morph
accuracy in `spacy evaluate`.

* Update morphologizer for v3

* switch to tagger-based morphologizer
* use `spacy.HashCharEmbedCNN` for morphologizer defaults
* add `Doc.is_morphed` flag

* Add morphologizer to train CLI

* Add basic morphologizer pipeline tests

* Add simple morphologizer training example

* Remove subword_features from CharEmbed models

Remove `subword_features` argument from `spacy.HashCharEmbedCNN.v1` and
`spacy.HashCharEmbedBiLSTM.v1` since in these cases `subword_features`
is always `False`.

* Rename setting in morphologizer example

Use `with_pos_tags` instead of `without_pos_tags`.

* Fix kwargs for spacy.HashCharEmbedBiLSTM.v1

* Remove defaults for spacy.HashCharEmbedBiLSTM.v1

Remove default `nM/nC` for `spacy.HashCharEmbedBiLSTM.v1`.

* Set random seed for textcat overfitting test
											
										
										
											2020-04-02 12:46:32 +00:00
+								           i.e. `Token.morph`).
 								       """
-												Tidy up and auto-format

											
										
										
											2020-06-20 12:15:04 +00:00
+								        return self.morphs.fscore * 100
-												Update morphologizer (#5108)

* Add pos and morph scoring to Scorer

Add pos, morph, and morph_per_type to `Scorer`. Report pos and morph
accuracy in `spacy evaluate`.

* Update morphologizer for v3

* switch to tagger-based morphologizer
* use `spacy.HashCharEmbedCNN` for morphologizer defaults
* add `Doc.is_morphed` flag

* Add morphologizer to train CLI

* Add basic morphologizer pipeline tests

* Add simple morphologizer training example

* Remove subword_features from CharEmbed models

Remove `subword_features` argument from `spacy.HashCharEmbedCNN.v1` and
`spacy.HashCharEmbedBiLSTM.v1` since in these cases `subword_features`
is always `False`.

* Rename setting in morphologizer example

Use `with_pos_tags` instead of `without_pos_tags`.

* Fix kwargs for spacy.HashCharEmbedBiLSTM.v1

* Remove defaults for spacy.HashCharEmbedBiLSTM.v1

Remove default `nM/nC` for `spacy.HashCharEmbedBiLSTM.v1`.

* Set random seed for textcat overfitting test
											
										
										
											2020-04-02 12:46:32 +00:00
 								    @property
 								    def morphs_per_type(self):
-												Tidy up and auto-format

											
										
										
											2020-06-20 12:15:04 +00:00
+								        """RETURNS (dict): Scores per dependency label.
-												Update morphologizer (#5108)

* Add pos and morph scoring to Scorer

Add pos, morph, and morph_per_type to `Scorer`. Report pos and morph
accuracy in `spacy evaluate`.

* Update morphologizer for v3

* switch to tagger-based morphologizer
* use `spacy.HashCharEmbedCNN` for morphologizer defaults
* add `Doc.is_morphed` flag

* Add morphologizer to train CLI

* Add basic morphologizer pipeline tests

* Add simple morphologizer training example

* Remove subword_features from CharEmbed models

Remove `subword_features` argument from `spacy.HashCharEmbedCNN.v1` and
`spacy.HashCharEmbedBiLSTM.v1` since in these cases `subword_features`
is always `False`.

* Rename setting in morphologizer example

Use `with_pos_tags` instead of `without_pos_tags`.

* Fix kwargs for spacy.HashCharEmbedBiLSTM.v1

* Remove defaults for spacy.HashCharEmbedBiLSTM.v1

Remove default `nM/nC` for `spacy.HashCharEmbedBiLSTM.v1`.

* Set random seed for textcat overfitting test
											
										
										
											2020-04-02 12:46:32 +00:00
+								       """
-												Tidy up and auto-format

											
										
										
											2020-06-20 12:15:04 +00:00
+								        return {
 								            k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
 								            for k, v in self.morphs_per_feat.items()
 								        }
-												Update morphologizer (#5108)

* Add pos and morph scoring to Scorer

Add pos, morph, and morph_per_type to `Scorer`. Report pos and morph
accuracy in `spacy evaluate`.

* Update morphologizer for v3

* switch to tagger-based morphologizer
* use `spacy.HashCharEmbedCNN` for morphologizer defaults
* add `Doc.is_morphed` flag

* Add morphologizer to train CLI

* Add basic morphologizer pipeline tests

* Add simple morphologizer training example

* Remove subword_features from CharEmbed models

Remove `subword_features` argument from `spacy.HashCharEmbedCNN.v1` and
`spacy.HashCharEmbedBiLSTM.v1` since in these cases `subword_features`
is always `False`.

* Rename setting in morphologizer example

Use `with_pos_tags` instead of `without_pos_tags`.

* Fix kwargs for spacy.HashCharEmbedBiLSTM.v1

* Remove defaults for spacy.HashCharEmbedBiLSTM.v1

Remove default `nM/nC` for `spacy.HashCharEmbedBiLSTM.v1`.

* Set random seed for textcat overfitting test
											
										
										
											2020-04-02 12:46:32 +00:00
-												Add a tagger-based SentenceRecognizer (#4713)

* Add sent_starts to GoldParse

* Add SentTagger pipeline component

Add `SentTagger` pipeline component as a subclass of `Tagger`.

* Model reduces default parameters from `Tagger` to be small and fast
* Hard-coded set of two labels:
  * S (1): token at beginning of sentence
  * I (0): all other sentence positions
* Sets `token.sent_start` values

* Add sentence segmentation to Scorer

Report `sent_p/r/f` for sentence boundaries, which may be provided by
various pipeline components.

* Add sentence segmentation to CLI evaluate

* Add senttagger metrics/scoring to train CLI

* Rename SentTagger to SentenceRecognizer

* Add SentenceRecognizer to spacy.pipes imports

* Add SentenceRecognizer serialization test

* Shorten component name to sentrec

* Remove duplicates from train CLI output metrics

											
										
										
											2019-11-28 10:10:07 +00:00
+								    @property
 								    def sent_p(self):
 								        """RETURNS (float): F-score for identification of sentence starts.
 								            i.e. `Token.is_sent_start`).
 								        """
 								        return self.sent_starts.precision * 100
 								    @property
 								    def sent_r(self):
 								        """RETURNS (float): F-score for identification of sentence starts.
 								            i.e. `Token.is_sent_start`).
 								        """
 								        return self.sent_starts.recall * 100
 								    @property
 								    def sent_f(self):
 								        """RETURNS (float): F-score for identification of sentence starts.
 								            i.e. `Token.is_sent_start`).
 								        """
 								        return self.sent_starts.fscore * 100
-												* Tmp commit. Working on whole document parsing

											
										
										
											2015-05-24 00:49:56 +00:00
+								    @property
 								    def token_acc(self):
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								        """RETURNS (float): Tokenization accuracy."""
-												* Start scoring tokens

											
										
										
											2015-06-28 04:21:38 +00:00
+								        return self.tokens.precision * 100
-												* Add scorer script

											
										
										
											2015-03-11 01:07:03 +00:00
 								    @property
 								    def uas(self):
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								        """RETURNS (float): Unlabelled dependency score."""
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        return self.unlabelled.fscore * 100
-												* Add scorer script

											
										
										
											2015-03-11 01:07:03 +00:00
 								    @property
 								    def las(self):
-												Add LAS per dependency to Scorer (#4560)


											
										
										
											2019-10-31 20:18:16 +00:00
+								        """RETURNS (float): Labelled dependency score."""
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        return self.labelled.fscore * 100
-												* Add scorer script

											
										
										
											2015-03-11 01:07:03 +00:00
-												Add LAS per dependency to Scorer (#4560)


											
										
										
											2019-10-31 20:18:16 +00:00
+								    @property
 								    def las_per_type(self):
 								        """RETURNS (dict): Scores per dependency label.
 								        """
 								        return {
 								            k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
 								            for k, v in self.labelled_per_dep.items()
 								        }
-												* Add scorer script

											
										
										
											2015-03-11 01:07:03 +00:00
+								    @property
 								    def ents_p(self):
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								        """RETURNS (float): Named entity accuracy (precision)."""
-												* Fix evaluation of NER in scorer.py

											
										
										
											2015-05-27 01:18:16 +00:00
+								        return self.ner.precision * 100
-												* Add scorer script

											
										
										
											2015-03-11 01:07:03 +00:00
 								    @property
 								    def ents_r(self):
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								        """RETURNS (float): Named entity accuracy (recall)."""
-												* Fix evaluation of NER in scorer.py

											
										
										
											2015-05-27 01:18:16 +00:00
+								        return self.ner.recall * 100
-												Remove trailing whitespace

											
										
										
											2015-04-19 08:31:31 +00:00
-												* Add scorer script

											
										
										
											2015-03-11 01:07:03 +00:00
+								    @property
 								    def ents_f(self):
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								        """RETURNS (float): Named entity accuracy (F-score)."""
-												* Fix evaluation of NER in scorer.py

											
										
										
											2015-05-27 01:18:16 +00:00
+								        return self.ner.fscore * 100
-												* Add scorer script

											
										
										
											2015-03-11 01:07:03 +00:00
-												Update Scorer.ents_per_type

											
										
										
											2019-07-10 09:19:28 +00:00
+								    @property
 								    def ents_per_type(self):
 								        """RETURNS (dict): Scores per entity label.
 								        """
 								        return {
 								            k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
 								            for k, v in self.ner_per_ents.items()
 								        }
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								    @property
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								    def textcat_f(self):
 								        """RETURNS (float): f-score on positive label for binary classification,
 								        macro-averaged f-score for multilabel classification
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								        """
 								        if not self.textcat_multilabel:
 								            if self.textcat_positive_label:
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								                # binary classification
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								                return self.textcat.fscore * 100
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								        # multi-class and/or multi-label
 								        return (
 								            sum([score.fscore for label, score in self.textcat_f_per_cat.items()])
 								            / (len(self.textcat_f_per_cat) + 1e-100)
 								            * 100
 								        )
 								    @property
 								    def textcat_auc(self):
 								        """RETURNS (float): macro-averaged AUC ROC score for multilabel classification (-1 if undefined)
 								        """
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								        return max(
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								            sum([score.score for label, score in self.textcat_auc_per_cat.items()])
 								            / (len(self.textcat_auc_per_cat) + 1e-100),
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								            -1,
 								        )
 								    @property
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								    def textcats_auc_per_cat(self):
 								        """RETURNS (dict): AUC ROC Scores per textcat label.
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								        """
 								        return {
 								            k: {"roc_auc_score": max(v.score, -1)}
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								            for k, v in self.textcat_auc_per_cat.items()
 								        }
 								    @property
 								    def textcats_f_per_cat(self):
 								        """RETURNS (dict): F-scores per textcat label.
 								        """
 								        return {
 								            k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
 								            for k, v in self.textcat_f_per_cat.items()
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								        }
-												Refactor training, with new spacy.train module. Defaults still a little awkward.

											
										
										
											2016-10-09 10:24:24 +00:00
+								    @property
 								    def scores(self):
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								        """RETURNS (dict): All scores mapped by key.
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								        """
-												Refactor training, with new spacy.train module. Defaults still a little awkward.

											
										
										
											2016-10-09 10:24:24 +00:00
+								        return {
-												💫 Tidy up and auto-format .py files (#2983)

<!--- Provide a general summary of your changes in the title. -->

## Description
- [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files.
- [x] Update flake8 config to exclude very large files (lemmatization tables etc.)
- [x] Update code to be compatible with flake8 rules
- [x] Fix various small bugs, inconsistencies and messy stuff in the language data
- [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means)

Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` actually get meaningful results.

At the moment, the code style and linting isn't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information.

### Types of change
enhancement, code style

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2018-11-30 16:03:03 +00:00
+								            "uas": self.uas,
 								            "las": self.las,
-												Add LAS per dependency to Scorer (#4560)


											
										
										
											2019-10-31 20:18:16 +00:00
+								            "las_per_type": self.las_per_type,
-												💫 Tidy up and auto-format .py files (#2983)

<!--- Provide a general summary of your changes in the title. -->

## Description
- [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files.
- [x] Update flake8 config to exclude very large files (lemmatization tables etc.)
- [x] Update code to be compatible with flake8 rules
- [x] Fix various small bugs, inconsistencies and messy stuff in the language data
- [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means)

Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` actually get meaningful results.

At the moment, the code style and linting isn't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information.

### Types of change
enhancement, code style

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2018-11-30 16:03:03 +00:00
+								            "ents_p": self.ents_p,
 								            "ents_r": self.ents_r,
 								            "ents_f": self.ents_f,
-												Update Scorer.ents_per_type

											
										
										
											2019-07-10 09:19:28 +00:00
+								            "ents_per_type": self.ents_per_type,
-												💫 Tidy up and auto-format .py files (#2983)

<!--- Provide a general summary of your changes in the title. -->

## Description
- [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files.
- [x] Update flake8 config to exclude very large files (lemmatization tables etc.)
- [x] Update code to be compatible with flake8 rules
- [x] Fix various small bugs, inconsistencies and messy stuff in the language data
- [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means)

Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` actually get meaningful results.

At the moment, the code style and linting isn't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information.

### Types of change
enhancement, code style

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2018-11-30 16:03:03 +00:00
+								            "tags_acc": self.tags_acc,
-												Update morphologizer (#5108)

* Add pos and morph scoring to Scorer

Add pos, morph, and morph_per_type to `Scorer`. Report pos and morph
accuracy in `spacy evaluate`.

* Update morphologizer for v3

* switch to tagger-based morphologizer
* use `spacy.HashCharEmbedCNN` for morphologizer defaults
* add `Doc.is_morphed` flag

* Add morphologizer to train CLI

* Add basic morphologizer pipeline tests

* Add simple morphologizer training example

* Remove subword_features from CharEmbed models

Remove `subword_features` argument from `spacy.HashCharEmbedCNN.v1` and
`spacy.HashCharEmbedBiLSTM.v1` since in these cases `subword_features`
is always `False`.

* Rename setting in morphologizer example

Use `with_pos_tags` instead of `without_pos_tags`.

* Fix kwargs for spacy.HashCharEmbedBiLSTM.v1

* Remove defaults for spacy.HashCharEmbedBiLSTM.v1

Remove default `nM/nC` for `spacy.HashCharEmbedBiLSTM.v1`.

* Set random seed for textcat overfitting test
											
										
										
											2020-04-02 12:46:32 +00:00
+								            "pos_acc": self.pos_acc,
 								            "morphs_acc": self.morphs_acc,
 								            "morphs_per_type": self.morphs_per_type,
-												Add a tagger-based SentenceRecognizer (#4713)

* Add sent_starts to GoldParse

* Add SentTagger pipeline component

Add `SentTagger` pipeline component as a subclass of `Tagger`.

* Model reduces default parameters from `Tagger` to be small and fast
* Hard-coded set of two labels:
  * S (1): token at beginning of sentence
  * I (0): all other sentence positions
* Sets `token.sent_start` values

* Add sentence segmentation to Scorer

Report `sent_p/r/f` for sentence boundaries, which may be provided by
various pipeline components.

* Add sentence segmentation to CLI evaluate

* Add senttagger metrics/scoring to train CLI

* Rename SentTagger to SentenceRecognizer

* Add SentenceRecognizer to spacy.pipes imports

* Add SentenceRecognizer serialization test

* Shorten component name to sentrec

* Remove duplicates from train CLI output metrics

											
										
										
											2019-11-28 10:10:07 +00:00
+								            "sent_p": self.sent_p,
 								            "sent_r": self.sent_r,
 								            "sent_f": self.sent_f,
-												💫 Tidy up and auto-format .py files (#2983)

<!--- Provide a general summary of your changes in the title. -->

## Description
- [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files.
- [x] Update flake8 config to exclude very large files (lemmatization tables etc.)
- [x] Update code to be compatible with flake8 rules
- [x] Fix various small bugs, inconsistencies and messy stuff in the language data
- [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means)

Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` actually get meaningful results.

At the moment, the code style and linting isn't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information.

### Types of change
enhancement, code style

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2018-11-30 16:03:03 +00:00
+								            "token_acc": self.token_acc,
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								            "textcat_f": self.textcat_f,
 								            "textcat_auc": self.textcat_auc,
 								            "textcats_f_per_cat": self.textcats_f_per_cat,
 								            "textcats_auc_per_cat": self.textcats_auc_per_cat,
-												Refactor training, with new spacy.train module. Defaults still a little awkward.

											
										
										
											2016-10-09 10:24:24 +00:00
+								        }
-												Example class for training data (#4543)

* OrigAnnot class instead of gold.orig_annot list of zipped tuples

* from_orig to replace from_annot_tuples

* rename to RawAnnot

* some unit tests for GoldParse creation and internal format

* removing orig_annot and switching to lists instead of tuple

* rewriting tuples to use RawAnnot (+ debug statements, WIP)

* fix pop() changing the data

* small fixes

* pop-append fixes

* return RawAnnot for existing GoldParse to have uniform interface

* clean up imports

* fix merge_sents

* add unit test for 4402 with new structure (not working yet)

* introduce DocAnnot

* typo fixes

* add unit test for merge_sents

* rename from_orig to from_raw

* fixing unit tests

* fix nn parser

* read_annots to produce text, doc_annot pairs

* _make_golds fix

* rename golds_to_gold_annots

* small fixes

* fix encoding

* have golds_to_gold_annots use DocAnnot

* missed a spot

* merge_sents as function in DocAnnot

* allow specifying only part of the token-level annotations

* refactor with Example class + underlying dicts

* pipeline components to work with Example objects (wip)

* input checking

* fix yielding

* fix calls to update

* small fixes

* fix scorer unit test with new format

* fix kwargs order

* fixes for ud and conllu scripts

* fix reading data for conllu script

* add in proper errors (not fixed numbering yet to avoid merge conflicts)

* fixing few more small bugs

* fix EL script

											
										
										
											2019-11-11 16:35:27 +00:00
+								    def score(self, example, verbose=False, punct_labels=("p", "punct")):
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								        """Update the evaluation scores from a single Doc / GoldParse pair.
-												Example class for training data (#4543)

* OrigAnnot class instead of gold.orig_annot list of zipped tuples

* from_orig to replace from_annot_tuples

* rename to RawAnnot

* some unit tests for GoldParse creation and internal format

* removing orig_annot and switching to lists instead of tuple

* rewriting tuples to use RawAnnot (+ debug statements, WIP)

* fix pop() changing the data

* small fixes

* pop-append fixes

* return RawAnnot for existing GoldParse to have uniform interface

* clean up imports

* fix merge_sents

* add unit test for 4402 with new structure (not working yet)

* introduce DocAnnot

* typo fixes

* add unit test for merge_sents

* rename from_orig to from_raw

* fixing unit tests

* fix nn parser

* read_annots to produce text, doc_annot pairs

* _make_golds fix

* rename golds_to_gold_annots

* small fixes

* fix encoding

* have golds_to_gold_annots use DocAnnot

* missed a spot

* merge_sents as function in DocAnnot

* allow specifying only part of the token-level annotations

* refactor with Example class + underlying dicts

* pipeline components to work with Example objects (wip)

* input checking

* fix yielding

* fix calls to update

* small fixes

* fix scorer unit test with new format

* fix kwargs order

* fixes for ud and conllu scripts

* fix reading data for conllu script

* add in proper errors (not fixed numbering yet to avoid merge conflicts)

* fixing few more small bugs

* fix EL script

											
										
										
											2019-11-11 16:35:27 +00:00
+								        example (Example): The predicted annotations + correct annotations.
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								        verbose (bool): Print debugging information.
 								        punct_labels (tuple): Dependency labels for punctuation. Used to
 								            evaluate dependency attachments to punctuation if `eval_punct` is
 								            `True`.
 								        DOCS: https://spacy.io/api/scorer#score
 								        """
-												Example class for training data (#4543)

* OrigAnnot class instead of gold.orig_annot list of zipped tuples

* from_orig to replace from_annot_tuples

* rename to RawAnnot

* some unit tests for GoldParse creation and internal format

* removing orig_annot and switching to lists instead of tuple

* rewriting tuples to use RawAnnot (+ debug statements, WIP)

* fix pop() changing the data

* small fixes

* pop-append fixes

* return RawAnnot for existing GoldParse to have uniform interface

* clean up imports

* fix merge_sents

* add unit test for 4402 with new structure (not working yet)

* introduce DocAnnot

* typo fixes

* add unit test for merge_sents

* rename from_orig to from_raw

* fixing unit tests

* fix nn parser

* read_annots to produce text, doc_annot pairs

* _make_golds fix

* rename golds_to_gold_annots

* small fixes

* fix encoding

* have golds_to_gold_annots use DocAnnot

* missed a spot

* merge_sents as function in DocAnnot

* allow specifying only part of the token-level annotations

* refactor with Example class + underlying dicts

* pipeline components to work with Example objects (wip)

* input checking

* fix yielding

* fix calls to update

* small fixes

* fix scorer unit test with new format

* fix kwargs order

* fixes for ud and conllu scripts

* fix reading data for conllu script

* add in proper errors (not fixed numbering yet to avoid merge conflicts)

* fixing few more small bugs

* fix EL script

											
										
										
											2019-11-11 16:35:27 +00:00
+								        if isinstance(example, tuple) and len(example) == 2:
 								            doc, gold = example
 								        else:
 								            gold = example.gold
 								            doc = example.doc
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								        if len(doc) != len(gold):
-												Example class for training data (#4543)

* OrigAnnot class instead of gold.orig_annot list of zipped tuples

* from_orig to replace from_annot_tuples

* rename to RawAnnot

* some unit tests for GoldParse creation and internal format

* removing orig_annot and switching to lists instead of tuple

* rewriting tuples to use RawAnnot (+ debug statements, WIP)

* fix pop() changing the data

* small fixes

* pop-append fixes

* return RawAnnot for existing GoldParse to have uniform interface

* clean up imports

* fix merge_sents

* add unit test for 4402 with new structure (not working yet)

* introduce DocAnnot

* typo fixes

* add unit test for merge_sents

* rename from_orig to from_raw

* fixing unit tests

* fix nn parser

* read_annots to produce text, doc_annot pairs

* _make_golds fix

* rename golds_to_gold_annots

* small fixes

* fix encoding

* have golds_to_gold_annots use DocAnnot

* missed a spot

* merge_sents as function in DocAnnot

* allow specifying only part of the token-level annotations

* refactor with Example class + underlying dicts

* pipeline components to work with Example objects (wip)

* input checking

* fix yielding

* fix calls to update

* small fixes

* fix scorer unit test with new format

* fix kwargs order

* fixes for ud and conllu scripts

* fix reading data for conllu script

* add in proper errors (not fixed numbering yet to avoid merge conflicts)

* fixing few more small bugs

* fix EL script

											
										
										
											2019-11-11 16:35:27 +00:00
+								            doc_annotation = DocAnnotation(cats=gold.cats)
 								            token_annotation = gold.orig
-												Restructure Example with merged sents as default (#4632)

* Switch to train_dataset() function in train CLI

* Fixes for pipe() methods in pipeline components

* Don't clobber `examples` variable with `as_example` in pipe() methods
* Remove unnecessary traversals of `examples`

* Update Parser.pipe() for Examples

* Add `as_examples` kwarg to `pipe()` with implementation to return
`Example`s

* Accept `Doc` or `Example` in `pipe()` with `_get_doc()` (copied from
`Pipe`)

* Fixes to Example implementation in spacy.gold

* Move `make_projective` from an attribute of Example to an argument of
`Example.get_gold_parses()`

* Head of 0 are not treated as unset

* Unset heads are set to self rather than `None` (which causes problems
while projectivizing)

* Check for `Doc` (not just not `None`) when creating GoldParses for
pre-merged example

* Don't clobber `examples` variable in `iter_gold_docs()`

* Add/modify gold tests for handling projectivity

* In JSON roundtrip compare results from `dev_dataset` rather than
`train_dataset` to avoid projectivization (and other potential
modifications)

* Add test for projective train vs. nonprojective dev versions of the
same `Doc`

* Handle ignore_misaligned as arg rather than attr

Move `ignore_misaligned` from an attribute of `Example` to an argument
to `Example.get_gold_parses()`, which makes it parallel to
`make_projective`.

Add test with old and new align that checks whether `ignore_misaligned`
errors are raised as expected (only for new align).

* Remove unused attrs from gold.pxd

Remove `ignore_misaligned` and `make_projective` from `gold.pxd`

* Restructure Example with merged sents as default

An `Example` now includes a single `TokenAnnotation` that includes all
the information from one `Doc` (=JSON `paragraph`). If required, the
individual sentences can be returned as a list of examples with
`Example.split_sents()` with no raw text available.

* Input/output a single `Example.token_annotation`

* Add `sent_starts` to `TokenAnnotation` to handle sentence boundaries

* Replace `Example.merge_sents()` with `Example.split_sents()`

* Modify components to use a single `Example.token_annotation`

  * Pipeline components
  * conllu2json converter

* Rework/rename `add_token_annotation()` and `add_doc_annotation()` to
`set_token_annotation()` and `set_doc_annotation()`, functions that set
rather then appending/extending.

* Rename `morphology` to `morphs` in `TokenAnnotation` and `GoldParse`

* Add getters to `TokenAnnotation` to supply default values when a given
attribute is not available

* `Example.get_gold_parses()` in `spacy.gold._make_golds()` is only
applied on single examples, so the `GoldParse` is returned saved in the
provided `Example` rather than creating a new `Example` with no other
internal annotation

* Update tests for API changes and `merge_sents()` vs. `split_sents()`

* Refer to Example.goldparse in iter_gold_docs()

Use `Example.goldparse` in `iter_gold_docs()` instead of `Example.gold`
because a `None` `GoldParse` is generated with ignore_misaligned and
generating it on-the-fly can raise an unwanted AlignmentError

* Fix make_orth_variants()

Fix bug in make_orth_variants() related to conversion from multiple to
one TokenAnnotation per Example.

* Add basic test for make_orth_variants()

* Replace try/except with conditionals

* Replace default morph value with set

											
										
										
											2019-11-25 15:03:28 +00:00
+								            gold = GoldParse.from_annotation(doc, doc_annotation, token_annotation)
-												Example class for training data (#4543)

* OrigAnnot class instead of gold.orig_annot list of zipped tuples

* from_orig to replace from_annot_tuples

* rename to RawAnnot

* some unit tests for GoldParse creation and internal format

* removing orig_annot and switching to lists instead of tuple

* rewriting tuples to use RawAnnot (+ debug statements, WIP)

* fix pop() changing the data

* small fixes

* pop-append fixes

* return RawAnnot for existing GoldParse to have uniform interface

* clean up imports

* fix merge_sents

* add unit test for 4402 with new structure (not working yet)

* introduce DocAnnot

* typo fixes

* add unit test for merge_sents

* rename from_orig to from_raw

* fixing unit tests

* fix nn parser

* read_annots to produce text, doc_annot pairs

* _make_golds fix

* rename golds_to_gold_annots

* small fixes

* fix encoding

* have golds_to_gold_annots use DocAnnot

* missed a spot

* merge_sents as function in DocAnnot

* allow specifying only part of the token-level annotations

* refactor with Example class + underlying dicts

* pipeline components to work with Example objects (wip)

* input checking

* fix yielding

* fix calls to update

* small fixes

* fix scorer unit test with new format

* fix kwargs order

* fixes for ud and conllu scripts

* fix reading data for conllu script

* add in proper errors (not fixed numbering yet to avoid merge conflicts)

* fixing few more small bugs

* fix EL script

											
										
										
											2019-11-11 16:35:27 +00:00
+								        orig = gold.orig
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        gold_deps = set()
-												Add LAS per dependency to Scorer (#4560)


											
										
										
											2019-10-31 20:18:16 +00:00
+								        gold_deps_per_dep = {}
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        gold_tags = set()
-												Update morphologizer (#5108)

* Add pos and morph scoring to Scorer

Add pos, morph, and morph_per_type to `Scorer`. Report pos and morph
accuracy in `spacy evaluate`.

* Update morphologizer for v3

* switch to tagger-based morphologizer
* use `spacy.HashCharEmbedCNN` for morphologizer defaults
* add `Doc.is_morphed` flag

* Add morphologizer to train CLI

* Add basic morphologizer pipeline tests

* Add simple morphologizer training example

* Remove subword_features from CharEmbed models

Remove `subword_features` argument from `spacy.HashCharEmbedCNN.v1` and
`spacy.HashCharEmbedBiLSTM.v1` since in these cases `subword_features`
is always `False`.

* Rename setting in morphologizer example

Use `with_pos_tags` instead of `without_pos_tags`.

* Fix kwargs for spacy.HashCharEmbedBiLSTM.v1

* Remove defaults for spacy.HashCharEmbedBiLSTM.v1

Remove default `nM/nC` for `spacy.HashCharEmbedBiLSTM.v1`.

* Set random seed for textcat overfitting test
											
										
										
											2020-04-02 12:46:32 +00:00
+								        gold_pos = set()
 								        gold_morphs = set()
 								        gold_morphs_per_feat = {}
-												Add a tagger-based SentenceRecognizer (#4713)

* Add sent_starts to GoldParse

* Add SentTagger pipeline component

Add `SentTagger` pipeline component as a subclass of `Tagger`.

* Model reduces default parameters from `Tagger` to be small and fast
* Hard-coded set of two labels:
  * S (1): token at beginning of sentence
  * I (0): all other sentence positions
* Sets `token.sent_start` values

* Add sentence segmentation to Scorer

Report `sent_p/r/f` for sentence boundaries, which may be provided by
various pipeline components.

* Add sentence segmentation to CLI evaluate

* Add senttagger metrics/scoring to train CLI

* Rename SentTagger to SentenceRecognizer

* Add SentenceRecognizer to spacy.pipes imports

* Add SentenceRecognizer serialization test

* Shorten component name to sentrec

* Remove duplicates from train CLI output metrics

											
										
										
											2019-11-28 10:10:07 +00:00
+								        gold_sent_starts = set()
-												Example class for training data (#4543)

* OrigAnnot class instead of gold.orig_annot list of zipped tuples

* from_orig to replace from_annot_tuples

* rename to RawAnnot

* some unit tests for GoldParse creation and internal format

* removing orig_annot and switching to lists instead of tuple

* rewriting tuples to use RawAnnot (+ debug statements, WIP)

* fix pop() changing the data

* small fixes

* pop-append fixes

* return RawAnnot for existing GoldParse to have uniform interface

* clean up imports

* fix merge_sents

* add unit test for 4402 with new structure (not working yet)

* introduce DocAnnot

* typo fixes

* add unit test for merge_sents

* rename from_orig to from_raw

* fixing unit tests

* fix nn parser

* read_annots to produce text, doc_annot pairs

* _make_golds fix

* rename golds_to_gold_annots

* small fixes

* fix encoding

* have golds_to_gold_annots use DocAnnot

* missed a spot

* merge_sents as function in DocAnnot

* allow specifying only part of the token-level annotations

* refactor with Example class + underlying dicts

* pipeline components to work with Example objects (wip)

* input checking

* fix yielding

* fix calls to update

* small fixes

* fix scorer unit test with new format

* fix kwargs order

* fixes for ud and conllu scripts

* fix reading data for conllu script

* add in proper errors (not fixed numbering yet to avoid merge conflicts)

* fixing few more small bugs

* fix EL script

											
										
										
											2019-11-11 16:35:27 +00:00
+								        gold_ents = set(tags_to_entities(orig.entities))
-												Tidy up and auto-format

											
										
										
											2020-06-20 12:15:04 +00:00
+								        for id_, tag, pos, morph, head, dep, sent_start in zip(
 								            orig.ids,
 								            orig.tags,
 								            orig.pos,
 								            orig.morphs,
 								            orig.heads,
 								            orig.deps,
 								            orig.sent_starts,
 								        ):
-												* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags

											
										
										
											2015-05-30 16:24:32 +00:00
+								            gold_tags.add((id_, tag))
-												Update morphologizer (#5108)

* Add pos and morph scoring to Scorer

Add pos, morph, and morph_per_type to `Scorer`. Report pos and morph
accuracy in `spacy evaluate`.

* Update morphologizer for v3

* switch to tagger-based morphologizer
* use `spacy.HashCharEmbedCNN` for morphologizer defaults
* add `Doc.is_morphed` flag

* Add morphologizer to train CLI

* Add basic morphologizer pipeline tests

* Add simple morphologizer training example

* Remove subword_features from CharEmbed models

Remove `subword_features` argument from `spacy.HashCharEmbedCNN.v1` and
`spacy.HashCharEmbedBiLSTM.v1` since in these cases `subword_features`
is always `False`.

* Rename setting in morphologizer example

Use `with_pos_tags` instead of `without_pos_tags`.

* Fix kwargs for spacy.HashCharEmbedBiLSTM.v1

* Remove defaults for spacy.HashCharEmbedBiLSTM.v1

Remove default `nM/nC` for `spacy.HashCharEmbedBiLSTM.v1`.

* Set random seed for textcat overfitting test
											
										
										
											2020-04-02 12:46:32 +00:00
+								            gold_pos.add((id_, pos))
 								            gold_morphs.add((id_, morph))
 								            if morph:
 								                for feat in morph.split("|"):
 								                    field, values = feat.split("=")
 								                    if field not in self.morphs_per_feat:
 								                        self.morphs_per_feat[field] = PRFScore()
 								                    if field not in gold_morphs_per_feat:
 								                        gold_morphs_per_feat[field] = set()
 								                    gold_morphs_per_feat[field].add((id_, feat))
-												Add a tagger-based SentenceRecognizer (#4713)

* Add sent_starts to GoldParse

* Add SentTagger pipeline component

Add `SentTagger` pipeline component as a subclass of `Tagger`.

* Model reduces default parameters from `Tagger` to be small and fast
* Hard-coded set of two labels:
  * S (1): token at beginning of sentence
  * I (0): all other sentence positions
* Sets `token.sent_start` values

* Add sentence segmentation to Scorer

Report `sent_p/r/f` for sentence boundaries, which may be provided by
various pipeline components.

* Add sentence segmentation to CLI evaluate

* Add senttagger metrics/scoring to train CLI

* Rename SentTagger to SentenceRecognizer

* Add SentenceRecognizer to spacy.pipes imports

* Add SentenceRecognizer serialization test

* Shorten component name to sentrec

* Remove duplicates from train CLI output metrics

											
										
										
											2019-11-28 10:10:07 +00:00
+								            if sent_start:
 								                gold_sent_starts.add(id_)
-												Fix scorer bug for NER, related to ambiguity between missing annotations and misaligned tokens

											
										
										
											2017-03-16 14:38:28 +00:00
+								            if dep not in (None, "") and dep.lower() not in punct_labels:
-												* Fix evaluation of NER in scorer.py

											
										
										
											2015-05-27 01:18:16 +00:00
+								                gold_deps.add((id_, head, dep.lower()))
-												Add LAS per dependency to Scorer (#4560)


											
										
										
											2019-10-31 20:18:16 +00:00
+								                if dep.lower() not in self.labelled_per_dep:
 								                    self.labelled_per_dep[dep.lower()] = PRFScore()
 								                if dep.lower() not in gold_deps_per_dep:
 								                    gold_deps_per_dep[dep.lower()] = set()
 								                gold_deps_per_dep[dep.lower()].add((id_, head, dep.lower()))
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        cand_deps = set()
-												Add LAS per dependency to Scorer (#4560)


											
										
										
											2019-10-31 20:18:16 +00:00
+								        cand_deps_per_dep = {}
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        cand_tags = set()
-												Update morphologizer (#5108)

* Add pos and morph scoring to Scorer

Add pos, morph, and morph_per_type to `Scorer`. Report pos and morph
accuracy in `spacy evaluate`.

* Update morphologizer for v3

* switch to tagger-based morphologizer
* use `spacy.HashCharEmbedCNN` for morphologizer defaults
* add `Doc.is_morphed` flag

* Add morphologizer to train CLI

* Add basic morphologizer pipeline tests

* Add simple morphologizer training example

* Remove subword_features from CharEmbed models

Remove `subword_features` argument from `spacy.HashCharEmbedCNN.v1` and
`spacy.HashCharEmbedBiLSTM.v1` since in these cases `subword_features`
is always `False`.

* Rename setting in morphologizer example

Use `with_pos_tags` instead of `without_pos_tags`.

* Fix kwargs for spacy.HashCharEmbedBiLSTM.v1

* Remove defaults for spacy.HashCharEmbedBiLSTM.v1

Remove default `nM/nC` for `spacy.HashCharEmbedBiLSTM.v1`.

* Set random seed for textcat overfitting test
											
										
										
											2020-04-02 12:46:32 +00:00
+								        cand_pos = set()
 								        cand_morphs = set()
 								        cand_morphs_per_feat = {}
-												Add a tagger-based SentenceRecognizer (#4713)

* Add sent_starts to GoldParse

* Add SentTagger pipeline component

Add `SentTagger` pipeline component as a subclass of `Tagger`.

* Model reduces default parameters from `Tagger` to be small and fast
* Hard-coded set of two labels:
  * S (1): token at beginning of sentence
  * I (0): all other sentence positions
* Sets `token.sent_start` values

* Add sentence segmentation to Scorer

Report `sent_p/r/f` for sentence boundaries, which may be provided by
various pipeline components.

* Add sentence segmentation to CLI evaluate

* Add senttagger metrics/scoring to train CLI

* Rename SentTagger to SentenceRecognizer

* Add SentenceRecognizer to spacy.pipes imports

* Add SentenceRecognizer serialization test

* Shorten component name to sentrec

* Remove duplicates from train CLI output metrics

											
										
										
											2019-11-28 10:10:07 +00:00
+								        cand_sent_starts = set()
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								        for token in doc:
-												* Don't score whitespace tokens

											
										
										
											2015-06-07 17:10:32 +00:00
+								            if token.orth_.isspace():
 								                continue
-												* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags

											
										
										
											2015-05-30 16:24:32 +00:00
+								            gold_i = gold.cand_to_gold[token.i]
 								            if gold_i is None:
-												Revert "Merge branch 'develop' of https://github.com/explosion/spaCy into develop"

This reverts commit c9ba3d3c2dc7067cf8bd55f878cec45a8c6d73d4, reversing
changes made to 92c26a35d425d4e8ca1b805ea776ea10f5ded3df.

											
										
										
											2018-03-27 17:23:02 +00:00
+								                self.tokens.fp += 1
-												* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags

											
										
										
											2015-05-30 16:24:32 +00:00
+								            else:
-												* Start scoring tokens

											
										
										
											2015-06-28 04:21:38 +00:00
+								                self.tokens.tp += 1
-												* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags

											
										
										
											2015-05-30 16:24:32 +00:00
+								                cand_tags.add((gold_i, token.tag_))
-												Update morphologizer (#5108)

* Add pos and morph scoring to Scorer

Add pos, morph, and morph_per_type to `Scorer`. Report pos and morph
accuracy in `spacy evaluate`.

* Update morphologizer for v3

* switch to tagger-based morphologizer
* use `spacy.HashCharEmbedCNN` for morphologizer defaults
* add `Doc.is_morphed` flag

* Add morphologizer to train CLI

* Add basic morphologizer pipeline tests

* Add simple morphologizer training example

* Remove subword_features from CharEmbed models

Remove `subword_features` argument from `spacy.HashCharEmbedCNN.v1` and
`spacy.HashCharEmbedBiLSTM.v1` since in these cases `subword_features`
is always `False`.

* Rename setting in morphologizer example

Use `with_pos_tags` instead of `without_pos_tags`.

* Fix kwargs for spacy.HashCharEmbedBiLSTM.v1

* Remove defaults for spacy.HashCharEmbedBiLSTM.v1

Remove default `nM/nC` for `spacy.HashCharEmbedBiLSTM.v1`.

* Set random seed for textcat overfitting test
											
										
										
											2020-04-02 12:46:32 +00:00
+								                cand_pos.add((gold_i, token.pos_))
 								                cand_morphs.add((gold_i, token.morph_))
 								                if token.morph_:
 								                    for feat in token.morph_.split("|"):
 								                        field, values = feat.split("=")
 								                        if field not in self.morphs_per_feat:
 								                            self.morphs_per_feat[field] = PRFScore()
 								                        if field not in cand_morphs_per_feat:
 								                            cand_morphs_per_feat[field] = set()
 								                        cand_morphs_per_feat[field].add((gold_i, feat))
-												Add a tagger-based SentenceRecognizer (#4713)

* Add sent_starts to GoldParse

* Add SentTagger pipeline component

Add `SentTagger` pipeline component as a subclass of `Tagger`.

* Model reduces default parameters from `Tagger` to be small and fast
* Hard-coded set of two labels:
  * S (1): token at beginning of sentence
  * I (0): all other sentence positions
* Sets `token.sent_start` values

* Add sentence segmentation to Scorer

Report `sent_p/r/f` for sentence boundaries, which may be provided by
various pipeline components.

* Add sentence segmentation to CLI evaluate

* Add senttagger metrics/scoring to train CLI

* Rename SentTagger to SentenceRecognizer

* Add SentenceRecognizer to spacy.pipes imports

* Add SentenceRecognizer serialization test

* Shorten component name to sentrec

* Remove duplicates from train CLI output metrics

											
										
										
											2019-11-28 10:10:07 +00:00
+								                if token.is_sent_start:
 								                    cand_sent_starts.add(gold_i)
-												* Accept punct_labels as an argument to the scorer

											
										
										
											2016-02-02 21:59:06 +00:00
+								            if token.dep_.lower() not in punct_labels and token.orth_.strip():
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								                gold_head = gold.cand_to_gold[token.head.i]
 								                # None is indistinct, so we can't just add it to the set
 								                # Multiple (None, None) deps are possible
 								                if gold_i is None or gold_head is None:
 								                    self.unlabelled.fp += 1
 								                    self.labelled.fp += 1
 								                else:
-												* Fix evaluation of NER in scorer.py

											
										
										
											2015-05-27 01:18:16 +00:00
+								                    cand_deps.add((gold_i, gold_head, token.dep_.lower()))
-												Add LAS per dependency to Scorer (#4560)


											
										
										
											2019-10-31 20:18:16 +00:00
+								                    if token.dep_.lower() not in self.labelled_per_dep:
 								                        self.labelled_per_dep[token.dep_.lower()] = PRFScore()
 								                    if token.dep_.lower() not in cand_deps_per_dep:
 								                        cand_deps_per_dep[token.dep_.lower()] = set()
-												Auto-format

											
										
										
											2019-11-20 12:15:24 +00:00
+								                    cand_deps_per_dep[token.dep_.lower()].add(
 								                        (gold_i, gold_head, token.dep_.lower())
 								                    )
-												Fix problems caused by merge conflict

											
										
										
											2019-12-21 18:57:41 +00:00
+								        if "-" not in [token[-1] for token in orig.entities]:
-												Improve NER per type scoring (#4052)

* Improve NER per type scoring

* include all gold labels in per type scoring, not only when recall > 0
* improve efficiency of per type scoring

* Create Scorer tests, initially with NER tests

* move regression test #3968 (per type NER scoring) to Scorer tests

* add new test for per type NER scoring with imperfect P/R/F and per
type P/R/F including a case where R == 0.0

											
										
										
											2019-08-01 15:15:36 +00:00
+								            # Find all NER labels in gold and doc
-												Tidy up and auto-format

											
										
										
											2019-08-18 13:09:16 +00:00
+								            ent_labels = set([x[0] for x in gold_ents] + [k.label_ for k in doc.ents])
-												Improve NER per type scoring (#4052)

* Improve NER per type scoring

* include all gold labels in per type scoring, not only when recall > 0
* improve efficiency of per type scoring

* Create Scorer tests, initially with NER tests

* move regression test #3968 (per type NER scoring) to Scorer tests

* add new test for per type NER scoring with imperfect P/R/F and per
type P/R/F including a case where R == 0.0

											
										
										
											2019-08-01 15:15:36 +00:00
+								            # Set up all labels for per type scoring and prepare gold per type
 								            gold_per_ents = {ent_label: set() for ent_label in ent_labels}
 								            for ent_label in ent_labels:
 								                if ent_label not in self.ner_per_ents:
 								                    self.ner_per_ents[ent_label] = PRFScore()
-												Tidy up and auto-format

											
										
										
											2019-08-18 13:09:16 +00:00
+								                gold_per_ents[ent_label].update(
 								                    [x for x in gold_ents if x[0] == ent_label]
 								                )
-												Improve NER per type scoring (#4052)

* Improve NER per type scoring

* include all gold labels in per type scoring, not only when recall > 0
* improve efficiency of per type scoring

* Create Scorer tests, initially with NER tests

* move regression test #3968 (per type NER scoring) to Scorer tests

* add new test for per type NER scoring with imperfect P/R/F and per
type P/R/F including a case where R == 0.0

											
										
										
											2019-08-01 15:15:36 +00:00
+								            # Find all candidate labels, for all and per type
-												* Avoid NER scoring for sentences with some missing NER values.

											
										
										
											2015-05-28 20:39:08 +00:00
+								            cand_ents = set()
-												Improve NER per type scoring (#4052)

* Improve NER per type scoring

* include all gold labels in per type scoring, not only when recall > 0
* improve efficiency of per type scoring

* Create Scorer tests, initially with NER tests

* move regression test #3968 (per type NER scoring) to Scorer tests

* add new test for per type NER scoring with imperfect P/R/F and per
type P/R/F including a case where R == 0.0

											
										
										
											2019-08-01 15:15:36 +00:00
+								            cand_per_ents = {ent_label: set() for ent_label in ent_labels}
-												Update Scorer and add API docs

											
										
										
											2019-05-24 12:06:04 +00:00
+								            for ent in doc.ents:
-												* Avoid NER scoring for sentences with some missing NER values.

											
										
										
											2015-05-28 20:39:08 +00:00
+								                first = gold.cand_to_gold[ent.start]
-												💫 Tidy up and auto-format .py files (#2983)

<!--- Provide a general summary of your changes in the title. -->

## Description
- [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files.
- [x] Update flake8 config to exclude very large files (lemmatization tables etc.)
- [x] Update code to be compatible with flake8 rules
- [x] Fix various small bugs, inconsistencies and messy stuff in the language data
- [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means)

Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` actually get meaningful results.

At the moment, the code style and linting isn't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information.

### Types of change
enhancement, code style

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2018-11-30 16:03:03 +00:00
+								                last = gold.cand_to_gold[ent.end - 1]
-												* Avoid NER scoring for sentences with some missing NER values.

											
										
										
											2015-05-28 20:39:08 +00:00
+								                if first is None or last is None:
 								                    self.ner.fp += 1
-												Evaluation of NER model per entity type, closes #3490 (#3911)

* Evaluation of NER model per entity type, closes ##3490

Now each ent score is tracked individually in order to have its own Precision, Recall and F1 Score

* Keep track of each entity individually using dicts

* Improving how to compute the scores for each entity

* Fixed bug computing scores for ents

* Formatting with black

* Added key ents_per_type to the scores function

The key `ents_per_type` contains the metrics Precision, Recall and F1-Score for each entity individually

											
										
										
											2019-07-09 18:54:59 +00:00
+								                    self.ner_per_ents[ent.label_].fp += 1
-												* Avoid NER scoring for sentences with some missing NER values.

											
										
										
											2015-05-28 20:39:08 +00:00
+								                else:
 								                    cand_ents.add((ent.label_, first, last))
-												Improve NER per type scoring (#4052)

* Improve NER per type scoring

* include all gold labels in per type scoring, not only when recall > 0
* improve efficiency of per type scoring

* Create Scorer tests, initially with NER tests

* move regression test #3968 (per type NER scoring) to Scorer tests

* add new test for per type NER scoring with imperfect P/R/F and per
type P/R/F including a case where R == 0.0

											
										
										
											2019-08-01 15:15:36 +00:00
+								                    cand_per_ents[ent.label_].add((ent.label_, first, last))
-												Evaluation of NER model per entity type, closes #3490 (#3911)

* Evaluation of NER model per entity type, closes ##3490

Now each ent score is tracked individually in order to have its own Precision, Recall and F1 Score

* Keep track of each entity individually using dicts

* Improving how to compute the scores for each entity

* Fixed bug computing scores for ents

* Formatting with black

* Added key ents_per_type to the scores function

The key `ents_per_type` contains the metrics Precision, Recall and F1-Score for each entity individually

											
										
										
											2019-07-09 18:54:59 +00:00
+								            # Scores per ent
-												Improve NER per type scoring (#4052)

* Improve NER per type scoring

* include all gold labels in per type scoring, not only when recall > 0
* improve efficiency of per type scoring

* Create Scorer tests, initially with NER tests

* move regression test #3968 (per type NER scoring) to Scorer tests

* add new test for per type NER scoring with imperfect P/R/F and per
type P/R/F including a case where R == 0.0

											
										
										
											2019-08-01 15:15:36 +00:00
+								            for k, v in self.ner_per_ents.items():
 								                if k in cand_per_ents:
 								                    v.score_set(cand_per_ents[k], gold_per_ents[k])
-												Evaluation of NER model per entity type, closes #3490 (#3911)

* Evaluation of NER model per entity type, closes ##3490

Now each ent score is tracked individually in order to have its own Precision, Recall and F1 Score

* Keep track of each entity individually using dicts

* Improving how to compute the scores for each entity

* Fixed bug computing scores for ents

* Formatting with black

* Added key ents_per_type to the scores function

The key `ents_per_type` contains the metrics Precision, Recall and F1-Score for each entity individually

											
										
										
											2019-07-09 18:54:59 +00:00
+								            # Score for all ents
-												* Avoid NER scoring for sentences with some missing NER values.

											
										
										
											2015-05-28 20:39:08 +00:00
+								            self.ner.score_set(cand_ents, gold_ents)
-												* Fix evaluation of NER in scorer.py

											
										
										
											2015-05-27 01:18:16 +00:00
+								        self.tags.score_set(cand_tags, gold_tags)
-												Update morphologizer (#5108)

* Add pos and morph scoring to Scorer

Add pos, morph, and morph_per_type to `Scorer`. Report pos and morph
accuracy in `spacy evaluate`.

* Update morphologizer for v3

* switch to tagger-based morphologizer
* use `spacy.HashCharEmbedCNN` for morphologizer defaults
* add `Doc.is_morphed` flag

* Add morphologizer to train CLI

* Add basic morphologizer pipeline tests

* Add simple morphologizer training example

* Remove subword_features from CharEmbed models

Remove `subword_features` argument from `spacy.HashCharEmbedCNN.v1` and
`spacy.HashCharEmbedBiLSTM.v1` since in these cases `subword_features`
is always `False`.

* Rename setting in morphologizer example

Use `with_pos_tags` instead of `without_pos_tags`.

* Fix kwargs for spacy.HashCharEmbedBiLSTM.v1

* Remove defaults for spacy.HashCharEmbedBiLSTM.v1

Remove default `nM/nC` for `spacy.HashCharEmbedBiLSTM.v1`.

* Set random seed for textcat overfitting test
											
										
										
											2020-04-02 12:46:32 +00:00
+								        self.pos.score_set(cand_pos, gold_pos)
 								        self.morphs.score_set(cand_morphs, gold_morphs)
 								        for field in self.morphs_per_feat:
-												Tidy up and auto-format

											
										
										
											2020-06-20 12:15:04 +00:00
+								            self.morphs_per_feat[field].score_set(
 								                cand_morphs_per_feat.get(field, set()),
 								                gold_morphs_per_feat.get(field, set()),
 								            )
-												Add a tagger-based SentenceRecognizer (#4713)

* Add sent_starts to GoldParse

* Add SentTagger pipeline component

Add `SentTagger` pipeline component as a subclass of `Tagger`.

* Model reduces default parameters from `Tagger` to be small and fast
* Hard-coded set of two labels:
  * S (1): token at beginning of sentence
  * I (0): all other sentence positions
* Sets `token.sent_start` values

* Add sentence segmentation to Scorer

Report `sent_p/r/f` for sentence boundaries, which may be provided by
various pipeline components.

* Add sentence segmentation to CLI evaluate

* Add senttagger metrics/scoring to train CLI

* Rename SentTagger to SentenceRecognizer

* Add SentenceRecognizer to spacy.pipes imports

* Add SentenceRecognizer serialization test

* Shorten component name to sentrec

* Remove duplicates from train CLI output metrics

											
										
										
											2019-11-28 10:10:07 +00:00
+								        self.sent_starts.score_set(cand_sent_starts, gold_sent_starts)
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        self.labelled.score_set(cand_deps, gold_deps)
-												Add LAS per dependency to Scorer (#4560)


											
										
										
											2019-10-31 20:18:16 +00:00
+								        for dep in self.labelled_per_dep:
-												Auto-format

											
										
										
											2019-11-20 12:15:24 +00:00
+								            self.labelled_per_dep[dep].score_set(
 								                cand_deps_per_dep.get(dep, set()), gold_deps_per_dep.get(dep, set())
 								            )
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        self.unlabelled.score_set(
-												💫 Tidy up and auto-format .py files (#2983)

<!--- Provide a general summary of your changes in the title. -->

## Description
- [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files.
- [x] Update flake8 config to exclude very large files (lemmatization tables etc.)
- [x] Update code to be compatible with flake8 rules
- [x] Fix various small bugs, inconsistencies and messy stuff in the language data
- [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means)

Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` actually get meaningful results.

At the moment, the code style and linting isn't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information.

### Types of change
enhancement, code style

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2018-11-30 16:03:03 +00:00
+								            set(item[:2] for item in cand_deps), set(item[:2] for item in gold_deps)
-												* Update spacy.scorer, to use P/R/F to support tokenization errors

											
										
										
											2015-05-24 18:07:18 +00:00
+								        )
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								        if (
 								            len(gold.cats) > 0
-												Tidy up and auto-format

											
										
										
											2020-06-20 12:15:04 +00:00
+								            and set(self.textcat_f_per_cat)
 								            == set(self.textcat_auc_per_cat)
 								            == set(gold.cats)
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								            and set(gold.cats) == set(doc.cats)
 								        ):
 								            goldcat = max(gold.cats, key=gold.cats.get)
 								            candcat = max(doc.cats, key=doc.cats.get)
 								            if self.textcat_positive_label:
 								                self.textcat.score_set(
 								                    set([self.textcat_positive_label]) & set([candcat]),
 								                    set([self.textcat_positive_label]) & set([goldcat]),
 								                )
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								            for label in set(gold.cats):
 								                self.textcat_auc_per_cat[label].score_set(
-												Tidy up and auto-format

											
										
										
											2020-06-20 12:15:04 +00:00
+								                    doc.cats[label], gold.cats[label]
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								                )
 								                self.textcat_f_per_cat[label].score_set(
-												Tidy up and auto-format

											
										
										
											2020-06-20 12:15:04 +00:00
+								                    set([label]) & set([candcat]), set([label]) & set([goldcat])
-												train is from-config by default (#5575)

* verbose and tag_map options

* adding init_tok2vec option and only changing the tok2vec that is specified

* adding omit_extra_lookups and verifying textcat config

* wip

* pretrain bugfix

* add replace and resume options

* train_textcat fix

* raw text functionality

* improve UX when KeyError or when input data can't be parsed

* avoid unnecessary access to goldparse in TextCat pipe

* save performance information in nlp.meta

* add noise_level to config

* move nn_parser's defaults to config file

* multitask in config - doesn't work yet

* scorer offering both F and AUC options, need to be specified in config

* add textcat verification code from old train script

* small fixes to config files

* clean up

* set default config for ner/parser to allow create_pipe to work as before

* two more test fixes

* small fixes

* cleanup

* fix NER pickling + additional unit test

* create_pipe as before
											
										
										
											2020-06-12 00:02:07 +00:00
+								                )
 								        elif len(self.textcat_f_per_cat) > 0:
 								            model_labels = set(self.textcat_f_per_cat)
 								            eval_labels = set(gold.cats)
 								            raise ValueError(
 								                Errors.E162.format(model_labels=model_labels, eval_labels=eval_labels)
 								            )
 								        elif len(self.textcat_auc_per_cat) > 0:
 								            model_labels = set(self.textcat_auc_per_cat)
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								            eval_labels = set(gold.cats)
 								            raise ValueError(
 								                Errors.E162.format(model_labels=model_labels, eval_labels=eval_labels)
 								            )
-												* Add verbose printing to scorer

											
										
										
											2015-06-14 15:45:50 +00:00
+								        if verbose:
-												Example class for training data (#4543)

* OrigAnnot class instead of gold.orig_annot list of zipped tuples

* from_orig to replace from_annot_tuples

* rename to RawAnnot

* some unit tests for GoldParse creation and internal format

* removing orig_annot and switching to lists instead of tuple

* rewriting tuples to use RawAnnot (+ debug statements, WIP)

* fix pop() changing the data

* small fixes

* pop-append fixes

* return RawAnnot for existing GoldParse to have uniform interface

* clean up imports

* fix merge_sents

* add unit test for 4402 with new structure (not working yet)

* introduce DocAnnot

* typo fixes

* add unit test for merge_sents

* rename from_orig to from_raw

* fixing unit tests

* fix nn parser

* read_annots to produce text, doc_annot pairs

* _make_golds fix

* rename golds_to_gold_annots

* small fixes

* fix encoding

* have golds_to_gold_annots use DocAnnot

* missed a spot

* merge_sents as function in DocAnnot

* allow specifying only part of the token-level annotations

* refactor with Example class + underlying dicts

* pipeline components to work with Example objects (wip)

* input checking

* fix yielding

* fix calls to update

* small fixes

* fix scorer unit test with new format

* fix kwargs order

* fixes for ud and conllu scripts

* fix reading data for conllu script

* add in proper errors (not fixed numbering yet to avoid merge conflicts)

* fixing few more small bugs

* fix EL script

											
										
										
											2019-11-11 16:35:27 +00:00
+								            gold_words = orig.words
-												💫 Tidy up and auto-format .py files (#2983)

<!--- Provide a general summary of your changes in the title. -->

## Description
- [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files.
- [x] Update flake8 config to exclude very large files (lemmatization tables etc.)
- [x] Update code to be compatible with flake8 rules
- [x] Fix various small bugs, inconsistencies and messy stuff in the language data
- [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means)

Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` actually get meaningful results.

At the moment, the code style and linting isn't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information.

### Types of change
enhancement, code style

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2018-11-30 16:03:03 +00:00
+								            for w_id, h_id, dep in cand_deps - gold_deps:
 								                print("F", gold_words[w_id], dep, gold_words[h_id])
 								            for w_id, h_id, dep in gold_deps - cand_deps:
 								                print("M", gold_words[w_id], dep, gold_words[h_id])
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
 								#############################################################################
 								#
 								# The following implementation of roc_auc_score() is adapted from
 								# scikit-learn, which is distributed under the following license:
 								#
 								# New BSD License
 								#
 								# Copyright (c) 2007–2019 The scikit-learn developers.
 								# All rights reserved.
 								#
 								#
 								# Redistribution and use in source and binary forms, with or without
 								# modification, are permitted provided that the following conditions are met:
 								#
 								#   a. Redistributions of source code must retain the above copyright notice,
 								#      this list of conditions and the following disclaimer.
 								#   b. Redistributions in binary form must reproduce the above copyright
 								#      notice, this list of conditions and the following disclaimer in the
 								#      documentation and/or other materials provided with the distribution.
 								#   c. Neither the name of the Scikit-learn Developers  nor the names of
 								#      its contributors may be used to endorse or promote products
 								#      derived from this software without specific prior written
-												Auto-format

											
										
										
											2019-09-18 17:56:55 +00:00
+								#      permission.
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								#
 								#
 								# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 								# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 								# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 								# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
 								# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 								# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 								# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 								# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 								# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 								# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 								# DAMAGE.
-												Auto-format

											
										
										
											2019-09-18 17:56:55 +00:00
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								def _roc_auc_score(y_true, y_score):
 								    """Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)
 								    from prediction scores.
 								    Note: this implementation is restricted to the binary classification task
 								    Parameters
 								    ----------
 								    y_true : array, shape = [n_samples] or [n_samples, n_classes]
 								        True binary labels or binary label indicators.
 								        The multiclass case expects shape = [n_samples] and labels
 								        with values in ``range(n_classes)``.
 								    y_score : array, shape = [n_samples] or [n_samples, n_classes]
 								        Target scores, can either be probability estimates of the positive
 								        class, confidence values, or non-thresholded measure of decisions
 								        (as returned by "decision_function" on some classifiers). For binary
 								        y_true, y_score is supposed to be the score of the class with greater
 								        label. The multiclass case expects shape = [n_samples, n_classes]
 								        where the scores correspond to probability estimates.
 								    Returns
 								    -------
 								    auc : float
 								    References
 								    ----------
 								    .. [1] `Wikipedia entry for the Receiver operating characteristic
 								            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_
 								    .. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition
 								           Letters, 2006, 27(8):861-874.
 								    .. [3] `Analyzing a portion of the ROC curve. McClish, 1989
 								            <https://www.ncbi.nlm.nih.gov/pubmed/2668680>`_
 								    """
 								    if len(np.unique(y_true)) != 2:
 								        raise ValueError(Errors.E165)
 								    fpr, tpr, _ = _roc_curve(y_true, y_score)
 								    return _auc(fpr, tpr)
 								def _roc_curve(y_true, y_score):
 								    """Compute Receiver operating characteristic (ROC)
 								    Note: this implementation is restricted to the binary classification task.
 								    Parameters
 								    ----------
 								    y_true : array, shape = [n_samples]
 								        True binary labels. If labels are not either {-1, 1} or {0, 1}, then
 								        pos_label should be explicitly given.
 								    y_score : array, shape = [n_samples]
 								        Target scores, can either be probability estimates of the positive
 								        class, confidence values, or non-thresholded measure of decisions
 								        (as returned by "decision_function" on some classifiers).
 								    Returns
 								    -------
 								    fpr : array, shape = [>2]
 								        Increasing false positive rates such that element i is the false
 								        positive rate of predictions with score >= thresholds[i].
 								    tpr : array, shape = [>2]
 								        Increasing true positive rates such that element i is the true
 								        positive rate of predictions with score >= thresholds[i].
 								    thresholds : array, shape = [n_thresholds]
 								        Decreasing thresholds on the decision function used to compute
 								        fpr and tpr. `thresholds[0]` represents no instances being predicted
 								        and is arbitrarily set to `max(y_score) + 1`.
 								    Notes
 								    -----
 								    Since the thresholds are sorted from low to high values, they
 								    are reversed upon returning them to ensure they correspond to both ``fpr``
 								    and ``tpr``, which are sorted in reversed order during their calculation.
 								    References
 								    ----------
 								    .. [1] `Wikipedia entry for the Receiver operating characteristic
 								            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_
 								    .. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition
 								           Letters, 2006, 27(8):861-874.
 								    """
 								    fps, tps, thresholds = _binary_clf_curve(y_true, y_score)
 								    # Add an extra threshold position
 								    # to make sure that the curve starts at (0, 0)
 								    tps = np.r_[0, tps]
 								    fps = np.r_[0, fps]
 								    thresholds = np.r_[thresholds[0] + 1, thresholds]
 								    if fps[-1] <= 0:
 								        fpr = np.repeat(np.nan, fps.shape)
 								    else:
 								        fpr = fps / fps[-1]
 								    if tps[-1] <= 0:
 								        tpr = np.repeat(np.nan, tps.shape)
 								    else:
 								        tpr = tps / tps[-1]
 								    return fpr, tpr, thresholds
 								def _binary_clf_curve(y_true, y_score):
 								    """Calculate true and false positives per binary classification threshold.
 								    Parameters
 								    ----------
 								    y_true : array, shape = [n_samples]
 								        True targets of binary classification
 								    y_score : array, shape = [n_samples]
 								        Estimated probabilities or decision function
 								    Returns
 								    -------
 								    fps : array, shape = [n_thresholds]
 								        A count of false positives, at index i being the number of negative
 								        samples assigned a score >= thresholds[i]. The total number of
 								        negative samples is equal to fps[-1] (thus true negatives are given by
 								        fps[-1] - fps).
 								    tps : array, shape = [n_thresholds <= len(np.unique(y_score))]
 								        An increasing count of true positives, at index i being the number
 								        of positive samples assigned a score >= thresholds[i]. The total
 								        number of positive samples is equal to tps[-1] (thus false negatives
 								        are given by tps[-1] - tps).
 								    thresholds : array, shape = [n_thresholds]
 								        Decreasing score values.
 								    """
-												Auto-format

											
										
										
											2019-09-18 17:56:55 +00:00
+								    pos_label = 1.0
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
 								    y_true = np.ravel(y_true)
 								    y_score = np.ravel(y_score)
 								    # make y_true a boolean vector
-												Auto-format

											
										
										
											2019-09-18 17:56:55 +00:00
+								    y_true = y_true == pos_label
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
 								    # sort scores and corresponding truth values
 								    desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1]
 								    y_score = y_score[desc_score_indices]
 								    y_true = y_true[desc_score_indices]
-												Auto-format

											
										
										
											2019-09-18 17:56:55 +00:00
+								    weight = 1.0
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
 								    # y_score typically has many tied values. Here we extract
 								    # the indices associated with the distinct values. We also
 								    # concatenate a value for the end of the curve.
 								    distinct_value_indices = np.where(np.diff(y_score))[0]
 								    threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1]
 								    # accumulate the true positives with decreasing threshold
 								    tps = _stable_cumsum(y_true * weight)[threshold_idxs]
 								    fps = 1 + threshold_idxs - tps
 								    return fps, tps, y_score[threshold_idxs]
 								def _stable_cumsum(arr, axis=None, rtol=1e-05, atol=1e-08):
 								    """Use high precision for cumsum and check that final value matches sum
 								    Parameters
 								    ----------
 								    arr : array-like
 								        To be cumulatively summed as flat
 								    axis : int, optional
 								        Axis along which the cumulative sum is computed.
 								        The default (None) is to compute the cumsum over the flattened array.
 								    rtol : float
 								        Relative tolerance, see ``np.allclose``
 								    atol : float
 								        Absolute tolerance, see ``np.allclose``
 								    """
 								    out = np.cumsum(arr, axis=axis, dtype=np.float64)
 								    expected = np.sum(arr, axis=axis, dtype=np.float64)
-												Auto-format

											
										
										
											2019-09-18 17:56:55 +00:00
+								    if not np.all(
 								        np.isclose(
 								            out.take(-1, axis=axis), expected, rtol=rtol, atol=atol, equal_nan=True
 								        )
 								    ):
-												Add textcat to train CLI (#4226)

* Add doc.cats to spacy.gold at the paragraph level

Support `doc.cats` as `"cats": [{"label": string, "value": number}]` in
the spacy JSON training format at the paragraph level.

* `spacy.gold.docs_to_json()` writes `docs.cats`

* `GoldCorpus` reads in cats in each `GoldParse`

* Update instances of gold_tuples to handle cats

Update iteration over gold_tuples / gold_parses to handle addition of
cats at the paragraph level.

* Add textcat to train CLI

* Add textcat options to train CLI
* Add textcat labels in `TextCategorizer.begin_training()`
* Add textcat evaluation to `Scorer`:
  * For binary exclusive classes with provided label: F1 for label
  * For 2+ exclusive classes: F1 macro average
  * For multilabel (not exclusive): ROC AUC macro average (currently
relying on sklearn)
* Provide user info on textcat evaluation settings, potential
incompatibilities
* Provide pipeline to Scorer in `Language.evaluate` for textcat config
* Customize train CLI output to include only metrics relevant to current
pipeline
* Add textcat evaluation to evaluate CLI

* Fix handling of unset arguments and config params

Fix handling of unset arguments and model confiug parameters in Scorer
initialization.

* Temporarily add sklearn requirement

* Remove sklearn version number

* Improve Scorer handling of models without textcats

* Fixing Scorer handling of models without textcats

* Update Scorer output for python 2.7

* Modify inf in Scorer for python 2.7

* Auto-format

Also make small adjustments to make auto-formatting with black easier and produce nicer results

* Move error message to Errors

* Update documentation

* Add cats to annotation JSON format [ci skip]

* Fix tpl flag and docs [ci skip]

* Switch to internal roc_auc_score

Switch to internal `roc_auc_score()` adapted from scikit-learn.

* Add AUCROCScore tests and improve errors/warnings

* Add tests for AUCROCScore and roc_auc_score
* Add missing error for only positive/negative values
* Remove unnecessary warnings and errors

* Make reduced roc_auc_score functions private

Because most of the checks and warnings have been stripped for the
internal functions and access is only intended through `ROCAUCScore`,
make the functions for roc_auc_score adapted from scikit-learn private.

* Check that data corresponds with multilabel flag

Check that the training instances correspond with the multilabel flag,
adding the multilabel flag if required.

* Add textcat score to early stopping check

* Add more checks to debug-data for textcat

* Add example training data for textcat

* Add more checks to textcat train CLI

* Check configuration when extending base model
* Fix typos

* Update textcat example data

* Provide licensing details and licenses for data
* Remove two labels with no positive instances from jigsaw-toxic-comment
data.


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-09-15 20:31:31 +00:00
+								        raise ValueError(Errors.E163)
 								    return out
 								def _auc(x, y):
 								    """Compute Area Under the Curve (AUC) using the trapezoidal rule
 								    This is a general function, given points on a curve.  For computing the
 								    area under the ROC-curve, see :func:`roc_auc_score`.
 								    Parameters
 								    ----------
 								    x : array, shape = [n]
 								        x coordinates. These must be either monotonic increasing or monotonic
 								        decreasing.
 								    y : array, shape = [n]
 								        y coordinates.
 								    Returns
 								    -------
 								    auc : float
 								    """
 								    x = np.ravel(x)
 								    y = np.ravel(y)
 								    direction = 1
 								    dx = np.diff(x)
 								    if np.any(dx < 0):
 								        if np.all(dx <= 0):
 								            direction = -1
 								        else:
 								            raise ValueError(Errors.E164.format(x))
 								    area = direction * np.trapz(y, x)
 								    if isinstance(area, np.memmap):
 								        # Reductions such as .sum used internally in np.trapz do not return a
 								        # scalar by default for numpy.memmap instances contrary to
 								        # regular numpy.ndarray instances.
 								        area = area.dtype.type(area)
 								    return area