From 8e7557656fb9b4c51fb1ff6f49f7ccd516ff1a91 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Wed, 9 Sep 2020 10:31:03 +0200 Subject: [PATCH] Renaming gold & annotation_setter (#6042) * version bump to 3.0.0a16 * rename "gold" folder to "training" * rename 'annotation_setter' to 'set_extra_annotations' * formatting --- .../textcatjsonl_to_trainjson.py | 2 +- setup.py | 4 +-- spacy/about.py | 2 +- spacy/cli/convert.py | 4 +-- spacy/cli/debug_data.py | 2 +- spacy/cli/evaluate.py | 2 +- spacy/cli/train.py | 2 +- spacy/errors.py | 2 +- spacy/language.py | 2 +- .../pipeline/_parser_internals/arc_eager.pyx | 2 +- spacy/pipeline/_parser_internals/ner.pyx | 2 +- .../_parser_internals/transition_system.pxd | 2 +- spacy/pipeline/attributeruler.py | 2 +- spacy/pipeline/dep_parser.pyx | 2 +- spacy/pipeline/entity_linker.py | 2 +- spacy/pipeline/entityruler.py | 2 +- spacy/pipeline/lemmatizer.py | 2 +- spacy/pipeline/morphologizer.pyx | 2 +- spacy/pipeline/multitask.pyx | 2 +- spacy/pipeline/ner.pyx | 2 +- spacy/pipeline/pipe.pyx | 2 +- spacy/pipeline/sentencizer.pyx | 2 +- spacy/pipeline/senter.pyx | 2 +- spacy/pipeline/simple_ner.py | 4 +-- spacy/pipeline/tagger.pyx | 2 +- spacy/pipeline/textcat.py | 2 +- spacy/pipeline/tok2vec.py | 2 +- spacy/pipeline/transition_parser.pyx | 2 +- spacy/schemas.py | 2 +- spacy/scorer.py | 2 +- spacy/tests/doc/test_add_entities.py | 2 +- spacy/tests/parser/test_add_label.py | 2 +- spacy/tests/parser/test_arc_eager_oracle.py | 2 +- spacy/tests/parser/test_ner.py | 2 +- spacy/tests/parser/test_neural_parser.py | 2 +- spacy/tests/parser/test_parse.py | 2 +- spacy/tests/parser/test_preset_sbd.py | 2 +- spacy/tests/pipeline/test_attributeruler.py | 2 +- spacy/tests/pipeline/test_entity_linker.py | 2 +- spacy/tests/pipeline/test_morphologizer.py | 2 +- spacy/tests/pipeline/test_senter.py | 2 +- spacy/tests/pipeline/test_simple_ner.py | 2 +- spacy/tests/pipeline/test_tagger.py | 2 +- spacy/tests/pipeline/test_textcat.py | 2 +- spacy/tests/regression/test_issue1-1000.py | 2 +- spacy/tests/regression/test_issue1501-2000.py | 2 +- spacy/tests/regression/test_issue2001-2500.py | 2 +- spacy/tests/regression/test_issue2501-3000.py | 2 +- spacy/tests/regression/test_issue3501-4000.py | 2 +- spacy/tests/regression/test_issue4001-4500.py | 4 +-- spacy/tests/regression/test_issue4501-5000.py | 8 ++---- spacy/tests/test_cli.py | 4 +-- spacy/tests/test_language.py | 2 +- spacy/tests/test_new_example.py | 2 +- spacy/tests/test_scorer.py | 4 +-- spacy/tests/test_tok2vec.py | 2 +- .../tests/{test_gold.py => test_training.py} | 12 ++++---- spacy/tests/test_util.py | 2 +- spacy/tokenizer.pyx | 2 +- spacy/{gold => training}/__init__.pxd | 0 spacy/{gold => training}/__init__.py | 0 spacy/{gold => training}/align.py | 0 spacy/{gold => training}/augment.py | 0 spacy/{gold => training}/batchers.py | 0 .../{gold => training}/converters/__init__.py | 0 .../converters/conll_ner2docs.py | 2 +- .../converters/conllu2docs.py | 2 +- .../{gold => training}/converters/iob2docs.py | 2 +- .../converters/json2docs.py | 0 spacy/{gold => training}/corpus.py | 0 spacy/{gold => training}/example.pxd | 0 spacy/{gold => training}/example.pyx | 0 spacy/{gold => training}/gold_io.pyx | 0 spacy/{gold => training}/iob_utils.py | 0 spacy/{gold => training}/loggers.py | 0 website/README.md | 10 +++---- website/docs/api/cli.md | 8 +++--- website/docs/api/corpus.md | 4 +-- website/docs/api/data-formats.md | 2 +- website/docs/api/example.md | 6 ++-- website/docs/api/top-level.md | 12 ++++---- 
website/docs/api/transformer.md | 28 +++++++++---------- website/docs/usage/embeddings-transformers.md | 14 +++++----- website/docs/usage/linguistic-features.md | 2 +- website/docs/usage/processing-pipelines.md | 4 +-- website/docs/usage/training.md | 4 +-- 86 files changed, 122 insertions(+), 124 deletions(-) rename spacy/tests/{test_gold.py => test_training.py} (98%) rename spacy/{gold => training}/__init__.pxd (100%) rename spacy/{gold => training}/__init__.py (100%) rename spacy/{gold => training}/align.py (100%) rename spacy/{gold => training}/augment.py (100%) rename spacy/{gold => training}/batchers.py (100%) rename spacy/{gold => training}/converters/__init__.py (100%) rename spacy/{gold => training}/converters/conll_ner2docs.py (99%) rename spacy/{gold => training}/converters/conllu2docs.py (99%) rename spacy/{gold => training}/converters/iob2docs.py (97%) rename spacy/{gold => training}/converters/json2docs.py (100%) rename spacy/{gold => training}/corpus.py (100%) rename spacy/{gold => training}/example.pxd (100%) rename spacy/{gold => training}/example.pyx (100%) rename spacy/{gold => training}/gold_io.pyx (100%) rename spacy/{gold => training}/iob_utils.py (100%) rename spacy/{gold => training}/loggers.py (100%) diff --git a/extra/example_data/textcat_example_data/textcatjsonl_to_trainjson.py b/extra/example_data/textcat_example_data/textcatjsonl_to_trainjson.py index 66d96ff68..41b6a70da 100644 --- a/extra/example_data/textcat_example_data/textcatjsonl_to_trainjson.py +++ b/extra/example_data/textcat_example_data/textcatjsonl_to_trainjson.py @@ -1,7 +1,7 @@ from pathlib import Path import plac import spacy -from spacy.gold import docs_to_json +from spacy.training import docs_to_json import srsly import sys diff --git a/setup.py b/setup.py index d448a262c..4a4b99f22 100755 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ Options.docstrings = True PACKAGES = find_packages() MOD_NAMES = [ - "spacy.gold.example", + "spacy.training.example", "spacy.parts_of_speech", "spacy.strings", "spacy.lexeme", @@ -48,7 +48,7 @@ MOD_NAMES = [ "spacy.pipeline._parser_internals.stateclass", "spacy.pipeline._parser_internals.transition_system", "spacy.tokenizer", - "spacy.gold.gold_io", + "spacy.training.gold_io", "spacy.tokens.doc", "spacy.tokens.span", "spacy.tokens.token", diff --git a/spacy/about.py b/spacy/about.py index 9bce6fd35..c6176ad36 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,6 +1,6 @@ # fmt: off __title__ = "spacy-nightly" -__version__ = "3.0.0a15" +__version__ = "3.0.0a16" __release__ = True __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py index ade5a3ad4..ad89b9976 100644 --- a/spacy/cli/convert.py +++ b/spacy/cli/convert.py @@ -7,9 +7,9 @@ import re import sys from ._util import app, Arg, Opt -from ..gold import docs_to_json +from ..training import docs_to_json from ..tokens import DocBin -from ..gold.converters import iob2docs, conll_ner2docs, json2docs, conllu2docs +from ..training.converters import iob2docs, conll_ner2docs, json2docs, conllu2docs # Converters are matched by file extension except for ner/iob, which are diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index 75a81e6f5..d52f30b82 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -8,7 +8,7 @@ import typer from ._util import app, Arg, Opt, show_validation_error, 
parse_config_overrides from ._util import import_code, debug_cli, get_sourced_components -from ..gold import Corpus, Example +from ..training import Corpus, Example from ..pipeline._parser_internals import nonproj from ..language import Language from .. import util diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py index c5cbab09a..f9954d9ad 100644 --- a/spacy/cli/evaluate.py +++ b/spacy/cli/evaluate.py @@ -5,7 +5,7 @@ import re import srsly from thinc.api import require_gpu, fix_random_seed -from ..gold import Corpus +from ..training import Corpus from ..tokens import Doc from ._util import app, Arg, Opt from ..scorer import Scorer diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 146d26edb..0bc493e56 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -16,7 +16,7 @@ from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error from ._util import import_code, get_sourced_components from ..language import Language from .. import util -from ..gold.example import Example +from ..training.example import Example from ..errors import Errors diff --git a/spacy/errors.py b/spacy/errors.py index 3c120598e..7164598b6 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -66,7 +66,7 @@ class Warnings: "in problems with the vocab further on in the pipeline.") W030 = ("Some entities could not be aligned in the text \"{text}\" with " "entities \"{entities}\". Use " - "`spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)`" + "`spacy.training.biluo_tags_from_offsets(nlp.make_doc(text), entities)`" " to check the alignment. Misaligned entities ('-') will be " "ignored during training.") W033 = ("Training a new {model} using a model with no lexeme normalization " diff --git a/spacy/language.py b/spacy/language.py index 6631250aa..777b0c24b 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -17,7 +17,7 @@ from timeit import default_timer as timer from .tokens.underscore import Underscore from .vocab import Vocab, create_vocab from .pipe_analysis import validate_attrs, analyze_pipes, print_pipe_analysis -from .gold import Example, validate_examples +from .training import Example, validate_examples from .scorer import Scorer from .util import create_default_optimizer, registry, SimpleFrozenList from .util import SimpleFrozenDict, combine_score_weights, CONFIG_SECTION_ORDER diff --git a/spacy/pipeline/_parser_internals/arc_eager.pyx b/spacy/pipeline/_parser_internals/arc_eager.pyx index 7db8aae0f..bb0bf35b8 100644 --- a/spacy/pipeline/_parser_internals/arc_eager.pyx +++ b/spacy/pipeline/_parser_internals/arc_eager.pyx @@ -8,7 +8,7 @@ from ...typedefs cimport hash_t, attr_t from ...strings cimport hash_string from ...structs cimport TokenC from ...tokens.doc cimport Doc, set_children_from_heads -from ...gold.example cimport Example +from ...training.example cimport Example from ...errors import Errors from .stateclass cimport StateClass from ._state cimport StateC diff --git a/spacy/pipeline/_parser_internals/ner.pyx b/spacy/pipeline/_parser_internals/ner.pyx index 2570ccdee..0351bcaf7 100644 --- a/spacy/pipeline/_parser_internals/ner.pyx +++ b/spacy/pipeline/_parser_internals/ner.pyx @@ -5,7 +5,7 @@ from cymem.cymem cimport Pool from ...typedefs cimport weight_t, attr_t from ...lexeme cimport Lexeme from ...attrs cimport IS_SPACE -from ...gold.example cimport Example +from ...training.example cimport Example from ...errors import Errors from .stateclass cimport StateClass from ._state cimport StateC diff --git 
a/spacy/pipeline/_parser_internals/transition_system.pxd b/spacy/pipeline/_parser_internals/transition_system.pxd index ba4c33814..458f1d5f9 100644 --- a/spacy/pipeline/_parser_internals/transition_system.pxd +++ b/spacy/pipeline/_parser_internals/transition_system.pxd @@ -3,7 +3,7 @@ from cymem.cymem cimport Pool from ...typedefs cimport attr_t, weight_t from ...structs cimport TokenC from ...strings cimport StringStore -from ...gold.example cimport Example +from ...training.example cimport Example from .stateclass cimport StateClass from ._state cimport StateC diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py index 406112681..f64fcbc54 100644 --- a/spacy/pipeline/attributeruler.py +++ b/spacy/pipeline/attributeruler.py @@ -4,7 +4,7 @@ from pathlib import Path from .pipe import Pipe from ..errors import Errors -from ..gold import validate_examples +from ..training import validate_examples from ..language import Language from ..matcher import Matcher from ..scorer import Scorer diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx index eee4ed535..edd791e40 100644 --- a/spacy/pipeline/dep_parser.pyx +++ b/spacy/pipeline/dep_parser.pyx @@ -9,7 +9,7 @@ from .functions import merge_subtokens from ..language import Language from ._parser_internals import nonproj from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples default_model_config = """ diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py index e9564c05f..1debadd82 100644 --- a/spacy/pipeline/entity_linker.py +++ b/spacy/pipeline/entity_linker.py @@ -12,7 +12,7 @@ from ..tokens import Doc from .pipe import Pipe, deserialize_config from ..language import Language from ..vocab import Vocab -from ..gold import Example, validate_examples +from ..training import Example, validate_examples from ..errors import Errors, Warnings from ..util import SimpleFrozenList from .. import util diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index 4f4ff230e..24bbb067f 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -9,7 +9,7 @@ from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList from ..tokens import Doc, Span from ..matcher import Matcher, PhraseMatcher from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples DEFAULT_ENT_ID_SEP = "||" diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py index 3f3e387b7..0fd3482c4 100644 --- a/spacy/pipeline/lemmatizer.py +++ b/spacy/pipeline/lemmatizer.py @@ -8,7 +8,7 @@ from ..lookups import Lookups, load_lookups from ..scorer import Scorer from ..tokens import Doc, Token from ..vocab import Vocab -from ..gold import validate_examples +from ..training import validate_examples from .. import util diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index b54824ce9..57bdb28d7 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -16,7 +16,7 @@ from .pipe import deserialize_config from .tagger import Tagger from .. 
import util from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples default_model_config = """ diff --git a/spacy/pipeline/multitask.pyx b/spacy/pipeline/multitask.pyx index f07d24efc..2f8940124 100644 --- a/spacy/pipeline/multitask.pyx +++ b/spacy/pipeline/multitask.pyx @@ -8,7 +8,7 @@ from ..tokens.doc cimport Doc from .pipe import Pipe from .tagger import Tagger -from ..gold import validate_examples +from ..training import validate_examples from ..language import Language from ._parser_internals import nonproj from ..attrs import POS, ID diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx index d9f33ccb4..2fa5c6392 100644 --- a/spacy/pipeline/ner.pyx +++ b/spacy/pipeline/ner.pyx @@ -7,7 +7,7 @@ from ._parser_internals.ner cimport BiluoPushDown from ..language import Language from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples default_model_config = """ diff --git a/spacy/pipeline/pipe.pyx b/spacy/pipeline/pipe.pyx index a6a2ff45c..324c8e19c 100644 --- a/spacy/pipeline/pipe.pyx +++ b/spacy/pipeline/pipe.pyx @@ -4,7 +4,7 @@ from thinc.api import set_dropout_rate, Model from ..tokens.doc cimport Doc -from ..gold import validate_examples +from ..training import validate_examples from ..errors import Errors from .. import util diff --git a/spacy/pipeline/sentencizer.pyx b/spacy/pipeline/sentencizer.pyx index aaf08d594..5700c2b98 100644 --- a/spacy/pipeline/sentencizer.pyx +++ b/spacy/pipeline/sentencizer.pyx @@ -7,7 +7,7 @@ from ..tokens.doc cimport Doc from .pipe import Pipe from ..language import Language from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples from .. import util diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx index 64e01a071..00664131b 100644 --- a/spacy/pipeline/senter.pyx +++ b/spacy/pipeline/senter.pyx @@ -11,7 +11,7 @@ from .tagger import Tagger from ..language import Language from ..errors import Errors from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples from .. import util diff --git a/spacy/pipeline/simple_ner.py b/spacy/pipeline/simple_ner.py index a4a3248d2..951d89931 100644 --- a/spacy/pipeline/simple_ner.py +++ b/spacy/pipeline/simple_ner.py @@ -6,8 +6,8 @@ from thinc.util import to_numpy from itertools import islice from ..errors import Errors -from ..gold import Example, spans_from_biluo_tags, iob_to_biluo, biluo_to_iob -from ..gold import validate_examples +from ..training import Example, spans_from_biluo_tags, iob_to_biluo, biluo_to_iob +from ..training import validate_examples from ..tokens import Doc from ..language import Language from ..vocab import Vocab diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index a0f06aa1c..1f8b4eb7a 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -17,7 +17,7 @@ from ..attrs import POS, ID from ..parts_of_speech import X from ..errors import Errors, TempErrors, Warnings from ..scorer import Scorer -from ..gold import validate_examples +from ..training import validate_examples from .. 
import util diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index e1edfb5b2..4be6f580d 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -6,7 +6,7 @@ import numpy from .pipe import Pipe from ..language import Language -from ..gold import Example, validate_examples +from ..training import Example, validate_examples from ..errors import Errors from ..scorer import Scorer from .. import util diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py index b5f84f324..721c67a19 100644 --- a/spacy/pipeline/tok2vec.py +++ b/spacy/pipeline/tok2vec.py @@ -3,7 +3,7 @@ from thinc.api import Model, set_dropout_rate, Optimizer, Config from itertools import islice from .pipe import Pipe -from ..gold import Example, validate_examples +from ..training import Example, validate_examples from ..tokens import Doc from ..vocab import Vocab from ..language import Language diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx index 2361cfd7f..1350e1f12 100644 --- a/spacy/pipeline/transition_parser.pyx +++ b/spacy/pipeline/transition_parser.pyx @@ -21,7 +21,7 @@ from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss from ..ml.parser_model cimport get_c_weights, get_c_sizes from ..tokens.doc cimport Doc -from ..gold import validate_examples +from ..training import validate_examples from ..errors import Errors, Warnings from .. import util diff --git a/spacy/schemas.py b/spacy/schemas.py index 59af53301..baa893802 100644 --- a/spacy/schemas.py +++ b/spacy/schemas.py @@ -12,7 +12,7 @@ from .attrs import NAMES if TYPE_CHECKING: # This lets us add type hints for mypy etc. without causing circular imports from .language import Language # noqa: F401 - from .gold import Example # noqa: F401 + from .training import Example # noqa: F401 ItemT = TypeVar("ItemT") diff --git a/spacy/scorer.py b/spacy/scorer.py index 9b1831a91..7f7418237 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -1,7 +1,7 @@ from typing import Optional, Iterable, Dict, Any, Callable, TYPE_CHECKING import numpy as np -from .gold import Example +from .training import Example from .tokens import Token, Doc, Span from .errors import Errors from .util import get_lang_class, SimpleFrozenList diff --git a/spacy/tests/doc/test_add_entities.py b/spacy/tests/doc/test_add_entities.py index 2a4e3e499..751bd36d4 100644 --- a/spacy/tests/doc/test_add_entities.py +++ b/spacy/tests/doc/test_add_entities.py @@ -1,4 +1,4 @@ -from spacy.gold import Example +from spacy.training import Example from spacy.pipeline import EntityRecognizer from spacy.tokens import Span, Doc from spacy import registry diff --git a/spacy/tests/parser/test_add_label.py b/spacy/tests/parser/test_add_label.py index b17080f15..0da42daa2 100644 --- a/spacy/tests/parser/test_add_label.py +++ b/spacy/tests/parser/test_add_label.py @@ -3,7 +3,7 @@ from thinc.api import Adam, fix_random_seed from spacy import registry from spacy.attrs import NORM from spacy.vocab import Vocab -from spacy.gold import Example +from spacy.training import Example from spacy.tokens import Doc from spacy.pipeline import DependencyParser, EntityRecognizer from spacy.pipeline.ner import DEFAULT_NER_MODEL diff --git a/spacy/tests/parser/test_arc_eager_oracle.py b/spacy/tests/parser/test_arc_eager_oracle.py index fd1880030..826fc1d87 100644 --- a/spacy/tests/parser/test_arc_eager_oracle.py +++ b/spacy/tests/parser/test_arc_eager_oracle.py @@ -1,7 +1,7 @@ import pytest from spacy.vocab import Vocab from spacy import 
registry -from spacy.gold import Example +from spacy.training import Example from spacy.pipeline import DependencyParser from spacy.tokens import Doc from spacy.pipeline._parser_internals.nonproj import projectivize diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py index c7a1ed0d2..548cd2697 100644 --- a/spacy/tests/parser/test_ner.py +++ b/spacy/tests/parser/test_ner.py @@ -4,7 +4,7 @@ from spacy.lang.en import English from spacy.language import Language from spacy.lookups import Lookups from spacy.pipeline._parser_internals.ner import BiluoPushDown -from spacy.gold import Example +from spacy.training import Example from spacy.tokens import Doc from spacy.vocab import Vocab import logging diff --git a/spacy/tests/parser/test_neural_parser.py b/spacy/tests/parser/test_neural_parser.py index 6594c7e78..0747241d8 100644 --- a/spacy/tests/parser/test_neural_parser.py +++ b/spacy/tests/parser/test_neural_parser.py @@ -1,7 +1,7 @@ import pytest from spacy import registry -from spacy.gold import Example +from spacy.training import Example from spacy.vocab import Vocab from spacy.pipeline._parser_internals.arc_eager import ArcEager from spacy.pipeline.transition_parser import Parser diff --git a/spacy/tests/parser/test_parse.py b/spacy/tests/parser/test_parse.py index fa6494eb6..8d45e2132 100644 --- a/spacy/tests/parser/test_parse.py +++ b/spacy/tests/parser/test_parse.py @@ -3,7 +3,7 @@ import pytest from spacy.lang.en import English from ..util import get_doc, apply_transition_sequence, make_tempdir from ... import util -from ...gold import Example +from ...training import Example TRAIN_DATA = [ ( diff --git a/spacy/tests/parser/test_preset_sbd.py b/spacy/tests/parser/test_preset_sbd.py index 430440576..1de05be1b 100644 --- a/spacy/tests/parser/test_preset_sbd.py +++ b/spacy/tests/parser/test_preset_sbd.py @@ -3,7 +3,7 @@ from thinc.api import Adam from spacy.attrs import NORM from spacy.vocab import Vocab from spacy import registry -from spacy.gold import Example +from spacy.training import Example from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL from spacy.tokens import Doc from spacy.pipeline import DependencyParser diff --git a/spacy/tests/pipeline/test_attributeruler.py b/spacy/tests/pipeline/test_attributeruler.py index c12a2b650..9254688cc 100644 --- a/spacy/tests/pipeline/test_attributeruler.py +++ b/spacy/tests/pipeline/test_attributeruler.py @@ -1,6 +1,6 @@ import pytest import numpy -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.pipeline import AttributeRuler from spacy import util, registry diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index 776d4f451..c43d2c58e 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -4,7 +4,7 @@ import pytest from spacy.kb import KnowledgeBase, get_candidates, Candidate from spacy import util, registry -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.tests.util import make_tempdir from spacy.tokens import Span diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py index f52fb5401..864c7332e 100644 --- a/spacy/tests/pipeline/test_morphologizer.py +++ b/spacy/tests/pipeline/test_morphologizer.py @@ -1,7 +1,7 @@ import pytest from spacy import util -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en 
import English from spacy.language import Language from spacy.tests.util import make_tempdir diff --git a/spacy/tests/pipeline/test_senter.py b/spacy/tests/pipeline/test_senter.py index 8941eae9a..1752df5d0 100644 --- a/spacy/tests/pipeline/test_senter.py +++ b/spacy/tests/pipeline/test_senter.py @@ -1,7 +1,7 @@ import pytest from spacy import util -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.language import Language from spacy.tests.util import make_tempdir diff --git a/spacy/tests/pipeline/test_simple_ner.py b/spacy/tests/pipeline/test_simple_ner.py index 3148eda0a..940743ce0 100644 --- a/spacy/tests/pipeline/test_simple_ner.py +++ b/spacy/tests/pipeline/test_simple_ner.py @@ -1,6 +1,6 @@ import pytest from spacy.lang.en import English -from spacy.gold import Example +from spacy.training import Example from spacy import util from ..util import make_tempdir diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py index 89f40c5bf..cd5927675 100644 --- a/spacy/tests/pipeline/test_tagger.py +++ b/spacy/tests/pipeline/test_tagger.py @@ -1,6 +1,6 @@ import pytest from spacy import util -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.language import Language diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py index 59c0fce49..3f9506bb1 100644 --- a/spacy/tests/pipeline/test_textcat.py +++ b/spacy/tests/pipeline/test_textcat.py @@ -10,7 +10,7 @@ from spacy.tokens import Doc from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL from ..util import make_tempdir -from ...gold import Example +from ...training import Example TRAIN_DATA = [ diff --git a/spacy/tests/regression/test_issue1-1000.py b/spacy/tests/regression/test_issue1-1000.py index 5c93ea3c8..ed5bcc1a5 100644 --- a/spacy/tests/regression/test_issue1-1000.py +++ b/spacy/tests/regression/test_issue1-1000.py @@ -1,7 +1,7 @@ import pytest import random from spacy import util -from spacy.gold import Example +from spacy.training import Example from spacy.matcher import Matcher from spacy.attrs import IS_PUNCT, ORTH, LOWER from spacy.vocab import Vocab diff --git a/spacy/tests/regression/test_issue1501-2000.py b/spacy/tests/regression/test_issue1501-2000.py index 4988575ea..c1d726db6 100644 --- a/spacy/tests/regression/test_issue1501-2000.py +++ b/spacy/tests/regression/test_issue1501-2000.py @@ -3,7 +3,7 @@ import gc import numpy import copy -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.lang.en.stop_words import STOP_WORDS from spacy.lang.lex_attrs import is_stop diff --git a/spacy/tests/regression/test_issue2001-2500.py b/spacy/tests/regression/test_issue2001-2500.py index 259ca9b0c..357fbb84e 100644 --- a/spacy/tests/regression/test_issue2001-2500.py +++ b/spacy/tests/regression/test_issue2001-2500.py @@ -3,7 +3,7 @@ import numpy from spacy.tokens import Doc from spacy.matcher import Matcher from spacy.displacy import render -from spacy.gold import iob_to_biluo +from spacy.training import iob_to_biluo from spacy.lang.it import Italian from spacy.lang.en import English diff --git a/spacy/tests/regression/test_issue2501-3000.py b/spacy/tests/regression/test_issue2501-3000.py index dd8f282b8..beb8faca1 100644 --- a/spacy/tests/regression/test_issue2501-3000.py +++ b/spacy/tests/regression/test_issue2501-3000.py @@ -1,6 +1,6 @@ import pytest from spacy import displacy -from 
spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.lang.ja import Japanese from spacy.lang.xx import MultiLanguage diff --git a/spacy/tests/regression/test_issue3501-4000.py b/spacy/tests/regression/test_issue3501-4000.py index f853b7aa7..d36e693c7 100644 --- a/spacy/tests/regression/test_issue3501-4000.py +++ b/spacy/tests/regression/test_issue3501-4000.py @@ -9,7 +9,7 @@ from spacy.tokens import Doc, Token from spacy.matcher import Matcher, PhraseMatcher from spacy.errors import MatchPatternError from spacy.util import minibatch -from spacy.gold import Example +from spacy.training import Example from spacy.lang.hi import Hindi from spacy.lang.es import Spanish from spacy.lang.en import English diff --git a/spacy/tests/regression/test_issue4001-4500.py b/spacy/tests/regression/test_issue4001-4500.py index e846841d4..2beccedcf 100644 --- a/spacy/tests/regression/test_issue4001-4500.py +++ b/spacy/tests/regression/test_issue4001-4500.py @@ -2,8 +2,8 @@ import pytest from spacy.pipeline import Pipe from spacy.matcher import PhraseMatcher, Matcher from spacy.tokens import Doc, Span, DocBin -from spacy.gold import Example, Corpus -from spacy.gold.converters import json2docs +from spacy.training import Example, Corpus +from spacy.training.converters import json2docs from spacy.vocab import Vocab from spacy.lang.en import English from spacy.util import minibatch, ensure_path, load_model diff --git a/spacy/tests/regression/test_issue4501-5000.py b/spacy/tests/regression/test_issue4501-5000.py index d83a2c718..9454d7f0c 100644 --- a/spacy/tests/regression/test_issue4501-5000.py +++ b/spacy/tests/regression/test_issue4501-5000.py @@ -1,9 +1,7 @@ import pytest -from mock import Mock -from spacy.matcher import DependencyMatcher from spacy.tokens import Doc, Span, DocBin -from spacy.gold import Example -from spacy.gold.converters.conllu2docs import conllu2docs +from spacy.training import Example +from spacy.training.converters.conllu2docs import conllu2docs from spacy.lang.en import English from spacy.kb import KnowledgeBase from spacy.vocab import Vocab @@ -12,7 +10,7 @@ from spacy.util import ensure_path, load_model_from_path import numpy import pickle -from ..util import get_doc, make_tempdir +from ..util import make_tempdir def test_issue4528(en_vocab): diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py index aa8ea6051..e8c83cbad 100644 --- a/spacy/tests/test_cli.py +++ b/spacy/tests/test_cli.py @@ -1,8 +1,8 @@ import pytest from click import NoSuchOption -from spacy.gold import docs_to_json, biluo_tags_from_offsets -from spacy.gold.converters import iob2docs, conll_ner2docs, conllu2docs +from spacy.training import docs_to_json, biluo_tags_from_offsets +from spacy.training.converters import iob2docs, conll_ner2docs, conllu2docs from spacy.lang.en import English from spacy.schemas import ProjectConfigSchema, RecommendationSchema, validate from spacy.cli.pretrain import make_docs diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py index ebc804235..840d878c2 100644 --- a/spacy/tests/test_language.py +++ b/spacy/tests/test_language.py @@ -3,7 +3,7 @@ import pytest from spacy.language import Language from spacy.tokens import Doc, Span from spacy.vocab import Vocab -from spacy.gold import Example +from spacy.training import Example from spacy.lang.en import English from spacy.util import registry diff --git a/spacy/tests/test_new_example.py b/spacy/tests/test_new_example.py index 321eaae95..597809286 100644 --- 
a/spacy/tests/test_new_example.py +++ b/spacy/tests/test_new_example.py @@ -1,5 +1,5 @@ import pytest -from spacy.gold.example import Example +from spacy.training.example import Example from spacy.tokens import Doc from spacy.vocab import Vocab diff --git a/spacy/tests/test_scorer.py b/spacy/tests/test_scorer.py index 6dae14210..fb96c0361 100644 --- a/spacy/tests/test_scorer.py +++ b/spacy/tests/test_scorer.py @@ -1,8 +1,8 @@ from numpy.testing import assert_almost_equal, assert_array_almost_equal import pytest from pytest import approx -from spacy.gold import Example -from spacy.gold.iob_utils import biluo_tags_from_offsets +from spacy.training import Example +from spacy.training.iob_utils import biluo_tags_from_offsets from spacy.scorer import Scorer, ROCAUCScore from spacy.scorer import _roc_auc_score, _roc_curve from .util import get_doc diff --git a/spacy/tests/test_tok2vec.py b/spacy/tests/test_tok2vec.py index 37e02a5b2..fb30c6ae5 100644 --- a/spacy/tests/test_tok2vec.py +++ b/spacy/tests/test_tok2vec.py @@ -6,7 +6,7 @@ from spacy.ml.models.tok2vec import MishWindowEncoder, MaxoutWindowEncoder from spacy.pipeline.tok2vec import Tok2Vec, Tok2VecListener from spacy.vocab import Vocab from spacy.tokens import Doc -from spacy.gold import Example +from spacy.training import Example from spacy import util from spacy.lang.en import English from .util import get_batch diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_training.py similarity index 98% rename from spacy/tests/test_gold.py rename to spacy/tests/test_training.py index 334d9fc24..1926aca1f 100644 --- a/spacy/tests/test_gold.py +++ b/spacy/tests/test_training.py @@ -1,9 +1,10 @@ import numpy -from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags, Alignment -from spacy.gold import spans_from_biluo_tags, iob_to_biluo -from spacy.gold import Corpus, docs_to_json -from spacy.gold.example import Example -from spacy.gold.converters import json2docs +from spacy.training import biluo_tags_from_offsets, offsets_from_biluo_tags, Alignment +from spacy.training import spans_from_biluo_tags, iob_to_biluo +from spacy.training import Corpus, docs_to_json +from spacy.training.example import Example +from spacy.training.converters import json2docs +from spacy.training.augment import make_orth_variants_example from spacy.lang.en import English from spacy.tokens import Doc, DocBin from spacy.util import get_words_and_spaces, minibatch @@ -12,7 +13,6 @@ import pytest import srsly from .util import make_tempdir -from ..gold.augment import make_orth_variants_example @pytest.fixture diff --git a/spacy/tests/test_util.py b/spacy/tests/test_util.py index 40cd71eb5..1f073ab32 100644 --- a/spacy/tests/test_util.py +++ b/spacy/tests/test_util.py @@ -5,7 +5,7 @@ from .util import get_random_doc from spacy import util from spacy.util import dot_to_object, SimpleFrozenList from thinc.api import Config, Optimizer -from spacy.gold.batchers import minibatch_by_words +from spacy.training.batchers import minibatch_by_words from ..lang.en import English from ..lang.nl import Dutch from ..language import DEFAULT_CONFIG_PATH diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index 5e7222d40..787cca652 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -24,7 +24,7 @@ from .util import registry from .attrs import intify_attrs from .symbols import ORTH from .scorer import Scorer -from .gold import validate_examples +from .training import validate_examples cdef class Tokenizer: diff --git a/spacy/gold/__init__.pxd 
b/spacy/training/__init__.pxd similarity index 100% rename from spacy/gold/__init__.pxd rename to spacy/training/__init__.pxd diff --git a/spacy/gold/__init__.py b/spacy/training/__init__.py similarity index 100% rename from spacy/gold/__init__.py rename to spacy/training/__init__.py diff --git a/spacy/gold/align.py b/spacy/training/align.py similarity index 100% rename from spacy/gold/align.py rename to spacy/training/align.py diff --git a/spacy/gold/augment.py b/spacy/training/augment.py similarity index 100% rename from spacy/gold/augment.py rename to spacy/training/augment.py diff --git a/spacy/gold/batchers.py b/spacy/training/batchers.py similarity index 100% rename from spacy/gold/batchers.py rename to spacy/training/batchers.py diff --git a/spacy/gold/converters/__init__.py b/spacy/training/converters/__init__.py similarity index 100% rename from spacy/gold/converters/__init__.py rename to spacy/training/converters/__init__.py diff --git a/spacy/gold/converters/conll_ner2docs.py b/spacy/training/converters/conll_ner2docs.py similarity index 99% rename from spacy/gold/converters/conll_ner2docs.py rename to spacy/training/converters/conll_ner2docs.py index c04a77f07..8dcaf2599 100644 --- a/spacy/gold/converters/conll_ner2docs.py +++ b/spacy/training/converters/conll_ner2docs.py @@ -1,7 +1,7 @@ from wasabi import Printer from .. import tags_to_entities -from ...gold import iob_to_biluo +from ...training import iob_to_biluo from ...lang.xx import MultiLanguage from ...tokens import Doc, Span from ...util import load_model diff --git a/spacy/gold/converters/conllu2docs.py b/spacy/training/converters/conllu2docs.py similarity index 99% rename from spacy/gold/converters/conllu2docs.py rename to spacy/training/converters/conllu2docs.py index 11ee86182..85afdeef3 100644 --- a/spacy/gold/converters/conllu2docs.py +++ b/spacy/training/converters/conllu2docs.py @@ -1,7 +1,7 @@ import re from .conll_ner2docs import n_sents_info -from ...gold import iob_to_biluo, spans_from_biluo_tags +from ...training import iob_to_biluo, spans_from_biluo_tags from ...tokens import Doc, Token, Span from ...vocab import Vocab from wasabi import Printer diff --git a/spacy/gold/converters/iob2docs.py b/spacy/training/converters/iob2docs.py similarity index 97% rename from spacy/gold/converters/iob2docs.py rename to spacy/training/converters/iob2docs.py index eebf1266b..f8076c5ab 100644 --- a/spacy/gold/converters/iob2docs.py +++ b/spacy/training/converters/iob2docs.py @@ -1,7 +1,7 @@ from wasabi import Printer from .conll_ner2docs import n_sents_info -from ...gold import iob_to_biluo, tags_to_entities +from ...training import iob_to_biluo, tags_to_entities from ...tokens import Doc, Span from ...util import minibatch diff --git a/spacy/gold/converters/json2docs.py b/spacy/training/converters/json2docs.py similarity index 100% rename from spacy/gold/converters/json2docs.py rename to spacy/training/converters/json2docs.py diff --git a/spacy/gold/corpus.py b/spacy/training/corpus.py similarity index 100% rename from spacy/gold/corpus.py rename to spacy/training/corpus.py diff --git a/spacy/gold/example.pxd b/spacy/training/example.pxd similarity index 100% rename from spacy/gold/example.pxd rename to spacy/training/example.pxd diff --git a/spacy/gold/example.pyx b/spacy/training/example.pyx similarity index 100% rename from spacy/gold/example.pyx rename to spacy/training/example.pyx diff --git a/spacy/gold/gold_io.pyx b/spacy/training/gold_io.pyx similarity index 100% rename from spacy/gold/gold_io.pyx rename to 
spacy/training/gold_io.pyx diff --git a/spacy/gold/iob_utils.py b/spacy/training/iob_utils.py similarity index 100% rename from spacy/gold/iob_utils.py rename to spacy/training/iob_utils.py diff --git a/spacy/gold/loggers.py b/spacy/training/loggers.py similarity index 100% rename from spacy/gold/loggers.py rename to spacy/training/loggers.py diff --git a/website/README.md b/website/README.md index f3a64d1cb..10a75161b 100644 --- a/website/README.md +++ b/website/README.md @@ -289,11 +289,11 @@ always be the **last element** in the row. > | Column 1 | Column 2 ~~List[Doc]~~ | > ``` -| Name | Description | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `vocab` | The shared vocabulary. ~~Vocab~~ | -| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. ~~Model[List[Doc], FullTransformerBatch]~~ | -| `annotation_setter` | Function that takes a batch of `Doc` objects and transformer outputs can set additional annotations on the `Doc`. ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | +| Name | Description | +| ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `vocab` | The shared vocabulary. ~~Vocab~~ | +| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. ~~Model[List[Doc], FullTransformerBatch]~~ | +| `set_extra_annotations` | Function that takes a batch of `Doc` objects and transformer outputs and can set additional annotations on the `Doc`. ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | ### List {#list} diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md index 7852d0482..0291d6dca 100644 --- a/website/docs/api/cli.md +++ b/website/docs/api/cli.md @@ -357,12 +357,12 @@ File /path/to/spacy/ml/models/tok2vec.py (line 207) ℹ [training.logger] Registry @loggers Name spacy.ConsoleLogger.v1 -Module spacy.gold.loggers +Module spacy.training.loggers File /path/to/spacy/gold/loggers.py (line 8) ℹ [training.batcher] Registry @batchers Name spacy.batch_by_words.v1 -Module spacy.gold.batchers +Module spacy.training.batchers File /path/to/spacy/gold/batchers.py (line 49) ℹ [training.batcher.size] Registry @schedules @@ -372,7 +372,7 @@ File /Users/ines/Repos/explosion/thinc/thinc/schedules.py (line 43) ℹ [training.dev_corpus] Registry @readers Name spacy.Corpus.v1 -Module spacy.gold.corpus +Module spacy.training.corpus File /path/to/spacy/gold/corpus.py (line 18) ℹ [training.optimizer] Registry @optimizers @@ -387,7 +387,7 @@ File /Users/ines/Repos/explosion/thinc/thinc/schedules.py (line 91) ℹ [training.train_corpus] Registry @readers Name spacy.Corpus.v1 -Module spacy.gold.corpus +Module spacy.training.corpus File /path/to/spacy/gold/corpus.py (line 18) ``` diff --git a/website/docs/api/corpus.md b/website/docs/api/corpus.md index 86cfa9121..b913d9a05 100644 --- a/website/docs/api/corpus.md +++ b/website/docs/api/corpus.md @@ -58,7 +58,7 @@ train/test skew. > #### Example > > ```python -> from spacy.gold import Corpus +> from spacy.training import Corpus > > # With a single file > corpus = Corpus("./data/train.spacy") @@ -82,7 +82,7 @@ Yield examples from the data. 
> #### Example > > ```python -> from spacy.gold import Corpus +> from spacy.training import Corpus > import spacy > > corpus = Corpus("./train.spacy") diff --git a/website/docs/api/data-formats.md b/website/docs/api/data-formats.md index 3fd2818f4..6a3b528c6 100644 --- a/website/docs/api/data-formats.md +++ b/website/docs/api/data-formats.md @@ -175,7 +175,7 @@ run [`spacy pretrain`](/api/cli#pretrain). > > ```python > from spacy.tokens import DocBin -> from spacy.gold import Corpus +> from spacy.training import Corpus > > doc_bin = DocBin(docs=docs) > doc_bin.to_disk("./data.spacy") diff --git a/website/docs/api/example.md b/website/docs/api/example.md index 2434cce43..132e9e8f5 100644 --- a/website/docs/api/example.md +++ b/website/docs/api/example.md @@ -22,7 +22,7 @@ both documents. > > ```python > from spacy.tokens import Doc -> from spacy.gold import Example +> from spacy.training import Example > > words = ["hello", "world", "!"] > spaces = [True, False, False] @@ -48,7 +48,7 @@ see the [training format documentation](/api/data-formats#dict-input). > > ```python > from spacy.tokens import Doc -> from spacy.gold import Example +> from spacy.training import Example > > predicted = Doc(vocab, words=["Apply", "some", "sunscreen"]) > token_ref = ["Apply", "some", "sun", "screen"] @@ -301,7 +301,7 @@ tokenizations add up to the same string. For example, you'll be able to align > #### Example > > ```python -> from spacy.gold import Alignment +> from spacy.training import Alignment > > bert_tokens = ["obama", "'", "s", "podcast"] > spacy_tokens = ["obama", "'s", "podcast"] diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md index 7f2eb2e66..7f66abb5f 100644 --- a/website/docs/api/top-level.md +++ b/website/docs/api/top-level.md @@ -538,7 +538,7 @@ sequences in the batch. ## Training data and alignment {#gold source="spacy/gold"} -### gold.biluo_tags_from_offsets {#biluo_tags_from_offsets tag="function"} +### training.biluo_tags_from_offsets {#biluo_tags_from_offsets tag="function"} Encode labelled spans into per-token tags, using the [BILUO scheme](/usage/linguistic-features#accessing-ner) (Begin, In, Last, Unit, @@ -554,7 +554,7 @@ single-token entity. > #### Example > > ```python -> from spacy.gold import biluo_tags_from_offsets +> from spacy.training import biluo_tags_from_offsets > > doc = nlp("I like London.") > entities = [(7, 13, "LOC")] @@ -568,7 +568,7 @@ single-token entity. | `entities` | A sequence of `(start, end, label)` triples. `start` and `end` should be character-offset integers denoting the slice into the original string. ~~List[Tuple[int, int, Union[str, int]]]~~ | | **RETURNS** | A list of strings, describing the [BILUO](/usage/linguistic-features#accessing-ner) tags. ~~List[str]~~ | -### gold.offsets_from_biluo_tags {#offsets_from_biluo_tags tag="function"} +### training.offsets_from_biluo_tags {#offsets_from_biluo_tags tag="function"} Encode per-token tags following the [BILUO scheme](/usage/linguistic-features#accessing-ner) into entity offsets. @@ -576,7 +576,7 @@ Encode per-token tags following the > #### Example > > ```python -> from spacy.gold import offsets_from_biluo_tags +> from spacy.training import offsets_from_biluo_tags > > doc = nlp("I like London.") > tags = ["O", "O", "U-LOC", "O"] @@ -590,7 +590,7 @@ Encode per-token tags following the | `entities` | A sequence of [BILUO](/usage/linguistic-features#accessing-ner) tags with each tag describing one token. 
Each tag string will be of the form of either `""`, `"O"` or `"{action}-{label}"`, where action is one of `"B"`, `"I"`, `"L"`, `"U"`. ~~List[str]~~ | | **RETURNS** | A sequence of `(start, end, label)` triples. `start` and `end` will be character-offset integers denoting the slice into the original string. ~~List[Tuple[int, int, str]]~~ | -### gold.spans_from_biluo_tags {#spans_from_biluo_tags tag="function" new="2.1"} +### training.spans_from_biluo_tags {#spans_from_biluo_tags tag="function" new="2.1"} Encode per-token tags following the [BILUO scheme](/usage/linguistic-features#accessing-ner) into @@ -600,7 +600,7 @@ token-based tags, e.g. to overwrite the `doc.ents`. > #### Example > > ```python -> from spacy.gold import spans_from_biluo_tags +> from spacy.training import spans_from_biluo_tags > > doc = nlp("I like London.") > tags = ["O", "O", "U-LOC", "O"] diff --git a/website/docs/api/transformer.md b/website/docs/api/transformer.md index b41a18890..fc8a8deef 100644 --- a/website/docs/api/transformer.md +++ b/website/docs/api/transformer.md @@ -61,11 +61,11 @@ on the transformer architectures and their arguments and hyperparameters. > nlp.add_pipe("transformer", config=DEFAULT_CONFIG) > ``` -| Setting | Description | -| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `max_batch_items` | Maximum size of a padded batch. Defaults to `4096`. ~~int~~ | -| `annotation_setter` | Function that takes a batch of `Doc` objects and transformer outputs to set additional annotations on the `Doc`. The `Doc._.transformer_data` attribute is set prior to calling the callback. Defaults to `null_annotation_setter` (no additional annotations). ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | -| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. Defaults to [TransformerModel](/api/architectures#TransformerModel). ~~Model[List[Doc], FullTransformerBatch]~~ | +| Setting | Description | +| ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `max_batch_items` | Maximum size of a padded batch. Defaults to `4096`. ~~int~~ | +| `set_extra_annotations` | Function that takes a batch of `Doc` objects and transformer outputs to set additional annotations on the `Doc`. The `Doc._.transformer_data` attribute is set prior to calling the callback. Defaults to `null_annotation_setter` (no additional annotations). ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | +| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. Defaults to [TransformerModel](/api/architectures#TransformerModel). ~~Model[List[Doc], FullTransformerBatch]~~ | ```python https://github.com/explosion/spacy-transformers/blob/master/spacy_transformers/pipeline_component.py @@ -102,14 +102,14 @@ attribute. You can also provide a callback to set additional annotations. 
In your application, you would normally use a shortcut for this and instantiate the component using its string name and [`nlp.add_pipe`](/api/language#create_pipe). -| Name | Description | -| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `vocab` | The shared vocabulary. ~~Vocab~~ | -| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. Usually you will want to use the [TransformerModel](/api/architectures#TransformerModel) layer for this. ~~Model[List[Doc], FullTransformerBatch]~~ | -| `annotation_setter` | Function that takes a batch of `Doc` objects and transformer outputs and stores the annotations on the `Doc`. The `Doc._.trf_data` attribute is set prior to calling the callback. By default, no additional annotations are set. ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | -| _keyword-only_ | | -| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ | -| `max_batch_items` | Maximum size of a padded batch. Defaults to `128*32`. ~~int~~ | +| Name | Description | +| ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `vocab` | The shared vocabulary. ~~Vocab~~ | +| `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. Usually you will want to use the [TransformerModel](/api/architectures#TransformerModel) layer for this. ~~Model[List[Doc], FullTransformerBatch]~~ | +| `set_extra_annotations` | Function that takes a batch of `Doc` objects and transformer outputs and stores the annotations on the `Doc`. The `Doc._.trf_data` attribute is set prior to calling the callback. By default, no additional annotations are set. ~~Callable[[List[Doc], FullTransformerBatch], None]~~ | +| _keyword-only_ | | +| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ | +| `max_batch_items` | Maximum size of a padded batch. Defaults to `128*32`. ~~int~~ | ## Transformer.\_\_call\_\_ {#call tag="method"} @@ -205,7 +205,7 @@ modifying them. Assign the extracted features to the Doc objects. By default, the [`TransformerData`](/api/transformer#transformerdata) object is written to the -[`Doc._.trf_data`](#custom-attributes) attribute. Your annotation_setter +[`Doc._.trf_data`](#custom-attributes) attribute. Your `set_extra_annotations` callback is then called, if provided. > #### Example diff --git a/website/docs/usage/embeddings-transformers.md b/website/docs/usage/embeddings-transformers.md index abd92a8ac..5215c0ae5 100644 --- a/website/docs/usage/embeddings-transformers.md +++ b/website/docs/usage/embeddings-transformers.md @@ -253,10 +253,10 @@ for doc in nlp.pipe(["some text", "some other text"]): You can also customize how the [`Transformer`](/api/transformer) component sets annotations onto the [`Doc`](/api/doc), by specifying a custom -`annotation_setter`. 
This callback will be called with the raw input and output -data for the whole batch, along with the batch of `Doc` objects, allowing you to -implement whatever you need. The annotation setter is called with a batch of -[`Doc`](/api/doc) objects and a +`set_extra_annotations` function. This callback will be called with the raw +input and output data for the whole batch, along with the batch of `Doc` +objects, allowing you to implement whatever you need. The annotation setter is +called with a batch of [`Doc`](/api/doc) objects and a [`FullTransformerBatch`](/api/transformer#fulltransformerbatch) containing the transformers data for the batch. @@ -267,7 +267,7 @@ def custom_annotation_setter(docs, trf_data): doc._.custom_attr = data nlp = spacy.load("en_core_trf_lg") -nlp.get_pipe("transformer").annotation_setter = custom_annotation_setter +nlp.get_pipe("transformer").set_extra_annotations = custom_annotation_setter doc = nlp("This is a text") assert isinstance(doc._.custom_attr, TransformerData) print(doc._.custom_attr.tensors) @@ -314,7 +314,7 @@ component: > get_spans=get_doc_spans, > tokenizer_config={"use_fast": True}, > ), -> annotation_setter=null_annotation_setter, +> set_extra_annotations=null_annotation_setter, > max_batch_items=4096, > ) > ``` @@ -333,7 +333,7 @@ tokenizer_config = {"use_fast": true} [components.transformer.model.get_spans] @span_getters = "spacy-transformers.doc_spans.v1" -[components.transformer.annotation_setter] +[components.transformer.set_extra_annotations] @annotation_setters = "spacy-transformers.null_annotation_setter.v1" ``` diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md index b36e9b71f..3cf6316c9 100644 --- a/website/docs/usage/linguistic-features.md +++ b/website/docs/usage/linguistic-features.md @@ -1366,7 +1366,7 @@ token. ```python ### {executable="true"} -from spacy.gold import Alignment +from spacy.training import Alignment other_tokens = ["i", "listened", "to", "obama", "'", "s", "podcasts", "."] spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts", "."] diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md index 0da350f27..a875df29c 100644 --- a/website/docs/usage/processing-pipelines.md +++ b/website/docs/usage/processing-pipelines.md @@ -1500,7 +1500,7 @@ add those entities to the `doc.ents`, you can wrap it in a custom pipeline component function and pass it the token texts from the `Doc` object received by the component. -The [`gold.spans_from_biluo_tags`](/api/top-level#spans_from_biluo_tags) is very +The [`training.spans_from_biluo_tags`](/api/top-level#spans_from_biluo_tags) is very helpful here, because it takes a `Doc` object and token-based BILUO tags and returns a sequence of `Span` objects in the `Doc` with added labels. So all your wrapper has to do is compute the entity spans and overwrite the `doc.ents`. @@ -1515,7 +1515,7 @@ wrapper has to do is compute the entity spans and overwrite the `doc.ents`. ```python ### {highlight="1,8-9"} import your_custom_entity_recognizer -from spacy.gold import offsets_from_biluo_tags +from spacy.training import offsets_from_biluo_tags from spacy.language import Language @Language.component("custom_ner_wrapper") diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md index 9c18e4606..066aa3e98 100644 --- a/website/docs/usage/training.md +++ b/website/docs/usage/training.md @@ -735,7 +735,7 @@ as **config settings** – in this case, `source`. 
### functions.py {highlight="7-8"} from typing import Callable, Iterator, List import spacy -from spacy.gold import Example +from spacy.training import Example from spacy.language import Language import random @@ -783,7 +783,7 @@ annotations are the same. ### functions.py from typing import Callable, Iterable, Iterator, List import spacy -from spacy.gold import Example +from spacy.training import Example @spacy.registry.batchers("filtering_batch.v1") def filter_batch(size: int) -> Callable[[Iterable[Example]], Iterator[List[Example]]]:
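
For downstream users of the nightly, here is a minimal sketch of how existing code maps onto the two renames in this patch: the `spacy.gold` module becoming `spacy.training`, and the transformer `annotation_setter` argument becoming `set_extra_annotations`. This is an illustration only, not part of the diff; it assumes spacy-nightly 3.0.0a16 or later, and the config fragment at the end is a hypothetical example using the registry names shown in the docs above.

```python
# Illustrative sketch of the renames in this patch; not part of the diff itself.
# Assumes spacy-nightly 3.0.0a16+, where `spacy.gold` has become `spacy.training`.

# Before (3.0.0a15 and earlier nightlies):
# from spacy.gold import Example, biluo_tags_from_offsets

# After this patch:
from spacy.training import Example, biluo_tags_from_offsets

import spacy

nlp = spacy.blank("en")
doc = nlp.make_doc("I like London.")

# The helper names themselves are unchanged; only the module path moved.
tags = biluo_tags_from_offsets(doc, [(7, 13, "LOC")])
example = Example.from_dict(doc, {"entities": [(7, 13, "LOC")]})

# The Transformer component's callback keyword was renamed from
# `annotation_setter` to `set_extra_annotations`. A hypothetical config
# fragment, using the annotation-setter registry name quoted in the docs
# changes above:
#
# [components.transformer.set_extra_annotations]
# @annotation_setters = "spacy-transformers.null_annotation_setter.v1"
```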