mirror of https://github.com/explosion/spaCy.git
Tagger: use unnormalized probabilities for inference (#10197)
* Tagger: use unnormalized probabilities for inference

  Using unnormalized softmax avoids use of the relatively expensive exp function, which can significantly speed up non-transformer models (e.g. I got a speedup of 27% on a German tagging + parsing pipeline).
* Add spacy.Tagger.v2 with configurable normalization

  Normalization of probabilities is disabled by default to improve performance.
* Update documentation, models, and tests to spacy.Tagger.v2
* Move Tagger.v1 to spacy-legacy
* docs/architectures: run prettier
* Unnormalized softmax is now a Softmax_v2 option
* Require thinc 8.0.14 and spacy-legacy 3.0.9
This commit is contained in:
parent
e8357923ec
commit
e5debc68e4
|
@ -5,7 +5,7 @@ requires = [
|
|||
"cymem>=2.0.2,<2.1.0",
|
||||
"preshed>=3.0.2,<3.1.0",
|
||||
"murmurhash>=0.28.0,<1.1.0",
|
||||
"thinc>=8.0.12,<8.1.0",
|
||||
"thinc>=8.0.14,<8.1.0",
|
||||
"blis>=0.4.0,<0.8.0",
|
||||
"pathy",
|
||||
"numpy>=1.15.0",
|
||||
|
|
|
@ -3,7 +3,7 @@ spacy-legacy>=3.0.9,<3.1.0
|
|||
spacy-loggers>=1.0.0,<2.0.0
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.12,<8.1.0
|
||||
thinc>=8.0.14,<8.1.0
|
||||
blis>=0.4.0,<0.8.0
|
||||
ml_datasets>=0.2.0,<0.3.0
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
|
|
|
@ -38,7 +38,7 @@ setup_requires =
|
|||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
thinc>=8.0.12,<8.1.0
|
||||
thinc>=8.0.14,<8.1.0
|
||||
install_requires =
|
||||
# Our libraries
|
||||
spacy-legacy>=3.0.9,<3.1.0
|
||||
|
@ -46,7 +46,7 @@ install_requires =
|
|||
murmurhash>=0.28.0,<1.1.0
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.12,<8.1.0
|
||||
thinc>=8.0.14,<8.1.0
|
||||
blis>=0.4.0,<0.8.0
|
||||
wasabi>=0.8.1,<1.1.0
|
||||
srsly>=2.4.1,<3.0.0
|
||||
|
|
|
@ -54,7 +54,7 @@ stride = 96
|
|||
factory = "morphologizer"
|
||||
|
||||
[components.morphologizer.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
nO = null
|
||||
|
||||
[components.morphologizer.model.tok2vec]
|
||||
|
@ -70,7 +70,7 @@ grad_factor = 1.0
|
|||
factory = "tagger"
|
||||
|
||||
[components.tagger.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
nO = null
|
||||
|
||||
[components.tagger.model.tok2vec]
|
||||
|
@ -238,7 +238,7 @@ maxout_pieces = 3
|
|||
factory = "morphologizer"
|
||||
|
||||
[components.morphologizer.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
nO = null
|
||||
|
||||
[components.morphologizer.model.tok2vec]
|
||||
|
@ -251,7 +251,7 @@ width = ${components.tok2vec.model.encode.width}
|
|||
factory = "tagger"
|
||||
|
||||
[components.tagger.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
nO = null
|
||||
|
||||
[components.tagger.model.tok2vec]
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
from typing import Optional, List
|
||||
from thinc.api import zero_init, with_array, Softmax, chain, Model
|
||||
from thinc.api import zero_init, with_array, Softmax_v2, chain, Model
|
||||
from thinc.types import Floats2d
|
||||
|
||||
from ...util import registry
|
||||
from ...tokens import Doc
|
||||
|
||||
|
||||
@registry.architectures("spacy.Tagger.v1")
|
||||
@registry.architectures("spacy.Tagger.v2")
|
||||
def build_tagger_model(
|
||||
tok2vec: Model[List[Doc], List[Floats2d]], nO: Optional[int] = None
|
||||
tok2vec: Model[List[Doc], List[Floats2d]], nO: Optional[int] = None, normalize=False
|
||||
) -> Model[List[Doc], List[Floats2d]]:
|
||||
"""Build a tagger model, using a provided token-to-vector component. The tagger
|
||||
model simply adds a linear layer with softmax activation to predict scores
|
||||
|
@ -19,7 +19,9 @@ def build_tagger_model(
|
|||
"""
|
||||
# TODO: glorot_uniform_init seems to work a bit better than zero_init here?!
|
||||
t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
|
||||
output_layer = Softmax(nO, t2v_width, init_W=zero_init)
|
||||
output_layer = Softmax_v2(
|
||||
nO, t2v_width, init_W=zero_init, normalize_outputs=normalize
|
||||
)
|
||||
softmax = with_array(output_layer) # type: ignore
|
||||
model = chain(tok2vec, softmax)
|
||||
model.set_ref("tok2vec", tok2vec)
|
||||
|
|
|
@ -25,7 +25,7 @@ BACKWARD_EXTEND = False
|
|||
|
||||
default_model_config = """
|
||||
[model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
|
||||
[model.tok2vec]
|
||||
@architectures = "spacy.Tok2Vec.v2"
|
||||
|
|
|
@ -20,7 +20,7 @@ BACKWARD_OVERWRITE = False
|
|||
|
||||
default_model_config = """
|
||||
[model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
|
||||
[model.tok2vec]
|
||||
@architectures = "spacy.HashEmbedCNN.v2"
|
||||
|
|
|
@ -27,7 +27,7 @@ BACKWARD_OVERWRITE = False
|
|||
|
||||
default_model_config = """
|
||||
[model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
|
||||
[model.tok2vec]
|
||||
@architectures = "spacy.HashEmbedCNN.v2"
|
||||
|
|
|
@ -100,7 +100,7 @@ cfg_string = """
|
|||
factory = "tagger"
|
||||
|
||||
[components.tagger.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
nO = null
|
||||
|
||||
[components.tagger.model.tok2vec]
|
||||
|
@ -263,7 +263,7 @@ cfg_string_multi = """
|
|||
factory = "tagger"
|
||||
|
||||
[components.tagger.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
nO = null
|
||||
|
||||
[components.tagger.model.tok2vec]
|
||||
|
@ -373,7 +373,7 @@ cfg_string_multi_textcat = """
|
|||
factory = "tagger"
|
||||
|
||||
[components.tagger.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
nO = null
|
||||
|
||||
[components.tagger.model.tok2vec]
|
||||
|
|
|
@ -59,7 +59,7 @@ subword_features = true
|
|||
factory = "tagger"
|
||||
|
||||
[components.tagger.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
|
||||
[components.tagger.model.tok2vec]
|
||||
@architectures = "spacy.Tok2VecListener.v1"
|
||||
|
@ -110,7 +110,7 @@ subword_features = true
|
|||
factory = "tagger"
|
||||
|
||||
[components.tagger.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
|
||||
[components.tagger.model.tok2vec]
|
||||
@architectures = "spacy.Tok2VecListener.v1"
|
||||
|
|
|
@ -70,7 +70,7 @@ factory = "ner"
|
|||
factory = "tagger"
|
||||
|
||||
[components.tagger.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
nO = null
|
||||
|
||||
[components.tagger.model.tok2vec]
|
||||
|
|
|
@ -38,7 +38,7 @@ subword_features = true
|
|||
factory = "tagger"
|
||||
|
||||
[components.tagger.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
|
||||
[components.tagger.model.tok2vec]
|
||||
@architectures = "spacy.Tok2VecListener.v1"
|
||||
|
@ -62,7 +62,7 @@ pipeline = ["tagger"]
|
|||
factory = "tagger"
|
||||
|
||||
[components.tagger.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
|
||||
[components.tagger.model.tok2vec]
|
||||
@architectures = "spacy.HashEmbedCNN.v1"
|
||||
|
@ -106,7 +106,7 @@ subword_features = true
|
|||
factory = "tagger"
|
||||
|
||||
[components.tagger.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
|
||||
[components.tagger.model.tok2vec]
|
||||
@architectures = "spacy.Tok2VecListener.v1"
|
||||
|
|
|
@ -241,7 +241,7 @@ maxout_pieces = 3
|
|||
factory = "tagger"
|
||||
|
||||
[components.tagger.model]
|
||||
@architectures = "spacy.Tagger.v1"
|
||||
@architectures = "spacy.Tagger.v2"
|
||||
nO = null
|
||||
|
||||
[components.tagger.model.tok2vec]
|
||||
|
|
|
@ -104,7 +104,7 @@ consisting of a CNN and a layer-normalized maxout activation function.
|
|||
> factory = "tagger"
|
||||
>
|
||||
> [components.tagger.model]
|
||||
> @architectures = "spacy.Tagger.v1"
|
||||
> @architectures = "spacy.Tagger.v2"
|
||||
>
|
||||
> [components.tagger.model.tok2vec]
|
||||
> @architectures = "spacy.Tok2VecListener.v1"
|
||||
|
@ -158,8 +158,8 @@ be configured with the `attrs` argument. The suggested attributes are `NORM`,
|
|||
`PREFIX`, `SUFFIX` and `SHAPE`. This lets the model take into account some
|
||||
subword information, without constructing a fully character-based
|
||||
representation. If pretrained vectors are available, they can be included in the
|
||||
representation as well, with the vectors table kept static (i.e. it's
|
||||
not updated).
|
||||
representation as well, with the vectors table kept static (i.e. it's not
|
||||
updated).
|
||||
|
||||
| Name | Description |
|
||||
| ------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
|
@ -613,14 +613,15 @@ same signature, but the `use_upper` argument was `True` by default.
|
|||
|
||||
## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"}
|
||||
|
||||
### spacy.Tagger.v1 {#Tagger}
|
||||
### spacy.Tagger.v2 {#Tagger}
|
||||
|
||||
> #### Example Config
|
||||
>
|
||||
> ```ini
|
||||
> [model]
|
||||
> @architectures = "spacy.Tagger.v1"
|
||||
> @architectures = "spacy.Tagger.v2"
|
||||
> nO = null
|
||||
> normalize = false
|
||||
>
|
||||
> [model.tok2vec]
|
||||
> # ...
|
||||
|
@ -634,8 +635,18 @@ the token vectors.
|
|||
| ----------- | ------------------------------------------------------------------------------------------ |
|
||||
| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
| `nO` | The number of tags to output. Inferred from the data if `None`. ~~Optional[int]~~ |
|
||||
| `normalize` | Normalize probabilities during inference. Defaults to `False`. ~~bool~~ |
|
||||
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
|
||||
<Accordion title="Previous versions of spacy.Tagger" spaced>
|
||||
|
||||
- The `normalize` argument was added in `spacy.Tagger.v2`. `spacy.Tagger.v1`
|
||||
always normalizes probabilities during inference.
|
||||
|
||||
The other arguments are shared between all versions.
|
||||
|
||||
</Accordion>
|
||||
|
||||
## Text classification architectures {#textcat source="spacy/ml/models/textcat.py"}
|
||||
|
||||
A text classification architecture needs to take a [`Doc`](/api/doc) as input,
|
||||
|
|
Loading…
Reference in New Issue