spaCy/spacy/ml/models/simple_ner.py

83 lines
2.3 KiB
Python
Raw Normal View History

Adapt parser and NER for transformers (#5449) * Draft layer for BILUO actions * Fixes to biluo layer * WIP on BILUO layer * Add tests for BILUO layer * Format * Fix transitions * Update test * Link in the simple_ner * Update BILUO tagger * Update __init__ * Import simple_ner * Update test * Import * Add files * Add config * Fix label passing for BILUO and tagger * Fix label handling for simple_ner component * Update simple NER test * Update config * Hack train script * Update BILUO layer * Fix SimpleNER component * Update train_from_config * Add biluo_to_iob helper * Add IOB layer * Add IOBTagger model * Update biluo layer * Update SimpleNER tagger * Update BILUO * Read random seed in train-from-config * Update use of normal_init * Fix normalization of gradient in SimpleNER * Update IOBTagger * Remove print * Tweak masking in BILUO * Add dropout in SimpleNER * Update thinc * Tidy up simple_ner * Fix biluo model * Unhack train-from-config * Update setup.cfg and requirements * Add tb_framework.py for parser model * Try to avoid memory leak in BILUO * Move ParserModel into spacy.ml, avoid need for subclass. * Use updated parser model * Remove incorrect call to model.initializre in PrecomputableAffine * Update parser model * Avoid divide by zero in tagger * Add extra dropout layer in tagger * Refine minibatch_by_words function to avoid oom * Fix parser model after refactor * Try to avoid div-by-zero in SimpleNER * Fix infinite loop in minibatch_by_words * Use SequenceCategoricalCrossentropy in Tagger * Fix parser model when hidden layer * Remove extra dropout from tagger * Add extra nan check in tagger * Fix thinc version * Update tests and imports * Fix test * Update test * Update tests * Fix tests * Fix test Co-authored-by: Ines Montani <ines@ines.io>
2020-05-18 20:23:33 +00:00
import functools
from typing import List, Tuple, Dict, Optional
from thinc.api import Ops, Model, Linear, Softmax, with_array, softmax_activation, padded2list
from thinc.api import chain, list2padded, configure_normal_init
from thinc.api import Dropout
from thinc.types import Padded, Ints1d, Ints3d, Floats2d, Floats3d
from ...tokens import Doc
from .._biluo import BILUO
from .._iob import IOB
from ...util import registry
@registry.architectures.register("spacy.BiluoTagger.v1")
def BiluoTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], List[Floats2d]]:
biluo = BILUO()
linear = Linear(
nO=None,
nI=tok2vec.get_dim("nO"),
init_W=configure_normal_init(mean=0.02)
)
model = chain(
tok2vec,
list2padded(),
with_array(chain(Dropout(0.1), linear)),
biluo,
with_array(softmax_activation()),
padded2list()
)
return Model(
"biluo-tagger",
forward,
init=init,
layers=[model, linear],
refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo},
dims={"nO": None},
attrs={"get_num_actions": biluo.attrs["get_num_actions"]}
)
@registry.architectures.register("spacy.IOBTagger.v1")
def IOBTagger(tok2vec: Model[List[Doc], List[Floats2d]]) -> Model[List[Doc], List[Floats2d]]:
biluo = IOB()
linear = Linear(nO=None, nI=tok2vec.get_dim("nO"))
model = chain(
tok2vec,
list2padded(),
with_array(linear),
biluo,
with_array(softmax_activation()),
padded2list()
)
return Model(
"iob-tagger",
forward,
init=init,
layers=[model],
refs={"tok2vec": tok2vec, "linear": linear, "biluo": biluo},
dims={"nO": None},
attrs={"get_num_actions": biluo.attrs["get_num_actions"]}
)
def init(model: Model[List[Doc], List[Floats2d]], X=None, Y=None) -> None:
if model.get_dim("nO") is None and Y:
model.set_dim("nO", Y[0].shape[1])
nO = model.get_dim("nO")
biluo = model.get_ref("biluo")
linear = model.get_ref("linear")
biluo.set_dim("nO", nO)
if linear.has_dim("nO") is None:
linear.set_dim("nO", nO)
model.layers[0].initialize(X=X, Y=Y)
def forward(model: Model, X: List[Doc], is_train: bool):
return model.layers[0](X, is_train)
__all__ = ["BiluoTagger"]