From e6dde97295022efe299bfa65a73c8d9b96eba8c4 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Tue, 25 Sep 2018 10:57:59 +0200
Subject: [PATCH] Add function to make morphologizer model

---
 spacy/_ml.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/spacy/_ml.py b/spacy/_ml.py
index 231f6a7a4..f37938671 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -483,7 +483,33 @@ class MultiSoftmax(Affine):
         return output__BO, finish_update
 
 
-def build_tagger_model(class_nums, **cfg):
+def build_tagger_model(nr_class, **cfg):
+    embed_size = util.env_opt('embed_size', 7000)
+    if 'token_vector_width' in cfg:
+        token_vector_width = cfg['token_vector_width']
+    else:
+        token_vector_width = util.env_opt('token_vector_width', 128)
+    pretrained_vectors = cfg.get('pretrained_vectors')
+    subword_features = cfg.get('subword_features', True)
+    with Model.define_operators({'>>': chain, '+': add}):
+        if 'tok2vec' in cfg:
+            tok2vec = cfg['tok2vec']
+        else:
+            tok2vec = Tok2Vec(token_vector_width, embed_size,
+                              subword_features=subword_features,
+                              pretrained_vectors=pretrained_vectors)
+        softmax = with_flatten(
+            Softmax(nr_class, token_vector_width))
+        model = (
+            tok2vec
+            >> softmax
+        )
+    model.nI = None
+    model.tok2vec = tok2vec
+    model.softmax = softmax
+    return model
+
+def build_morphologizer_model(class_nums, **cfg):
     embed_size = util.env_opt('embed_size', 7000)
     if 'token_vector_width' in cfg:
         token_vector_width = cfg['token_vector_width']