From 00de30bcc28379ffb28be4d0b0c28ce9391eabb8 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Tue, 28 Jul 2020 23:06:30 +0200
Subject: [PATCH] Update CharacterEmbed function

---
 spacy/ml/models/tok2vec.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py
index 881f25a3b..acd9dc0b0 100644
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -119,15 +119,16 @@ def MultiHashEmbed(
 
 
 @registry.architectures.register("spacy.CharacterEmbed.v1")
-def CharacterEmbed(columns, width, rows, nM, nC, features, dropout):
-    norm = HashEmbed(
-        nO=width, nV=rows, column=columns.index("NORM"), dropout=dropout, seed=5
+def CharacterEmbed(width: int, rows: int, nM: int, nC: int):
+    model = concatenate(
+        _character_embed.CharacterEmbed(nM=nM, nC=nC),
+        chain(
+            FeatureExtractor([NORM]),
+            with_array(HashEmbed(nO=width, nV=rows, column=0, seed=5))
+        )
     )
-    chr_embed = _character_embed.CharacterEmbed(nM=nM, nC=nC)
-    with Model.define_operators({">>": chain, "|": concatenate}):
-        embed_layer = chr_embed | features >> with_array(norm)
-    embed_layer.set_dim("nO", nM * nC + width)
-    return embed_layer
+    model.set_dim("nO", nM * nC + width)
+    return model
 
 
 @registry.architectures.register("spacy.MaxoutWindowEncoder.v1")