diff --git a/spacy/_ml.py b/spacy/_ml.py
index 0dda71477..6723e5cf9 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -307,6 +307,7 @@ def PyTorchBiLSTM(nO, nI, depth, dropout=0.2):
     import torch.nn
     from thinc.api import with_square_sequences
     from thinc.extra.wrappers import PyTorchWrapperRNN
+
     if depth == 0:
         return layerize(noop())
     model = torch.nn.LSTM(nI, nO // 2, depth, bidirectional=True, dropout=dropout)
@@ -322,7 +323,7 @@ def Tok2Vec(width, embed_size, **kwargs):
     bilstm_depth = kwargs.get("bilstm_depth", 0)
     cols = ["ID", "NORM", "PREFIX", "SUFFIX", "SHAPE", "ORTH"]
-
+
     doc2feats_cfg = {"arch": "spacy.Doc2Feats.v1", "config": {"columns": cols}}
     if char_embed:
         embed_cfg = {
@@ -332,13 +333,10 @@
                 "chars": 6,
                 "@mix": {
                     "arch": "spacy.LayerNormalizedMaxout.v1",
-                    "config": {
-                        "width": width,
-                        "pieces": 3
-                    }
+                    "config": {"width": width, "pieces": 3},
                 },
-                "@embed_features": None
-            }
+                "@embed_features": None,
+            },
         }
     else:
         embed_cfg = {
@@ -351,12 +349,9 @@
                 "@pretrained_vectors": None,
                 "@mix": {
                     "arch": "spacy.LayerNormalizedMaxout.v1",
-                    "config": {
-                        "width": width,
-                        "pieces": 3
-                    }
+                    "config": {"width": width, "pieces": 3},
                 },
-            }
+            },
         }
     if pretrained_vectors:
         embed_cfg["config"]["@pretrained_vectors"] = {
@@ -364,8 +359,8 @@
             "config": {
                 "vectors_name": pretrained_vectors,
                 "width": width,
-                "column": cols.index(ID)
-            }
+                "column": cols.index(ID),
+            },
         }
     cnn_cfg = {
         "arch": "spacy.MaxoutWindowEncoder.v1",
@@ -373,35 +368,26 @@
             "width": width,
             "window_size": 1,
             "pieces": cnn_maxout_pieces,
-            "depth": conv_depth
-        }
+            "depth": conv_depth,
+        },
     }
     bilstm_cfg = {
         "arch": "spacy.TorchBiLSTMEncoder.v1",
-        "config": {
-            "width": width,
-            "depth": bilstm_depth,
-        }
+        "config": {"width": width, "depth": bilstm_depth},
     }
     if conv_depth == 0 and bilstm_depth == 0:
         encode_cfg = {}
     elif conv_depth >= 1 and bilstm_depth >= 1:
         encode_cfg = {
             "arch": "thinc.FeedForward.v1",
-            "config": {
-                "children": [cnn_cfg, bilstm_cfg]
-            }
+            "config": {"children": [cnn_cfg, bilstm_cfg]},
         }
     elif conv_depth >= 1:
         encode_cfg = cnn_cfg
     else:
         encode_cfg = bilstm_cfg
-    config = {
-        "@doc2feats": doc2feats_cfg,
-        "@embed": embed_cfg,
-        "@encode": encode_cfg
-    }
+    config = {"@doc2feats": doc2feats_cfg, "@embed": embed_cfg, "@encode": encode_cfg}
     return new_ml.Tok2Vec(config)
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index c5441a8ff..13fcae37f 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -262,8 +262,11 @@ def train(
         exits=1,
     )
     train_docs = corpus.train_docs(
-        nlp, noise_level=noise_level, gold_preproc=gold_preproc, max_length=0,
-        ignore_misaligned=True
+        nlp,
+        noise_level=noise_level,
+        gold_preproc=gold_preproc,
+        max_length=0,
+        ignore_misaligned=True,
     )
     train_labels = set()
     if textcat_multilabel:
@@ -344,7 +347,7 @@ def train(
                 orth_variant_level=orth_variant_level,
                 gold_preproc=gold_preproc,
                 max_length=0,
-                ignore_misaligned=True
+                ignore_misaligned=True,
             )
             if raw_text:
                 random.shuffle(raw_text)
@@ -383,8 +386,11 @@ def train(
                     if hasattr(component, "cfg"):
                         component.cfg["beam_width"] = beam_width
                 dev_docs = list(
-                    corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc,
-                                    ignore_misaligned=True)
+                    corpus.dev_docs(
+                        nlp_loaded,
+                        gold_preproc=gold_preproc,
+                        ignore_misaligned=True,
+                    )
                 )
                 nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
                 start_time = timer()
@@ -401,8 +407,11 @@
                     if hasattr(component, "cfg"):
                         component.cfg["beam_width"] = beam_width
                 dev_docs = list(
-                    corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc,
-                                    ignore_misaligned=True)
+                    corpus.dev_docs(
+                        nlp_loaded,
+                        gold_preproc=gold_preproc,
+                        ignore_misaligned=True,
+                    )
                 )
                 start_time = timer()
                 scorer = nlp_loaded.evaluate(dev_docs, verbose=verbose)
diff --git a/spacy/language.py b/spacy/language.py
index a7d1f3a70..05838f21b 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -131,9 +131,7 @@ class Language(object):
     Defaults = BaseDefaults
     lang = None
 
-    factories = {
-        "tokenizer": lambda nlp: nlp.Defaults.create_tokenizer(nlp),
-    }
+    factories = {"tokenizer": lambda nlp: nlp.Defaults.create_tokenizer(nlp)}
 
     def __init__(
         self, vocab=True, make_doc=True, max_length=10 ** 6, meta={}, **kwargs
diff --git a/spacy/ml/__init__.py b/spacy/ml/__init__.py
index 057484590..57e7ef571 100644
--- a/spacy/ml/__init__.py
+++ b/spacy/ml/__init__.py
@@ -1 +1,5 @@
-from .tok2vec import Tok2Vec
+# coding: utf8
+from __future__ import unicode_literals
+
+from .tok2vec import Tok2Vec  # noqa: F401
+from .common import FeedForward, LayerNormalizedMaxout  # noqa: F401
diff --git a/spacy/ml/common.py b/spacy/ml/common.py
index 9c9328d72..963d4dc35 100644
--- a/spacy/ml/common.py
+++ b/spacy/ml/common.py
@@ -13,6 +13,7 @@ def FeedForward(config):
     model.cfg = config
     return model
 
+
 @register_architecture("spacy.LayerNormalizedMaxout.v1")
 def LayerNormalizedMaxout(config):
     width = config["width"]
diff --git a/spacy/ml/tok2vec.py b/spacy/ml/tok2vec.py
index e41c28ea2..d78a45191 100644
--- a/spacy/ml/tok2vec.py
+++ b/spacy/ml/tok2vec.py
@@ -9,7 +9,6 @@ from thinc.misc import Residual, LayerNorm, FeatureExtracter
 
 from ..util import make_layer, register_architecture
 from ._wire import concatenate_lists
-from .common import *
 
 
 @register_architecture("spacy.Tok2Vec.v1")
diff --git a/spacy/tests/regression/test_issue4402.py b/spacy/tests/regression/test_issue4402.py
index d213253ed..2e1b69000 100644
--- a/spacy/tests/regression/test_issue4402.py
+++ b/spacy/tests/regression/test_issue4402.py
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals
 
 import srsly
-from spacy.gold import GoldCorpus, json_to_tuple
+from spacy.gold import GoldCorpus
 from spacy.lang.en import English
 from spacy.tests.util import make_tempdir
 
@@ -94,4 +94,3 @@ json_data = [
         ],
     }
 ]
-
diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py
index 0f3112e07..05fc95c37 100644
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -205,4 +205,3 @@ def test_align(tokens_a, tokens_b, expected):
     # check symmetry
     cost, a2b, b2a, a2b_multi, b2a_multi = align(tokens_b, tokens_a)
     assert (cost, list(b2a), list(a2b), b2a_multi, a2b_multi) == expected
-
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index 2a92155f0..4075ccf64 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -96,14 +96,14 @@ def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):
 
 def test_prefer_gpu():
     try:
-        import cupy
+        import cupy  # noqa: F401
     except ImportError:
         assert not prefer_gpu()
 
 
 def test_require_gpu():
     try:
-        import cupy
+        import cupy  # noqa: F401
     except ImportError:
         with pytest.raises(ValueError):
             require_gpu()