mirror of https://github.com/explosion/spaCy.git
Tidy up and auto-format
commit c5e41247e8
parent 92018b9cd4
spacy/_ml.py (42 lines changed)
@@ -307,6 +307,7 @@ def PyTorchBiLSTM(nO, nI, depth, dropout=0.2):
     import torch.nn
     from thinc.api import with_square_sequences
     from thinc.extra.wrappers import PyTorchWrapperRNN
+
     if depth == 0:
         return layerize(noop())
     model = torch.nn.LSTM(nI, nO // 2, depth, bidirectional=True, dropout=dropout)
@@ -322,7 +323,7 @@ def Tok2Vec(width, embed_size, **kwargs):
     bilstm_depth = kwargs.get("bilstm_depth", 0)

     cols = ["ID", "NORM", "PREFIX", "SUFFIX", "SHAPE", "ORTH"]
-
+
     doc2feats_cfg = {"arch": "spacy.Doc2Feats.v1", "config": {"columns": cols}}
     if char_embed:
         embed_cfg = {
@@ -332,13 +333,10 @@ def Tok2Vec(width, embed_size, **kwargs):
                 "chars": 6,
                 "@mix": {
                     "arch": "spacy.LayerNormalizedMaxout.v1",
-                    "config": {
-                        "width": width,
-                        "pieces": 3
-                    }
+                    "config": {"width": width, "pieces": 3},
                 },
-                "@embed_features": None
-            }
+                "@embed_features": None,
+            },
         }
     else:
         embed_cfg = {
@@ -351,12 +349,9 @@ def Tok2Vec(width, embed_size, **kwargs):
                 "@pretrained_vectors": None,
                 "@mix": {
                     "arch": "spacy.LayerNormalizedMaxout.v1",
-                    "config": {
-                        "width": width,
-                        "pieces": 3
-                    }
+                    "config": {"width": width, "pieces": 3},
                 },
-            }
+            },
         }
     if pretrained_vectors:
         embed_cfg["config"]["@pretrained_vectors"] = {
@@ -364,8 +359,8 @@ def Tok2Vec(width, embed_size, **kwargs):
             "config": {
                 "vectors_name": pretrained_vectors,
                 "width": width,
-                "column": cols.index(ID)
-            }
+                "column": cols.index(ID),
+            },
         }
     cnn_cfg = {
         "arch": "spacy.MaxoutWindowEncoder.v1",
@@ -373,35 +368,26 @@ def Tok2Vec(width, embed_size, **kwargs):
             "width": width,
             "window_size": 1,
             "pieces": cnn_maxout_pieces,
-            "depth": conv_depth
-        }
+            "depth": conv_depth,
+        },
     }

     bilstm_cfg = {
         "arch": "spacy.TorchBiLSTMEncoder.v1",
-        "config": {
-            "width": width,
-            "depth": bilstm_depth,
-        }
+        "config": {"width": width, "depth": bilstm_depth},
     }
     if conv_depth == 0 and bilstm_depth == 0:
         encode_cfg = {}
     elif conv_depth >= 1 and bilstm_depth >= 1:
         encode_cfg = {
             "arch": "thinc.FeedForward.v1",
-            "config": {
-                "children": [cnn_cfg, bilstm_cfg]
-            }
+            "config": {"children": [cnn_cfg, bilstm_cfg]},
         }
     elif conv_depth >= 1:
         encode_cfg = cnn_cfg
     else:
         encode_cfg = bilstm_cfg
-    config = {
-        "@doc2feats": doc2feats_cfg,
-        "@embed": embed_cfg,
-        "@encode": encode_cfg
-    }
+    config = {"@doc2feats": doc2feats_cfg, "@embed": embed_cfg, "@encode": encode_cfg}
     return new_ml.Tok2Vec(config)
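The collapsed dicts and added trailing commas in the hunks above are characteristic of black-style auto-formatting: a literal that fits within the line length is joined onto one line, and anything left multi-line ends with a trailing comma after its last element. A minimal sketch of the rule (illustrative only, not taken from the commit; `width` is a stand-in variable):

    width = 128  # stand-in value
    # before formatting: exploded dict, no trailing comma on the last item
    cfg = {
        "width": width,
        "pieces": 3
    }
    # after formatting: short enough for one line, so it is collapsed
    cfg = {"width": width, "pieces": 3}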
spacy/cli/train.py
@@ -262,8 +262,11 @@ def train(
             exits=1,
         )
     train_docs = corpus.train_docs(
-        nlp, noise_level=noise_level, gold_preproc=gold_preproc, max_length=0,
-        ignore_misaligned=True
+        nlp,
+        noise_level=noise_level,
+        gold_preproc=gold_preproc,
+        max_length=0,
+        ignore_misaligned=True,
     )
     train_labels = set()
     if textcat_multilabel:
@@ -344,7 +347,7 @@ def train(
                 orth_variant_level=orth_variant_level,
                 gold_preproc=gold_preproc,
                 max_length=0,
-                ignore_misaligned=True
+                ignore_misaligned=True,
             )
             if raw_text:
                 random.shuffle(raw_text)
@@ -383,8 +386,11 @@ def train(
                     if hasattr(component, "cfg"):
                         component.cfg["beam_width"] = beam_width
                 dev_docs = list(
-                    corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc,
-                                    ignore_misaligned=True)
+                    corpus.dev_docs(
+                        nlp_loaded,
+                        gold_preproc=gold_preproc,
+                        ignore_misaligned=True,
+                    )
                 )
                 nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
                 start_time = timer()
@@ -401,8 +407,11 @@ def train(
                     if hasattr(component, "cfg"):
                         component.cfg["beam_width"] = beam_width
                 dev_docs = list(
-                    corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc,
-                                    ignore_misaligned=True)
+                    corpus.dev_docs(
+                        nlp_loaded,
+                        gold_preproc=gold_preproc,
+                        ignore_misaligned=True,
+                    )
                 )
                 start_time = timer()
                 scorer = nlp_loaded.evaluate(dev_docs, verbose=verbose)
spacy/language.py
@@ -131,9 +131,7 @@ class Language(object):
     Defaults = BaseDefaults
     lang = None

-    factories = {
-        "tokenizer": lambda nlp: nlp.Defaults.create_tokenizer(nlp),
-    }
+    factories = {"tokenizer": lambda nlp: nlp.Defaults.create_tokenizer(nlp)}

     def __init__(
         self, vocab=True, make_doc=True, max_length=10 ** 6, meta={}, **kwargs
spacy/ml/__init__.py
@@ -1 +1,5 @@
-from .tok2vec import Tok2Vec
+# coding: utf8
+from __future__ import unicode_literals
+
+from .tok2vec import Tok2Vec  # noqa: F401
+from .common import FeedForward, LayerNormalizedMaxout  # noqa: F401
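The `# noqa: F401` markers tell flake8 that these otherwise "unused" imports are deliberate re-exports rather than dead code. With this `__init__.py`, callers can import the layers from the package directly (illustrative usage, assuming this branch's layout):

    from spacy.ml import Tok2Vec, FeedForward, LayerNormalizedMaxout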
spacy/ml/common.py
@@ -13,6 +13,7 @@ def FeedForward(config):
     model.cfg = config
     return model

+
 @register_architecture("spacy.LayerNormalizedMaxout.v1")
 def LayerNormalizedMaxout(config):
     width = config["width"]
spacy/ml/tok2vec.py
@@ -9,7 +9,6 @@ from thinc.misc import Residual, LayerNorm, FeatureExtracter

 from ..util import make_layer, register_architecture
 from ._wire import concatenate_lists
-from .common import *


 @register_architecture("spacy.Tok2Vec.v1")
@@ -81,8 +80,7 @@ def MaxoutWindowEncoder(config):
     depth = config["depth"]

     cnn = chain(
-        ExtractWindow(nW=nW),
-        LayerNorm(Maxout(nO, nO * ((nW * 2) + 1), pieces=nP)),
+        ExtractWindow(nW=nW), LayerNorm(Maxout(nO, nO * ((nW * 2) + 1), pieces=nP))
     )
     model = clone(Residual(cnn), depth)
     model.nO = nO
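`register_architecture` and `make_layer` (imported from `..util` above) implement the name-to-constructor registry that the `"arch"`/`"config"` dicts elsewhere in this commit feed into. A minimal sketch of that pattern, with simplified stand-in implementations rather than spaCy's actual ones:

    ARCHITECTURES = {}

    def register_architecture(name):
        # decorator: file the constructor under its registry name
        def do_registration(func):
            ARCHITECTURES[name] = func
            return func
        return do_registration

    def make_layer(cfg):
        # look up the "arch" name and build the layer from its "config"
        return ARCHITECTURES[cfg["arch"]](cfg["config"])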
spacy/tests/test_gold.py
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals

 import srsly
-from spacy.gold import GoldCorpus, json_to_tuple
+from spacy.gold import GoldCorpus

 from spacy.lang.en import English
 from spacy.tests.util import make_tempdir
@@ -94,4 +94,3 @@ json_data = [
         ],
     }
 ]
-
spacy/tests/test_align.py
@@ -205,4 +205,3 @@ def test_align(tokens_a, tokens_b, expected):
     # check symmetry
     cost, a2b, b2a, a2b_multi, b2a_multi = align(tokens_b, tokens_a)
     assert (cost, list(b2a), list(a2b), b2a_multi, a2b_multi) == expected
-
spacy/tests/test_misc.py
@@ -96,14 +96,14 @@ def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):

 def test_prefer_gpu():
     try:
-        import cupy
+        import cupy  # noqa: F401
     except ImportError:
         assert not prefer_gpu()


 def test_require_gpu():
     try:
-        import cupy
+        import cupy  # noqa: F401
     except ImportError:
         with pytest.raises(ValueError):
             require_gpu()
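Here `# noqa: F401` suppresses flake8's "imported but unused" warning: the import exists only to probe whether cupy is installed. The same availability-check idiom in isolation (a sketch, not from the diff):

    try:
        import cupy  # noqa: F401
        has_gpu = True
    except ImportError:
        has_gpu = False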