The Parser is now a Pipe (2) (#5844)

* moving syntax folder to _parser_internals

* moving nn_parser and transition_system

* move nn_parser and transition_system out of internals folder

* moving nn_parser code into transition_system file

* rename transition_system to transition_parser

* moving parser_model and _state to ml

* move _state back to internals

* The Parser now inherits from Pipe!

* small code fixes

* removing unnecessary imports

* remove link_vectors_to_models

* transition_system to internals folder

* little bit more cleanup

* newlines
This commit is contained in:
Sofie Van Landeghem 2020-07-30 23:30:54 +02:00 committed by GitHub
parent 3449c45fd9
commit ca491722ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
39 changed files with 124 additions and 158 deletions

View File

@ -16,7 +16,7 @@ from bin.ud import conll17_ud_eval
from spacy.tokens import Token, Doc from spacy.tokens import Token, Doc
from spacy.gold import Example from spacy.gold import Example
from spacy.util import compounding, minibatch, minibatch_by_words from spacy.util import compounding, minibatch, minibatch_by_words
from spacy.syntax.nonproj import projectivize from spacy.pipeline._parser_internals.nonproj import projectivize
from spacy.matcher import Matcher from spacy.matcher import Matcher
from spacy import displacy from spacy import displacy
from collections import defaultdict from collections import defaultdict

View File

@ -13,7 +13,7 @@ import spacy
import spacy.util import spacy.util
from spacy.tokens import Token, Doc from spacy.tokens import Token, Doc
from spacy.gold import Example from spacy.gold import Example
from spacy.syntax.nonproj import projectivize from spacy.pipeline._parser_internals.nonproj import projectivize
from collections import defaultdict from collections import defaultdict
from spacy.matcher import Matcher from spacy.matcher import Matcher

View File

@ -31,6 +31,7 @@ MOD_NAMES = [
"spacy.vocab", "spacy.vocab",
"spacy.attrs", "spacy.attrs",
"spacy.kb", "spacy.kb",
"spacy.ml.parser_model",
"spacy.morphology", "spacy.morphology",
"spacy.pipeline.dep_parser", "spacy.pipeline.dep_parser",
"spacy.pipeline.morphologizer", "spacy.pipeline.morphologizer",
@ -40,14 +41,14 @@ MOD_NAMES = [
"spacy.pipeline.sentencizer", "spacy.pipeline.sentencizer",
"spacy.pipeline.senter", "spacy.pipeline.senter",
"spacy.pipeline.tagger", "spacy.pipeline.tagger",
"spacy.syntax.stateclass", "spacy.pipeline.transition_parser",
"spacy.syntax._state", "spacy.pipeline._parser_internals.arc_eager",
"spacy.pipeline._parser_internals.ner",
"spacy.pipeline._parser_internals.nonproj",
"spacy.pipeline._parser_internals._state",
"spacy.pipeline._parser_internals.stateclass",
"spacy.pipeline._parser_internals.transition_system",
"spacy.tokenizer", "spacy.tokenizer",
"spacy.syntax.nn_parser",
"spacy.syntax._parser_model",
"spacy.syntax.nonproj",
"spacy.syntax.transition_system",
"spacy.syntax.arc_eager",
"spacy.gold.gold_io", "spacy.gold.gold_io",
"spacy.tokens.doc", "spacy.tokens.doc",
"spacy.tokens.span", "spacy.tokens.span",
@ -57,7 +58,6 @@ MOD_NAMES = [
"spacy.matcher.matcher", "spacy.matcher.matcher",
"spacy.matcher.phrasematcher", "spacy.matcher.phrasematcher",
"spacy.matcher.dependencymatcher", "spacy.matcher.dependencymatcher",
"spacy.syntax.ner",
"spacy.symbols", "spacy.symbols",
"spacy.vectors", "spacy.vectors",
] ]

View File

@ -10,7 +10,7 @@ from thinc.api import Config
from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
from ._util import import_code, debug_cli from ._util import import_code, debug_cli
from ..gold import Corpus, Example from ..gold import Corpus, Example
from ..syntax import nonproj from ..pipeline._parser_internals import nonproj
from ..language import Language from ..language import Language
from .. import util from .. import util

View File

@ -10,7 +10,7 @@ from .align import Alignment
from .iob_utils import biluo_to_iob, biluo_tags_from_offsets, biluo_tags_from_doc from .iob_utils import biluo_to_iob, biluo_tags_from_offsets, biluo_tags_from_doc
from .iob_utils import spans_from_biluo_tags from .iob_utils import spans_from_biluo_tags
from ..errors import Errors, Warnings from ..errors import Errors, Warnings
from ..syntax import nonproj from ..pipeline._parser_internals import nonproj
cpdef Doc annotations2doc(vocab, tok_annot, doc_annot): cpdef Doc annotations2doc(vocab, tok_annot, doc_annot):

View File

@ -1,8 +1,6 @@
from libc.string cimport memset, memcpy from libc.string cimport memset, memcpy
from libc.stdlib cimport calloc, free, realloc from ..typedefs cimport weight_t, hash_t
from ..typedefs cimport weight_t, class_t, hash_t from ..pipeline._parser_internals._state cimport StateC
from ._state cimport StateC
cdef struct SizesC: cdef struct SizesC:

View File

@ -1,29 +1,18 @@
# cython: infer_types=True, cdivision=True, boundscheck=False # cython: infer_types=True, cdivision=True, boundscheck=False
cimport cython.parallel
cimport numpy as np cimport numpy as np
from libc.math cimport exp from libc.math cimport exp
from libcpp.vector cimport vector
from libc.string cimport memset, memcpy from libc.string cimport memset, memcpy
from libc.stdlib cimport calloc, free, realloc from libc.stdlib cimport calloc, free, realloc
from cymem.cymem cimport Pool
from thinc.extra.search cimport Beam
from thinc.backends.linalg cimport Vec, VecVec from thinc.backends.linalg cimport Vec, VecVec
cimport blis.cy cimport blis.cy
import numpy import numpy
import numpy.random import numpy.random
from thinc.api import Linear, Model, CupyOps, NumpyOps, use_ops, noop from thinc.api import Model, CupyOps, NumpyOps
from ..typedefs cimport weight_t, class_t, hash_t
from ..tokens.doc cimport Doc
from .stateclass cimport StateClass
from .transition_system cimport Transition
from ..compat import copy_array
from ..errors import Errors, TempErrors
from ..util import create_default_optimizer
from .. import util from .. import util
from . import nonproj from ..typedefs cimport weight_t, class_t, hash_t
from ..pipeline._parser_internals.stateclass cimport StateClass
cdef WeightsC get_c_weights(model) except *: cdef WeightsC get_c_weights(model) except *:

View File

@ -1,5 +1,5 @@
from thinc.api import Model, noop, use_ops, Linear from thinc.api import Model, noop, use_ops, Linear
from ..syntax._parser_model import ParserStepModel from .parser_model import ParserStepModel
def TransitionModel(tok2vec, lower, upper, dropout=0.2, unseen_classes=set()): def TransitionModel(tok2vec, lower, upper, dropout=0.2, unseen_classes=set()):

View File

@ -1,15 +1,14 @@
from libc.string cimport memcpy, memset, memmove from libc.string cimport memcpy, memset
from libc.stdlib cimport malloc, calloc, free from libc.stdlib cimport calloc, free
from libc.stdint cimport uint32_t, uint64_t from libc.stdint cimport uint32_t, uint64_t
from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
from murmurhash.mrmr cimport hash64 from murmurhash.mrmr cimport hash64
from ..vocab cimport EMPTY_LEXEME from ...vocab cimport EMPTY_LEXEME
from ..structs cimport TokenC, SpanC from ...structs cimport TokenC, SpanC
from ..lexeme cimport Lexeme from ...lexeme cimport Lexeme
from ..symbols cimport punct from ...attrs cimport IS_SPACE
from ..attrs cimport IS_SPACE from ...typedefs cimport attr_t
from ..typedefs cimport attr_t
cdef inline bint is_space_token(const TokenC* token) nogil: cdef inline bint is_space_token(const TokenC* token) nogil:

View File

@ -1,8 +1,6 @@
from cymem.cymem cimport Pool
from .stateclass cimport StateClass from .stateclass cimport StateClass
from ..typedefs cimport weight_t, attr_t from ...typedefs cimport weight_t, attr_t
from .transition_system cimport TransitionSystem, Transition from .transition_system cimport Transition, TransitionSystem
cdef class ArcEager(TransitionSystem): cdef class ArcEager(TransitionSystem):

View File

@ -1,24 +1,17 @@
# cython: profile=True, cdivision=True, infer_types=True # cython: profile=True, cdivision=True, infer_types=True
from cpython.ref cimport Py_INCREF
from cymem.cymem cimport Pool, Address from cymem.cymem cimport Pool, Address
from libc.stdint cimport int32_t from libc.stdint cimport int32_t
from collections import defaultdict, Counter from collections import defaultdict, Counter
import json
from ..typedefs cimport hash_t, attr_t from ...typedefs cimport hash_t, attr_t
from ..strings cimport hash_string from ...strings cimport hash_string
from ..structs cimport TokenC from ...structs cimport TokenC
from ..tokens.doc cimport Doc, set_children_from_heads from ...tokens.doc cimport Doc, set_children_from_heads
from ...gold.example cimport Example
from ...errors import Errors
from .stateclass cimport StateClass from .stateclass cimport StateClass
from ._state cimport StateC from ._state cimport StateC
from .transition_system cimport move_cost_func_t, label_cost_func_t
from ..gold.example cimport Example
from ..errors import Errors
from .nonproj import is_nonproj_tree
from . import nonproj
# Calculate cost as gold/not gold. We don't use scalar value anyway. # Calculate cost as gold/not gold. We don't use scalar value anyway.
cdef int BINARY_COSTS = 1 cdef int BINARY_COSTS = 1

View File

@ -1,6 +1,4 @@
from .transition_system cimport TransitionSystem from .transition_system cimport TransitionSystem
from .transition_system cimport Transition
from ..typedefs cimport attr_t
cdef class BiluoPushDown(TransitionSystem): cdef class BiluoPushDown(TransitionSystem):

View File

@ -2,17 +2,14 @@ from collections import Counter
from libc.stdint cimport int32_t from libc.stdint cimport int32_t
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
from ..typedefs cimport weight_t from ...typedefs cimport weight_t, attr_t
from ...lexeme cimport Lexeme
from ...attrs cimport IS_SPACE
from ...gold.example cimport Example
from ...errors import Errors
from .stateclass cimport StateClass from .stateclass cimport StateClass
from ._state cimport StateC from ._state cimport StateC
from .transition_system cimport Transition from .transition_system cimport Transition, do_func_t
from .transition_system cimport do_func_t
from ..lexeme cimport Lexeme
from ..attrs cimport IS_SPACE
from ..gold.iob_utils import biluo_tags_from_offsets
from ..gold.example cimport Example
from ..errors import Errors
cdef enum: cdef enum:

View File

@ -5,9 +5,9 @@ scheme.
""" """
from copy import copy from copy import copy
from ..tokens.doc cimport Doc, set_children_from_heads from ...tokens.doc cimport Doc, set_children_from_heads
from ..errors import Errors from ...errors import Errors
DELIMITER = '||' DELIMITER = '||'

View File

@ -1,12 +1,8 @@
from libc.string cimport memcpy, memset
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
cimport cython
from ..structs cimport TokenC, SpanC from ...structs cimport TokenC, SpanC
from ..typedefs cimport attr_t from ...typedefs cimport attr_t
from ..vocab cimport EMPTY_LEXEME
from ._state cimport StateC from ._state cimport StateC

View File

@ -1,7 +1,7 @@
# cython: infer_types=True # cython: infer_types=True
import numpy import numpy
from ..tokens.doc cimport Doc from ...tokens.doc cimport Doc
cdef class StateClass: cdef class StateClass:

View File

@ -1,11 +1,11 @@
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
from ..typedefs cimport attr_t, weight_t from ...typedefs cimport attr_t, weight_t
from ..structs cimport TokenC from ...structs cimport TokenC
from ..strings cimport StringStore from ...strings cimport StringStore
from ...gold.example cimport Example
from .stateclass cimport StateClass from .stateclass cimport StateClass
from ._state cimport StateC from ._state cimport StateC
from ..gold.example cimport Example
cdef struct Transition: cdef struct Transition:

View File

@ -1,19 +1,17 @@
# cython: infer_types=True # cython: infer_types=True
from __future__ import print_function from __future__ import print_function
from cpython.ref cimport Py_INCREF
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
from collections import Counter from collections import Counter
import srsly import srsly
from ..typedefs cimport weight_t from ...typedefs cimport weight_t, attr_t
from ..tokens.doc cimport Doc from ...tokens.doc cimport Doc
from ..structs cimport TokenC from ...structs cimport TokenC
from .stateclass cimport StateClass from .stateclass cimport StateClass
from ..typedefs cimport attr_t
from ..errors import Errors from ...errors import Errors
from .. import util from ... import util
cdef weight_t MIN_SCORE = -90000 cdef weight_t MIN_SCORE = -90000

View File

@ -1,13 +1,13 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, profile=True, binding=True
from typing import Optional, Iterable from typing import Optional, Iterable
from thinc.api import CosineDistance, to_categorical, get_array_module, Model, Config from thinc.api import Model, Config
from ..syntax.nn_parser cimport Parser from .transition_parser cimport Parser
from ..syntax.arc_eager cimport ArcEager from ._parser_internals.arc_eager cimport ArcEager
from .functions import merge_subtokens from .functions import merge_subtokens
from ..language import Language from ..language import Language
from ..syntax import nonproj from ._parser_internals import nonproj
from ..scorer import Scorer from ..scorer import Scorer

View File

@ -222,9 +222,9 @@ class EntityLinker(Pipe):
set_dropout_rate(self.model, drop) set_dropout_rate(self.model, drop)
if not sentence_docs: if not sentence_docs:
warnings.warn(Warnings.W093.format(name="Entity Linker")) warnings.warn(Warnings.W093.format(name="Entity Linker"))
return 0.0 return losses
sentence_encodings, bp_context = self.model.begin_update(sentence_docs) sentence_encodings, bp_context = self.model.begin_update(sentence_docs)
loss, d_scores = self.get_similarity_loss( loss, d_scores = self.get_loss(
sentence_encodings=sentence_encodings, examples=examples sentence_encodings=sentence_encodings, examples=examples
) )
bp_context(d_scores) bp_context(d_scores)
@ -235,7 +235,7 @@ class EntityLinker(Pipe):
self.set_annotations(docs, predictions) self.set_annotations(docs, predictions)
return losses return losses
def get_similarity_loss(self, examples: Iterable[Example], sentence_encodings): def get_loss(self, examples: Iterable[Example], sentence_encodings):
entity_encodings = [] entity_encodings = []
for eg in examples: for eg in examples:
kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True) kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
@ -247,7 +247,7 @@ class EntityLinker(Pipe):
entity_encodings = self.model.ops.asarray(entity_encodings, dtype="float32") entity_encodings = self.model.ops.asarray(entity_encodings, dtype="float32")
if sentence_encodings.shape != entity_encodings.shape: if sentence_encodings.shape != entity_encodings.shape:
err = Errors.E147.format( err = Errors.E147.format(
method="get_similarity_loss", msg="gold entities do not match up" method="get_loss", msg="gold entities do not match up"
) )
raise RuntimeError(err) raise RuntimeError(err)
gradients = self.distance.get_grad(sentence_encodings, entity_encodings) gradients = self.distance.get_grad(sentence_encodings, entity_encodings)
@ -337,13 +337,13 @@ class EntityLinker(Pipe):
final_kb_ids.append(candidates[0].entity_) final_kb_ids.append(candidates[0].entity_)
else: else:
random.shuffle(candidates) random.shuffle(candidates)
# this will set all prior probabilities to 0 if they should be excluded from the model # set all prior probabilities to 0 if incl_prior=False
prior_probs = xp.asarray( prior_probs = xp.asarray(
[c.prior_prob for c in candidates] [c.prior_prob for c in candidates]
) )
if not self.cfg.get("incl_prior"): if not self.cfg.get("incl_prior"):
prior_probs = xp.asarray( prior_probs = xp.asarray(
[0.0 for c in candidates] [0.0 for _ in candidates]
) )
scores = prior_probs scores = prior_probs
# add in similarity from the context # add in similarity from the context

View File

@ -1,7 +1,7 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, profile=True, binding=True
from typing import Optional from typing import Optional
import numpy import numpy
from thinc.api import CosineDistance, to_categorical, to_categorical, Model, Config from thinc.api import CosineDistance, to_categorical, Model, Config
from thinc.api import set_dropout_rate from thinc.api import set_dropout_rate
from ..tokens.doc cimport Doc from ..tokens.doc cimport Doc
@ -9,7 +9,7 @@ from ..tokens.doc cimport Doc
from .pipe import Pipe from .pipe import Pipe
from .tagger import Tagger from .tagger import Tagger
from ..language import Language from ..language import Language
from ..syntax import nonproj from ._parser_internals import nonproj
from ..attrs import POS, ID from ..attrs import POS, ID
from ..errors import Errors from ..errors import Errors
@ -219,3 +219,6 @@ class ClozeMultitask(Pipe):
if losses is not None: if losses is not None:
losses[self.name] += loss losses[self.name] += loss
def add_label(self, label):
    """Label registration is not implemented here.

    label: Accepted but never used.

    RAISES (NotImplementedError): Always.
    """
    # NOTE(review): the surrounding hunk points at ClozeMultitask as the
    # enclosing class; presumably this stub fills in a method expected by the
    # Pipe base class -- confirm against the full file.
    raise NotImplementedError

View File

@ -1,9 +1,9 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, profile=True, binding=True
from typing import Optional, Iterable from typing import Optional, Iterable
from thinc.api import CosineDistance, to_categorical, get_array_module, Model, Config from thinc.api import Model, Config
from ..syntax.nn_parser cimport Parser from .transition_parser cimport Parser
from ..syntax.ner cimport BiluoPushDown from ._parser_internals.ner cimport BiluoPushDown
from ..language import Language from ..language import Language
from ..scorer import Scorer from ..scorer import Scorer

2
spacy/pipeline/pipe.pxd Normal file
View File

@ -0,0 +1,2 @@
# Cython declaration of the Pipe extension type, so that other Cython
# modules can `cimport` it (e.g. to declare `cdef class X(Pipe)`).
cdef class Pipe:
    # Name of the component. Declared `public` so the attribute can be read
    # and assigned from Python code as well as from Cython.
    cdef public str name

View File

@ -8,7 +8,7 @@ from ..errors import Errors
from .. import util from .. import util
class Pipe: cdef class Pipe:
"""This class is a base class and not instantiated directly. Trainable """This class is a base class and not instantiated directly. Trainable
pipeline components like the EntityRecognizer or TextCategorizer inherit pipeline components like the EntityRecognizer or TextCategorizer inherit
from it and it defines the interface that components should follow to from it and it defines the interface that components should follow to
@ -17,8 +17,6 @@ class Pipe:
DOCS: https://spacy.io/api/pipe DOCS: https://spacy.io/api/pipe
""" """
name = None
def __init__(self, vocab, model, name, **cfg): def __init__(self, vocab, model, name, **cfg):
"""Initialize a pipeline component. """Initialize a pipeline component.

View File

@ -203,3 +203,9 @@ class Sentencizer(Pipe):
cfg = srsly.read_json(path) cfg = srsly.read_json(path)
self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars)) self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars))
return self return self
def get_loss(self, examples, scores):
    """Loss computation is not implemented here.

    examples: Accepted but never used.
    scores: Accepted but never used.

    RAISES (NotImplementedError): Always.
    """
    # NOTE(review): the surrounding hunk points at Sentencizer as the
    # enclosing class -- confirm against the full file.
    raise NotImplementedError
def add_label(self, label):
    """Label registration is not implemented here.

    label: Accepted but never used.

    RAISES (NotImplementedError): Always.
    """
    # NOTE(review): the surrounding hunk points at Sentencizer as the
    # enclosing class -- confirm against the full file.
    raise NotImplementedError

View File

@ -109,7 +109,7 @@ class SentenceRecognizer(Tagger):
for eg in examples: for eg in examples:
eg_truth = [] eg_truth = []
for x in eg.get_aligned("sent_start"): for x in eg.get_aligned("sent_start"):
if x == None: if x is None:
eg_truth.append(None) eg_truth.append(None)
elif x == 1: elif x == 1:
eg_truth.append(labels[1]) eg_truth.append(labels[1])

View File

@ -131,8 +131,6 @@ class SimpleNER(Pipe):
return losses return losses
def get_loss(self, examples: List[Example], scores) -> Tuple[List[Floats2d], float]: def get_loss(self, examples: List[Example], scores) -> Tuple[List[Floats2d], float]:
loss = 0
d_scores = []
truths = [] truths = []
for eg in examples: for eg in examples:
tags = eg.get_aligned("TAG", as_string=True) tags = eg.get_aligned("TAG", as_string=True)
@ -159,7 +157,6 @@ class SimpleNER(Pipe):
if not hasattr(get_examples, "__call__"): if not hasattr(get_examples, "__call__"):
gold_tuples = get_examples gold_tuples = get_examples
get_examples = lambda: gold_tuples get_examples = lambda: gold_tuples
labels = _get_labels(get_examples())
for label in _get_labels(get_examples()): for label in _get_labels(get_examples()):
self.add_label(label) self.add_label(label)
labels = self.labels labels = self.labels

View File

@ -238,8 +238,11 @@ class TextCategorizer(Pipe):
DOCS: https://spacy.io/api/textcategorizer#rehearse DOCS: https://spacy.io/api/textcategorizer#rehearse
""" """
if losses is not None:
losses.setdefault(self.name, 0.0)
if self._rehearsal_model is None: if self._rehearsal_model is None:
return return losses
try: try:
docs = [eg.predicted for eg in examples] docs = [eg.predicted for eg in examples]
except AttributeError: except AttributeError:
@ -250,7 +253,7 @@ class TextCategorizer(Pipe):
raise TypeError(err) raise TypeError(err)
if not any(len(doc) for doc in docs): if not any(len(doc) for doc in docs):
# Handle cases where there are no tokens in any docs. # Handle cases where there are no tokens in any docs.
return return losses
set_dropout_rate(self.model, drop) set_dropout_rate(self.model, drop)
scores, bp_scores = self.model.begin_update(docs) scores, bp_scores = self.model.begin_update(docs)
target = self._rehearsal_model(examples) target = self._rehearsal_model(examples)
@ -259,7 +262,6 @@ class TextCategorizer(Pipe):
if sgd is not None: if sgd is not None:
self.model.finish_update(sgd) self.model.finish_update(sgd)
if losses is not None: if losses is not None:
losses.setdefault(self.name, 0.0)
losses[self.name] += (gradient ** 2).sum() losses[self.name] += (gradient ** 2).sum()
return losses return losses

View File

@ -199,6 +199,9 @@ class Tok2Vec(Pipe):
docs = [Doc(self.vocab, words=["hello"])] docs = [Doc(self.vocab, words=["hello"])]
self.model.initialize(X=docs) self.model.initialize(X=docs)
def add_label(self, label):
    """Label registration is not implemented here.

    label: Accepted but never used.

    RAISES (NotImplementedError): Always.
    """
    # NOTE(review): the surrounding hunk points at Tok2Vec as the enclosing
    # class -- confirm against the full file.
    raise NotImplementedError
class Tok2VecListener(Model): class Tok2VecListener(Model):
"""A layer that gets fed its answers from an upstream connection, """A layer that gets fed its answers from an upstream connection,

View File

@ -1,16 +1,15 @@
from .stateclass cimport StateClass from cymem.cymem cimport Pool
from .arc_eager cimport TransitionSystem
from ..vocab cimport Vocab from ..vocab cimport Vocab
from ..tokens.doc cimport Doc from .pipe cimport Pipe
from ..structs cimport TokenC from ._parser_internals.transition_system cimport Transition, TransitionSystem
from ._state cimport StateC from ._parser_internals._state cimport StateC
from ._parser_model cimport WeightsC, ActivationsC, SizesC from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC
cdef class Parser: cdef class Parser(Pipe):
cdef readonly Vocab vocab cdef readonly Vocab vocab
cdef public object model cdef public object model
cdef public str name
cdef public object _rehearsal_model cdef public object _rehearsal_model
cdef readonly TransitionSystem moves cdef readonly TransitionSystem moves
cdef readonly object cfg cdef readonly object cfg

View File

@ -1,42 +1,32 @@
# cython: infer_types=True, cdivision=True, boundscheck=False # cython: infer_types=True, cdivision=True, boundscheck=False
cimport cython.parallel from __future__ import print_function
from cymem.cymem cimport Pool
cimport numpy as np cimport numpy as np
from itertools import islice from itertools import islice
from cpython.ref cimport PyObject, Py_XDECREF
from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
from libc.math cimport exp
from libcpp.vector cimport vector from libcpp.vector cimport vector
from libc.string cimport memset, memcpy from libc.string cimport memset
from libc.stdlib cimport calloc, free from libc.stdlib cimport calloc, free
from cymem.cymem cimport Pool
from thinc.backends.linalg cimport Vec, VecVec
from thinc.api import chain, clone, Linear, list2array, NumpyOps, CupyOps, use_ops
from thinc.api import get_array_module, zero_init, set_dropout_rate
from itertools import islice
import srsly import srsly
from ._parser_internals.stateclass cimport StateClass
from ..ml.parser_model cimport alloc_activations, free_activations
from ..ml.parser_model cimport predict_states, arg_max_if_valid
from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
from ..ml.parser_model cimport get_c_weights, get_c_sizes
from ..tokens.doc cimport Doc
from ..errors import Errors, Warnings
from .. import util
from ..util import create_default_optimizer
from thinc.api import set_dropout_rate
import numpy.random import numpy.random
import numpy import numpy
import warnings import warnings
from ..tokens.doc cimport Doc
from ..typedefs cimport weight_t, class_t, hash_t
from ._parser_model cimport alloc_activations, free_activations
from ._parser_model cimport predict_states, arg_max_if_valid
from ._parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
from ._parser_model cimport get_c_weights, get_c_sizes
from .stateclass cimport StateClass
from ._state cimport StateC
from .transition_system cimport Transition
from ..util import create_default_optimizer, registry cdef class Parser(Pipe):
from ..compat import copy_array
from ..errors import Errors, Warnings
from .. import util
from . import nonproj
cdef class Parser:
""" """
Base class of the DependencyParser and EntityRecognizer. Base class of the DependencyParser and EntityRecognizer.
""" """
@ -107,7 +97,7 @@ cdef class Parser:
@property @property
def tok2vec(self): def tok2vec(self):
'''Return the embedding and convolutional layer of the model.''' """Return the embedding and convolutional layer of the model."""
return self.model.get_ref("tok2vec") return self.model.get_ref("tok2vec")
@property @property
@ -138,13 +128,13 @@ cdef class Parser:
raise NotImplementedError raise NotImplementedError
def init_multitask_objectives(self, get_examples, pipeline, **cfg): def init_multitask_objectives(self, get_examples, pipeline, **cfg):
'''Setup models for secondary objectives, to benefit from multi-task """Setup models for secondary objectives, to benefit from multi-task
learning. This method is intended to be overridden by subclasses. learning. This method is intended to be overridden by subclasses.
For instance, the dependency parser can benefit from sharing For instance, the dependency parser can benefit from sharing
an input representation with a label prediction model. These auxiliary an input representation with a label prediction model. These auxiliary
models are discarded after training. models are discarded after training.
''' """
pass pass
def use_params(self, params): def use_params(self, params):

View File

@ -4,8 +4,8 @@ from spacy import registry
from spacy.gold import Example from spacy.gold import Example
from spacy.pipeline import DependencyParser from spacy.pipeline import DependencyParser
from spacy.tokens import Doc from spacy.tokens import Doc
from spacy.syntax.nonproj import projectivize from spacy.pipeline._parser_internals.nonproj import projectivize
from spacy.syntax.arc_eager import ArcEager from spacy.pipeline._parser_internals.arc_eager import ArcEager
from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL

View File

@ -5,7 +5,7 @@ from spacy.lang.en import English
from spacy.language import Language from spacy.language import Language
from spacy.lookups import Lookups from spacy.lookups import Lookups
from spacy.syntax.ner import BiluoPushDown from spacy.pipeline._parser_internals.ner import BiluoPushDown
from spacy.gold import Example from spacy.gold import Example
from spacy.tokens import Doc from spacy.tokens import Doc
from spacy.vocab import Vocab from spacy.vocab import Vocab

View File

@ -3,8 +3,8 @@ import pytest
from spacy import registry from spacy import registry
from spacy.gold import Example from spacy.gold import Example
from spacy.vocab import Vocab from spacy.vocab import Vocab
from spacy.syntax.arc_eager import ArcEager from spacy.pipeline._parser_internals.arc_eager import ArcEager
from spacy.syntax.nn_parser import Parser from spacy.pipeline.transition_parser import Parser
from spacy.tokens.doc import Doc from spacy.tokens.doc import Doc
from thinc.api import Model from thinc.api import Model
from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL

View File

@ -1,7 +1,7 @@
import pytest import pytest
from spacy.syntax.nonproj import ancestors, contains_cycle, is_nonproj_arc from spacy.pipeline._parser_internals.nonproj import ancestors, contains_cycle, is_nonproj_arc
from spacy.syntax.nonproj import is_nonproj_tree from spacy.pipeline._parser_internals.nonproj import is_nonproj_tree
from spacy.syntax import nonproj from spacy.pipeline._parser_internals import nonproj
from ..util import get_doc from ..util import get_doc