mirror of https://github.com/explosion/spaCy.git
Improve way noun chunks iterator is looked up
parent 51e1541ddb
commit 92ae36f84e
@@ -107,7 +107,8 @@ class BaseDefaults(object):
         'tags': lambda nlp, **cfg: [NeuralTagger(nlp.vocab, **cfg)],
         'dependencies': lambda nlp, **cfg: [
             NeuralDependencyParser(nlp.vocab, **cfg),
-            nonproj.deprojectivize],
+            nonproj.deprojectivize,
+        ],
         'entities': lambda nlp, **cfg: [NeuralEntityRecognizer(nlp.vocab, **cfg)],
     }

@@ -126,6 +127,7 @@ class BaseDefaults(object):
     lemma_index = {}
     morph_rules = {}
     lex_attr_getters = LEX_ATTRS
+    syntax_iterators = {}


 class Language(object):
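The new syntax_iterators class attribute gives each language's Defaults a place to register chunking functions under a name such as 'noun_chunks'. A standalone sketch of that registration pattern follows; the EnglishDefaults class and the toy iterator are illustrative stand-ins, not code from the repository, and "doc" here is just a sequence of POS strings.

class BaseDefaults(object):
    # Empty by default: languages that support noun chunks override this.
    syntax_iterators = {}

def noun_chunks(doc):
    # Toy iterator: treats every NOUN position as a one-token chunk.
    # A real implementation would walk the dependency parse.
    for i, pos in enumerate(doc):
        if pos == 'NOUN':
            yield (i, i + 1, 'NP')

class EnglishDefaults(BaseDefaults):
    syntax_iterators = {'noun_chunks': noun_chunks}

print(BaseDefaults.syntax_iterators.get('noun_chunks'))     # None
print(EnglishDefaults.syntax_iterators.get('noun_chunks'))  # the toy function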
@@ -26,7 +26,6 @@ from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUST
 from ..attrs cimport LENGTH, POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB, ENT_TYPE
 from ..attrs cimport SENT_START
 from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t
-from ..syntax.iterators import CHUNKERS
 from ..util import normalize_slice
 from ..compat import is_config
 from .. import about
@@ -65,6 +64,9 @@ cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil:
     else:
         return Lexeme.get_struct_attr(token.lex, feat_name)

+def _get_chunker(lang):
+    cls = util.get_lang_class(lang)
+    return cls.Defaults.syntax_iterators.get('noun_chunks')

 cdef class Doc:
     """A sequence of Token objects. Access sentences and named entities, export
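_get_chunker replaces the old global CHUNKERS table with a lookup through the language class: util.get_lang_class maps the language code to its Language subclass, and the chunker is read from that class's Defaults.syntax_iterators. A rough standalone model of that path, with a plain dict and stand-in classes in place of util.get_lang_class and the real per-language Defaults:

def noun_chunks(doc):
    return iter(())                      # placeholder iterator

class EnglishDefaults(object):
    syntax_iterators = {'noun_chunks': noun_chunks}

class English(object):
    Defaults = EnglishDefaults

LANG_CLASSES = {'en': English}           # stand-in for util.get_lang_class's registry

def get_lang_class(lang):
    return LANG_CLASSES[lang]

def _get_chunker(lang):
    cls = get_lang_class(lang)
    # .get() returns None instead of raising when a language defines
    # no noun_chunks iterator.
    return cls.Defaults.syntax_iterators.get('noun_chunks')

assert _get_chunker('en') is noun_chunks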
@@ -117,7 +119,7 @@ cdef class Doc:
         self.user_data = {}
         self._py_tokens = []
         self._vector = None
-        self.noun_chunks_iterator = CHUNKERS.get(self.vocab.lang)
+        self.noun_chunks_iterator = _get_chunker(self.vocab.lang)
         cdef unicode orth
         cdef bint has_space
         if orths_and_spaces is None and words is not None:
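With the constructor now calling _get_chunker(self.vocab.lang), each Doc carries whichever iterator its language registered, or None if there is none. A toy model of how that stored iterator might be consumed; the stand-in Doc class and the empty-iterator fallback for the None case are illustrative assumptions, not taken from doc.pyx.

class Doc(object):
    def __init__(self, lang, words, get_chunker):
        self.words = words
        # Mirrors: self.noun_chunks_iterator = _get_chunker(self.vocab.lang)
        self.noun_chunks_iterator = get_chunker(lang)

    @property
    def noun_chunks(self):
        if self.noun_chunks_iterator is None:
            return iter(())              # no chunker registered for this language
        return self.noun_chunks_iterator(self)

def chunker(doc):
    # Toy chunker: every token becomes its own one-token chunk.
    return ((i, i + 1, 'NP') for i in range(len(doc.words)))

doc_en = Doc('en', ['dogs', 'bark'], lambda lang: chunker)
doc_xx = Doc('xx', ['dogs', 'bark'], lambda lang: None)
print(list(doc_en.noun_chunks))          # [(0, 1, 'NP'), (1, 2, 'NP')]
print(list(doc_xx.noun_chunks))          # []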