mirror of https://github.com/explosion/spaCy.git
Improve way noun chunks iterator is looked up
This commit is contained in:
parent
51e1541ddb
commit
92ae36f84e
|
@ -107,7 +107,8 @@ class BaseDefaults(object):
|
|||
'tags': lambda nlp, **cfg: [NeuralTagger(nlp.vocab, **cfg)],
|
||||
'dependencies': lambda nlp, **cfg: [
|
||||
NeuralDependencyParser(nlp.vocab, **cfg),
|
||||
nonproj.deprojectivize],
|
||||
nonproj.deprojectivize,
|
||||
],
|
||||
'entities': lambda nlp, **cfg: [NeuralEntityRecognizer(nlp.vocab, **cfg)],
|
||||
}
|
||||
|
||||
|
@ -126,6 +127,7 @@ class BaseDefaults(object):
|
|||
lemma_index = {}
|
||||
morph_rules = {}
|
||||
lex_attr_getters = LEX_ATTRS
|
||||
syntax_iterators = {}
|
||||
|
||||
|
||||
class Language(object):
|
||||
|
|
|
@ -26,7 +26,6 @@ from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUST
|
|||
from ..attrs cimport LENGTH, POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB, ENT_TYPE
|
||||
from ..attrs cimport SENT_START
|
||||
from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t
|
||||
from ..syntax.iterators import CHUNKERS
|
||||
from ..util import normalize_slice
|
||||
from ..compat import is_config
|
||||
from .. import about
|
||||
|
@ -65,6 +64,9 @@ cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil:
|
|||
else:
|
||||
return Lexeme.get_struct_attr(token.lex, feat_name)
|
||||
|
||||
def _get_chunker(lang):
    """Look up the noun chunks iterator registered for a language.

    lang (unicode): Language code, e.g. the value of `vocab.lang`.
    RETURNS (callable or None): The 'noun_chunks' entry of the language
        class's `Defaults.syntax_iterators`, or None if the language class
        can't be resolved or it registers no such iterator.
    """
    try:
        cls = util.get_lang_class(lang)
    except (ImportError, KeyError):
        # Unknown or empty language code: fall back to "no chunker", the
        # same result a plain dict `.get()` lookup gives for a missing
        # key, so a Doc can still be constructed.
        # NOTE(review): assumes get_lang_class signals an unknown language
        # with ImportError or KeyError -- confirm against spacy.util.
        return None
    return cls.Defaults.syntax_iterators.get('noun_chunks')
|
||||
|
||||
cdef class Doc:
|
||||
"""A sequence of Token objects. Access sentences and named entities, export
|
||||
|
@ -117,7 +119,7 @@ cdef class Doc:
|
|||
self.user_data = {}
|
||||
self._py_tokens = []
|
||||
self._vector = None
|
||||
self.noun_chunks_iterator = CHUNKERS.get(self.vocab.lang)
|
||||
self.noun_chunks_iterator = _get_chunker(self.vocab.lang)
|
||||
cdef unicode orth
|
||||
cdef bint has_space
|
||||
if orths_and_spaces is None and words is not None:
|
||||
|
|
Loading…
Reference in New Issue