Improve way noun chunks iterator is looked up

This commit is contained in:
Matthew Honnibal 2017-06-04 21:53:39 +02:00
parent 51e1541ddb
commit 92ae36f84e
2 changed files with 7 additions and 3 deletions

View File

@ -107,7 +107,8 @@ class BaseDefaults(object):
'tags': lambda nlp, **cfg: [NeuralTagger(nlp.vocab, **cfg)], 'tags': lambda nlp, **cfg: [NeuralTagger(nlp.vocab, **cfg)],
'dependencies': lambda nlp, **cfg: [ 'dependencies': lambda nlp, **cfg: [
NeuralDependencyParser(nlp.vocab, **cfg), NeuralDependencyParser(nlp.vocab, **cfg),
nonproj.deprojectivize], nonproj.deprojectivize,
],
'entities': lambda nlp, **cfg: [NeuralEntityRecognizer(nlp.vocab, **cfg)], 'entities': lambda nlp, **cfg: [NeuralEntityRecognizer(nlp.vocab, **cfg)],
} }
@ -126,6 +127,7 @@ class BaseDefaults(object):
lemma_index = {} lemma_index = {}
morph_rules = {} morph_rules = {}
lex_attr_getters = LEX_ATTRS lex_attr_getters = LEX_ATTRS
syntax_iterators = {}
class Language(object): class Language(object):

View File

@ -26,7 +26,6 @@ from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUST
from ..attrs cimport LENGTH, POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB, ENT_TYPE from ..attrs cimport LENGTH, POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB, ENT_TYPE
from ..attrs cimport SENT_START from ..attrs cimport SENT_START
from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t
from ..syntax.iterators import CHUNKERS
from ..util import normalize_slice from ..util import normalize_slice
from ..compat import is_config from ..compat import is_config
from .. import about from .. import about
@ -65,6 +64,9 @@ cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil:
else: else:
return Lexeme.get_struct_attr(token.lex, feat_name) return Lexeme.get_struct_attr(token.lex, feat_name)
def _get_chunker(lang):
    """Look up the noun-chunks iterator registered for a language.

    lang: Language identifier handed to `util.get_lang_class`; the Doc
        constructor passes `self.vocab.lang`.
    RETURNS: The entry stored under 'noun_chunks' in the language class's
        `Defaults.syntax_iterators`, or None if there is no such entry or
        the language class cannot be resolved.
    """
    # A Doc can be built on a vocab with no language set. The table lookup
    # this helper replaces (`CHUNKERS.get(lang)`) yielded None in that case,
    # so keep doing that instead of failing inside Doc.__init__.
    if not lang:
        return None
    try:
        cls = util.get_lang_class(lang)
    except (ImportError, KeyError):
        # NOTE(review): assumes get_lang_class reports an unknown language
        # via ImportError or KeyError -- confirm against util.
        return None
    return cls.Defaults.syntax_iterators.get('noun_chunks')
cdef class Doc: cdef class Doc:
"""A sequence of Token objects. Access sentences and named entities, export """A sequence of Token objects. Access sentences and named entities, export
@ -117,7 +119,7 @@ cdef class Doc:
self.user_data = {} self.user_data = {}
self._py_tokens = [] self._py_tokens = []
self._vector = None self._vector = None
self.noun_chunks_iterator = CHUNKERS.get(self.vocab.lang) self.noun_chunks_iterator = _get_chunker(self.vocab.lang)
cdef unicode orth cdef unicode orth
cdef bint has_space cdef bint has_space
if orths_and_spaces is None and words is not None: if orths_and_spaces is None and words is not None: