From 92ae36f84e677393cd303f58035e8ad8e0d965b7 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 4 Jun 2017 21:53:39 +0200 Subject: [PATCH] Improve way noun chunks iterator is looked up --- spacy/language.py | 4 +++- spacy/tokens/doc.pyx | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index 16acbe63b..e559e7c58 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -107,7 +107,8 @@ class BaseDefaults(object): 'tags': lambda nlp, **cfg: [NeuralTagger(nlp.vocab, **cfg)], 'dependencies': lambda nlp, **cfg: [ NeuralDependencyParser(nlp.vocab, **cfg), - nonproj.deprojectivize], + nonproj.deprojectivize, + ], 'entities': lambda nlp, **cfg: [NeuralEntityRecognizer(nlp.vocab, **cfg)], } @@ -126,6 +127,7 @@ class BaseDefaults(object): lemma_index = {} morph_rules = {} lex_attr_getters = LEX_ATTRS + syntax_iterators = {} class Language(object): diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index daf36bb85..30b5f2f0b 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -26,7 +26,6 @@ from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUST from ..attrs cimport LENGTH, POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB, ENT_TYPE from ..attrs cimport SENT_START from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t -from ..syntax.iterators import CHUNKERS from ..util import normalize_slice from ..compat import is_config from .. import about @@ -65,6 +64,9 @@ cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil: else: return Lexeme.get_struct_attr(token.lex, feat_name) +def _get_chunker(lang): + cls = util.get_lang_class(lang) + return cls.Defaults.syntax_iterators.get('noun_chunks') cdef class Doc: """A sequence of Token objects. Access sentences and named entities, export @@ -117,7 +119,7 @@ cdef class Doc: self.user_data = {} self._py_tokens = [] self._vector = None - self.noun_chunks_iterator = CHUNKERS.get(self.vocab.lang) + self.noun_chunks_iterator = _get_chunker(self.vocab.lang) cdef unicode orth cdef bint has_space if orths_and_spaces is None and words is not None: