diff --git a/spacy/syntax/iterators.pxd b/spacy/syntax/iterators.pxd index 5c574426f..f5ea7e632 100644 --- a/spacy/syntax/iterators.pxd +++ b/spacy/syntax/iterators.pxd @@ -1,6 +1,8 @@ from spacy.tokens.doc cimport Doc +cdef dict CHUNKERS + cdef class DocIterator: cdef Doc _doc diff --git a/spacy/syntax/iterators.pyx b/spacy/syntax/iterators.pyx index 78679b8ce..b0edea36b 100644 --- a/spacy/syntax/iterators.pyx +++ b/spacy/syntax/iterators.pyx @@ -5,6 +5,8 @@ from spacy.tokens.token cimport Token from spacy.parts_of_speech cimport NOUN +CHUNKERS = {'en':EnglishNounChunks, 'de':GermanNounChunks} + # base class for document iterators cdef class DocIterator: def __init__(self, Doc doc): diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx index c7b88d5b8..ba1f4f1b8 100644 --- a/spacy/syntax/parser.pyx +++ b/spacy/syntax/parser.pyx @@ -47,8 +47,7 @@ from ._parse_features cimport fill_context from .stateclass cimport StateClass from ._state cimport StateC -from spacy.syntax.iterators cimport DocIterator, EnglishNounChunks, GermanNounChunks -CHUNKERS = {'en':EnglishNounChunks, 'de':GermanNounChunks} +from spacy.syntax.iterators cimport CHUNKERS, DocIterator, EnglishNounChunks, GermanNounChunks DEBUG = False diff --git a/spacy/tests/tokens/test_noun_chunks.py b/spacy/tests/tokens/test_noun_chunks.py index 55f484e62..cf72e9ce1 100644 --- a/spacy/tests/tokens/test_noun_chunks.py +++ b/spacy/tests/tokens/test_noun_chunks.py @@ -3,11 +3,11 @@ import numpy as np from spacy.attrs import HEAD, DEP from spacy.symbols import nsubj, dobj, punct, amod, nmod, conj, cc, root from spacy.en import English - +from spacy.syntax.iterators import EnglishNounChunks def test_not_nested(): - nlp = English(parser=False) + nlp = English(parser=False, entity=False) sent = u'''Peter has chronic command and control issues'''.strip() tokens = nlp(sent) tokens.from_array( @@ -22,6 +22,7 @@ def test_not_nested(): [-2, conj], [-5, dobj] ], dtype='int32')) + tokens.noun_chunks = EnglishNounChunks for chunk in tokens.noun_chunks: print(chunk.text) word_occurred = {} @@ -31,3 +32,4 @@ def test_not_nested(): word_occurred[word.text] += 1 for word, freq in word_occurred.items(): assert freq == 1, (word, [chunk.text for chunk in tokens.noun_chunks]) +