# spacy/tests/tokens/test_noun_chunks.py
from collections import Counter

import numpy as np

from spacy.attrs import HEAD, DEP
from spacy.symbols import nsubj, dobj, punct, amod, nmod, conj, cc, root
from spacy.en import English


def test_not_nested():
    """Check that no word text is yielded by more than one noun chunk.

    The statistical parser is disabled (``parser=False``) and a fixed
    dependency annotation is written into the document with ``from_array``.
    The test then walks ``tokens.noun_chunks`` and fails if any word text
    shows up more than once across all chunks.
    """
    nlp = English(parser=False)
    sent = u'''Peter has chronic command and control issues'''.strip()
    tokens = nlp(sent)

    # One [HEAD, DEP] row per token.
    # NOTE(review): rows are assumed to line up one-to-one with the seven
    # whitespace-separated words, and HEAD values look like offsets relative
    # to each token -- confirm against Doc.from_array.
    parse = np.asarray(
        [
            [1, nsubj],
            [0, root],
            [4, amod],
            [3, nmod],
            [-1, cc],
            [-2, conj],
            [-5, dobj]
        ],
        dtype='int32')
    tokens.from_array([HEAD, DEP], parse)

    # Materialize the chunks once so the tally and the failure message are
    # guaranteed to describe the same sequence of chunks.
    chunks = list(tokens.noun_chunks)
    word_occurred = Counter(word.text for chunk in chunks for word in chunk)

    for word, freq in word_occurred.items():
        # On failure, report the offending word and every chunk's text.
        assert freq == 1, (word, [chunk.text for chunk in chunks])