From e649242927b7284d435844ca4ee84479f55ba705 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Thu, 14 Jan 2021 07:33:31 +0100
Subject: [PATCH] Prevent overlapping noun chunks for Spanish (#6712)

* Prevent overlapping noun chunks in Spanish noun chunk iterator
* Clean up similar code in Danish noun chunk iterator
---
 spacy/lang/da/syntax_iterators.py |  7 -------
 spacy/lang/es/syntax_iterators.py | 14 +++++---------
 2 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/spacy/lang/da/syntax_iterators.py b/spacy/lang/da/syntax_iterators.py
index c6b944193..f2bc3ee9b 100644
--- a/spacy/lang/da/syntax_iterators.py
+++ b/spacy/lang/da/syntax_iterators.py
@@ -9,12 +9,6 @@ def noun_chunks(doclike):
     def is_verb_token(tok):
         return tok.pos in [VERB, AUX]
 
-    def next_token(tok):
-        try:
-            return tok.nbor()
-        except IndexError:
-            return None
-
     def get_left_bound(doc, root):
         left_bound = root
         for tok in reversed(list(root.lefts)):
@@ -67,7 +61,6 @@ def noun_chunks(doclike):
     np_right_deps = [doc.vocab.strings.add(label) for label in right_labels]
     stop_deps = [doc.vocab.strings.add(label) for label in stop_labels]
 
-    chunks = []
     prev_right = -1
     for token in doclike:
         if token.pos in [PROPN, NOUN, PRON]:
diff --git a/spacy/lang/es/syntax_iterators.py b/spacy/lang/es/syntax_iterators.py
index d4572b682..d67eef2d6 100644
--- a/spacy/lang/es/syntax_iterators.py
+++ b/spacy/lang/es/syntax_iterators.py
@@ -20,27 +20,23 @@ def noun_chunks(doclike):
     np_left_deps = [doc.vocab.strings.add(label) for label in left_labels]
     np_right_deps = [doc.vocab.strings.add(label) for label in right_labels]
     stop_deps = [doc.vocab.strings.add(label) for label in stop_labels]
+
+    prev_right = -1
     for token in doclike:
         if token.pos in [PROPN, NOUN, PRON]:
             left, right = noun_bounds(
                 doc, token, np_left_deps, np_right_deps, stop_deps
             )
+            if left.i <= prev_right:
+                continue
             yield left.i, right.i + 1, np_label
-            token = right
-        token = next_token(token)
+            prev_right = right.i
 
 
 def is_verb_token(token):
     return token.pos in [VERB, AUX]
 
 
-def next_token(token):
-    try:
-        return token.nbor()
-    except IndexError:
-        return None
-
-
 def noun_bounds(doc, root, np_left_deps, np_right_deps, stop_deps):
     left_bound = root
     for token in reversed(list(root.lefts)):