From 686225eaddd56fac86cb18a3e172d84371ea8be1 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Wed, 18 Apr 2018 18:44:01 -0400
Subject: [PATCH] Fix Spanish noun_chunks (resolves #2210)

Make sure the 'NP' label is added to the StringStore, and move the noun_bounds helper into a closure inside noun_chunks so that it can reuse the label sets computed there.
---
 spacy/lang/es/syntax_iterators.py | 38 +++++++++++++++----------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/spacy/lang/es/syntax_iterators.py b/spacy/lang/es/syntax_iterators.py
index c414897a0..d38bff2a3 100644
--- a/spacy/lang/es/syntax_iterators.py
+++ b/spacy/lang/es/syntax_iterators.py
@@ -6,13 +6,30 @@ from ...symbols import NOUN, PROPN, PRON, VERB, AUX
 
 def noun_chunks(obj):
     doc = obj.doc
-    np_label = doc.vocab.strings['NP']
-    left_labels = ['det', 'fixed', 'neg'] #['nunmod', 'det', 'appos', 'fixed']
+    np_label = doc.vocab.strings.add('NP')
+    left_labels = ['det', 'fixed', 'neg'] # ['nummod', 'det', 'appos', 'fixed']
     right_labels = ['flat', 'fixed', 'compound', 'neg']
     stop_labels = ['punct']
     np_left_deps = [doc.vocab.strings[label] for label in left_labels]
     np_right_deps = [doc.vocab.strings[label] for label in right_labels]
     stop_deps = [doc.vocab.strings[label] for label in stop_labels]
+
+    def noun_bounds(root):  # closure over doc, np_left_deps, np_right_deps, stop_deps; returns (left_bound, right_bound) tokens for root
+        left_bound = root  # default: the chunk starts at root itself
+        for token in reversed(list(root.lefts)):  # walk root.lefts in reverse order; the last match seen wins
+            if token.dep in np_left_deps:
+                left_bound = token
+        right_bound = root  # default: the chunk ends at root itself
+        for token in root.rights:
+            if (token.dep in np_right_deps):
+                left, right = noun_bounds(token)  # recurse into the right child; only `right` is used below
+                if list(filter(lambda t: is_verb_token(t) or t.dep in stop_deps,
+                            doc[left_bound.i: right.i])):  # span checked runs from left_bound up to, but not including, `right`
+                    break  # a verb or stop-dep token lies in the span: stop extending to the right
+                else:
+                    right_bound = right  # span is clean: extend the chunk to the child's right bound
+        return left_bound, right_bound
+
     token = doc[0]
     while token and token.i < len(doc):
         if token.pos in [PROPN, NOUN, PRON]:
@@ -33,23 +50,6 @@ def next_token(token):
         return None
 
 
-def noun_bounds(root):
-    left_bound = root
-    for token in reversed(list(root.lefts)):
-        if token.dep in np_left_deps:
-            left_bound = token
-    right_bound = root
-    for token in root.rights:
-        if (token.dep in np_right_deps):
-            left, right = noun_bounds(token)
-            if list(filter(lambda t: is_verb_token(t) or t.dep in stop_deps,
-                           doc[left_bound.i: right.i])):
-                break
-            else:
-                right_bound = right
-    return left_bound, right_bound
-
-
 SYNTAX_ITERATORS = {
     'noun_chunks': noun_chunks
 }