From e40465487c045bb19adf65d965e93b23a35e3b19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ole=20Henrik=20Skogstr=C3=B8m?=
Date: Tue, 30 Jan 2018 15:44:29 +0100
Subject: [PATCH 01/11] Added French syntax iterator with explanation

---
 spacy/lang/nb/__init__.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/spacy/lang/nb/__init__.py b/spacy/lang/nb/__init__.py
index 900e59626..b6ec65e1e 100644
--- a/spacy/lang/nb/__init__.py
+++ b/spacy/lang/nb/__init__.py
@@ -13,6 +13,12 @@ from ...language import Language
 from ...attrs import LANG, NORM
 from ...util import update_exc, add_lookups
 
+# Borrowing french syntax parser because both languages use
+# universal dependencies for tagging/parsing.
+# Read here for more:
+# https://github.com/explosion/spaCy/pull/1882#issuecomment-361409573
+from ..fr.syntax_iterators import SYNTAX_ITERATORS
+
 
 class NorwegianDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
@@ -22,6 +28,7 @@ class NorwegianDefaults(Language.Defaults):
     stop_words = STOP_WORDS
     tag_map = TAG_MAP
     lemma_lookup = LOOKUP
+    syntax_iterators = SYNTAX_ITERATORS
 
 
 class Norwegian(Language):

From f4a7d1a423964876208f0caccf0ba7a19d4832a0 Mon Sep 17 00:00:00 2001
From: Motoki Wu
Date: Tue, 30 Jan 2018 18:29:54 -0800
Subject: [PATCH 02/11] Make sure to pass **cfg to each component when training

---
 spacy/language.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/language.py b/spacy/language.py
index ae62f918a..a2b945c49 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -461,7 +461,8 @@ class Language(object):
             if hasattr(proc, 'begin_training'):
                 proc.begin_training(get_gold_tuples(),
                                     pipeline=self.pipeline,
-                                    sgd=self._optimizer)
+                                    sgd=self._optimizer,
+                                    **cfg)
         return self._optimizer
 
     def evaluate(self, docs_golds, verbose=False):

From 54062b7326b998c0fe3015ae9d78816762d52c25 Mon Sep 17 00:00:00 2001
From: Motoki Wu
Date: Tue, 30 Jan 2018 18:30:19 -0800
Subject: [PATCH 03/11] Added test for issue #1915

---
 spacy/tests/regression/test_issue1915.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue1915.py

diff --git a/spacy/tests/regression/test_issue1915.py b/spacy/tests/regression/test_issue1915.py
new file mode 100644
index 000000000..23cf6dc73
--- /dev/null
+++ b/spacy/tests/regression/test_issue1915.py
@@ -0,0 +1,18 @@
+# coding: utf8
+
+from __future__ import unicode_literals
+
+import pytest
+
+from ...language import Language
+
+
+def test_simple_ner():
+    # 'hidden_depth' is not a recognised config option for the NER model,
+    # so begin_training should raise rather than silently ignore it.
+    cfg = {'hidden_depth': 2}
+    nlp = Language()
+    nlp.add_pipe(nlp.create_pipe('ner'))
+    nlp.get_pipe('ner').add_label('answer')
+    with pytest.raises(ValueError):
+        nlp.begin_training(**cfg)

From 002ee80ddf1e3616e9d957abbdab76180d45aa27 Mon Sep 17 00:00:00 2001
From: ines
Date: Fri, 2 Feb 2018 20:32:08 +0100
Subject: [PATCH 04/11] Add html5lib to setup.py to fix six error (see #1924)

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index a387c605c..7c26a7491 100755
--- a/setup.py
+++ b/setup.py
@@ -192,6 +192,7 @@ def setup_package():
             'thinc>=6.10.1,<6.11.0',
             'plac<1.0.0,>=0.9.6',
             'six',
+            'html5lib==1.0b8',
             'pathlib',
             'ujson>=1.35',
             'dill>=0.2,<0.3',

From 9df9da34a3280664277f326da573993affbe7be6 Mon Sep 17 00:00:00 2001
From: Ali Zarezade
Date: Sat, 3 Feb 2018 17:21:34 +0330
Subject: [PATCH 05/11] Fix init_model issue

Fixes issue #1928. The unparenthesised conditional expression was parsed
as a tuple, so vector_keys was always None; checking len(vectors_data)
also avoids the ambiguous truth value of a non-empty numpy array.
---
 spacy/cli/init_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py
index 6e3369f4d..99a6e87eb 100644
--- a/spacy/cli/init_model.py
+++ b/spacy/cli/init_model.py
@@ -36,7 +36,7 @@ def init_model(lang, output_dir, freqs_loc, clusters_loc=None, vectors_loc=None,
     vectors_loc = ensure_path(vectors_loc)
 
     probs, oov_prob = read_freqs(freqs_loc)
-    vectors_data, vector_keys = read_vectors(vectors_loc) if vectors_loc else None, None
+    vectors_data, vector_keys = read_vectors(vectors_loc) if vectors_loc else (None, None)
     clusters = read_clusters(clusters_loc) if clusters_loc else {}
     nlp = create_model(lang, probs, oov_prob, clusters, vectors_data, vector_keys, prune_vectors)
 
@@ -69,7 +69,7 @@ def create_model(lang, probs, oov_prob, clusters, vectors_data, vector_keys, pru
             lex_added += 1
 
     nlp.vocab.cfg.update({'oov_prob': oov_prob})
-    if vectors_data:
+    if len(vectors_data):
         nlp.vocab.vectors = Vectors(data=vectors_data, keys=vector_keys)
     if prune_vectors >= 1:
         nlp.vocab.prune_vectors(prune_vectors)
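A quick illustration of the bug fixed in PATCH 05: Python's conditional
expression binds tighter than the comma, so the unparenthesised fallback
builds a 2-tuple whose second element is always None. A minimal standalone
sketch (the read_vectors stub below is a hypothetical stand-in for the real
helper, for illustration only):

    def read_vectors(loc):
        # Hypothetical stand-in for the real helper, which returns
        # a (vectors_data, vector_keys) pair.
        return ['0.1 0.2'], ['apple']

    loc = 'vectors.txt'

    # Buggy: parsed as `x = ((read_vectors(loc) if loc else None), None)`,
    # so the whole pair lands in `data` and `keys` is silently None.
    data, keys = read_vectors(loc) if loc else None, None
    assert data == (['0.1 0.2'], ['apple']) and keys is None

    # Fixed, as in the patch: parenthesise the fallback so both branches
    # of the conditional yield a 2-tuple to unpack.
    data, keys = read_vectors(loc) if loc else (None, None)
    assert data == ['0.1 0.2'] and keys == ['apple']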
From 35272eade8b7088ce7159a67ae220891a05de886 Mon Sep 17 00:00:00 2001
From: sayf eddine hammemi
Date: Sun, 4 Feb 2018 20:45:24 +0100
Subject: [PATCH 06/11] Accept contributor agreement

---
 .github/CONTRIBUTOR_AGREEMENT.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/CONTRIBUTOR_AGREEMENT.md b/.github/CONTRIBUTOR_AGREEMENT.md
index f34603065..a8c741ce1 100644
--- a/.github/CONTRIBUTOR_AGREEMENT.md
+++ b/.github/CONTRIBUTOR_AGREEMENT.md
@@ -87,11 +87,11 @@ U.S. Federal law. Any choice of law rules will not apply.
 7. Please place an “x” on one of the applicable statement below. Please do NOT
    mark both statements:
 
-    * [ ] I am signing on behalf of myself as an individual and no other person
+    * [x] I am signing on behalf of myself as an individual and no other person
       or entity, including my employer, has or will have rights with respect to
       my contributions.
 
-    * [ ] I am signing on behalf of my employer or a legal entity and I have the
+    * [x] I am signing on behalf of my employer or a legal entity and I have the
       actual authority to contractually bind that entity.
 
 ## Contributor Details

From 86e7727855047ad036f72fa7d30ecee65c5975c3 Mon Sep 17 00:00:00 2001
From: sayf eddine hammemi
Date: Sun, 4 Feb 2018 20:36:32 +0100
Subject: [PATCH 07/11] Fix typo in the word "build"

---
 website/usage/_install/_instructions.jade | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/usage/_install/_instructions.jade b/website/usage/_install/_instructions.jade
index aeab67d2f..677c9d176 100644
--- a/website/usage/_install/_instructions.jade
+++ b/website/usage/_install/_instructions.jade
@@ -185,7 +185,7 @@ p
 
 p
     | Install a version of the
-    | #[+a("http://landinghub.visualstudio.com/visual-cpp-build-tools") Visual C++ Bulild Tools] or
+    | #[+a("http://landinghub.visualstudio.com/visual-cpp-build-tools") Visual C++ Build Tools] or
     | #[+a("https://www.visualstudio.com/vs/visual-studio-express/") Visual Studio Express]
    | that matches the version that was used to compile your Python
    | interpreter. For official distributions these are:

From 251a7805fe1d64f4c7c3890648b46776c5c6a5b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ole=20Henrik=20Skogstr=C3=B8m?=
Date: Mon, 5 Feb 2018 14:45:05 +0100
Subject: [PATCH 08/11] Copied French syntax iterator to simplify future
 changes

---
 spacy/lang/nb/__init__.py         |  2 +-
 spacy/lang/nb/syntax_iterators.py | 42 +++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 spacy/lang/nb/syntax_iterators.py

diff --git a/spacy/lang/nb/__init__.py b/spacy/lang/nb/__init__.py
index b6ec65e1e..629f4d6d8 100644
--- a/spacy/lang/nb/__init__.py
+++ b/spacy/lang/nb/__init__.py
@@ -17,7 +17,7 @@ from ...util import update_exc, add_lookups
 # universal dependencies for tagging/parsing.
 # Read here for more:
 # https://github.com/explosion/spaCy/pull/1882#issuecomment-361409573
-from ..fr.syntax_iterators import SYNTAX_ITERATORS
+from .syntax_iterators import SYNTAX_ITERATORS
 
 
 class NorwegianDefaults(Language.Defaults):
diff --git a/spacy/lang/nb/syntax_iterators.py b/spacy/lang/nb/syntax_iterators.py
new file mode 100644
index 000000000..c9de4f084
--- /dev/null
+++ b/spacy/lang/nb/syntax_iterators.py
@@ -0,0 +1,42 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+from ...symbols import NOUN, PROPN, PRON
+
+
+def noun_chunks(obj):
+    """
+    Detect base noun phrases from a dependency parse. Works on both Doc and Span.
+    """
+    labels = ['nsubj', 'nsubj:pass', 'obj', 'iobj', 'ROOT', 'appos', 'nmod', 'nmod:poss']
+    doc = obj.doc  # Ensure works on both Doc and Span.
+    np_deps = [doc.vocab.strings[label] for label in labels]
+    conj = doc.vocab.strings.add('conj')
+    np_label = doc.vocab.strings.add('NP')
+    seen = set()
+    for i, word in enumerate(obj):
+        if word.pos not in (NOUN, PROPN, PRON):
+            continue
+        # Prevent nested chunks from being produced
+        if word.i in seen:
+            continue
+        if word.dep in np_deps:
+            if any(w.i in seen for w in word.subtree):
+                continue
+            seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1))
+            yield word.left_edge.i, word.right_edge.i + 1, np_label
+        elif word.dep == conj:
+            head = word.head
+            while head.dep == conj and head.head.i < head.i:
+                head = head.head
+            # If the head is an NP, and we're coordinated to it, we're an NP
+            if head.dep in np_deps:
+                if any(w.i in seen for w in word.subtree):
+                    continue
+                seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1))
+                yield word.left_edge.i, word.right_edge.i + 1, np_label
+
+
+SYNTAX_ITERATORS = {
+    'noun_chunks': noun_chunks
+}
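With the iterator registered on NorwegianDefaults, base noun phrases become
available through the standard noun_chunks API on any parsed Doc. A usage
sketch, assuming a pipeline with a Norwegian dependency parser is available
(no pretrained Norwegian model ships with spaCy at this point, so the model
name and sentence here are illustrative only):

    import spacy

    # Hypothetical Norwegian pipeline with a trained dependency parser.
    nlp = spacy.load('nb_model')
    doc = nlp('Jeg liker norsk mat.')

    # The SYNTAX_ITERATORS entry wires noun_chunks up as a generator of
    # (start, end, label) token offsets, exposed as Span objects:
    for chunk in doc.noun_chunks:
        print(chunk.text, chunk.root.dep_)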
From 697b60fbab85bf5064c50cb8755ff9d39ab9eb0f Mon Sep 17 00:00:00 2001
From: Ryan Matthews
Date: Mon, 5 Feb 2018 16:17:54 -0500
Subject: [PATCH 09/11] Fix typo in README

The "sm" suffix was missing from the en_core_web_sm.load() call.
---
 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index a47084254..a50d9bd70 100644
--- a/README.rst
+++ b/README.rst
@@ -218,7 +218,7 @@ then call its ``load()`` method:
     import spacy
     import en_core_web_sm
 
-    nlp = en_core_web_.load()
+    nlp = en_core_web_sm.load()
     doc = nlp(u'This is a sentence.')
 
 📖 **For more info and examples, check out the**

From 58eb178667d9319dddaf9990ea519375cc407dca Mon Sep 17 00:00:00 2001
From: ines
Date: Wed, 7 Feb 2018 01:08:30 +0100
Subject: [PATCH 10/11] Update Doc.char_span docs [ci skip]

---
 website/api/doc.jade | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/website/api/doc.jade b/website/api/doc.jade
index fd2fc34ef..7dc5e9842 100644
--- a/website/api/doc.jade
+++ b/website/api/doc.jade
@@ -245,7 +245,9 @@ p Check whether an extension has been registered on the #[code Doc] class.
     +tag method
     +tag-new(2)
 
-p Create a #[code Span] object from the slice #[code doc.text[start : end]].
+p
+    | Create a #[code Span] object from the slice #[code doc.text[start : end]].
+    | Returns #[code None] if the character indices don't map to a valid span.
 
 +aside-code("Example").
     doc = nlp(u'I like New York')
@@ -276,7 +278,7 @@ p Create a #[code Span] object from the slice #[code doc.text[start : end]].
     +row("foot")
         +cell returns
        +cell #[code Span]
-        +cell The newly constructed object.
+        +cell The newly constructed object or #[code None].
 
+h(2, "similarity") Doc.similarity
    +tag method
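The None return value documented above is easy to trip over, since character
offsets that cut through a token don't raise, they simply yield no span. A
small defensive sketch against spaCy 2.x (assuming the English model is
installed):

    import spacy

    nlp = spacy.load('en_core_web_sm')
    doc = nlp(u'I like New York')

    span = doc.char_span(7, 15)     # aligns exactly with 'New York'
    assert span.text == 'New York'

    # These offsets end mid-token ('New Yo'), so no valid span exists and
    # char_span returns None instead of raising. Check before use:
    bad = doc.char_span(7, 13)
    if bad is None:
        print('character indices did not align to token boundaries')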
From f377c483e4caad212187e516d8aa121d8f957b50 Mon Sep 17 00:00:00 2001
From: ines
Date: Wed, 7 Feb 2018 01:08:42 +0100
Subject: [PATCH 11/11] Add note on manual entity order in displaCy [ci skip]

---
 website/usage/_visualizers/_html.jade | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/website/usage/_visualizers/_html.jade b/website/usage/_visualizers/_html.jade
index 648a6de80..e95b3527a 100644
--- a/website/usage/_visualizers/_html.jade
+++ b/website/usage/_visualizers/_html.jade
@@ -74,7 +74,8 @@ p
     | #[+a("https://github.com/tensorflow/models/tree/master/research/syntaxnet") SyntaxNet].
     | Simply convert the dependency parse or recognised entities to displaCy's
     | format and set #[code manual=True] on either #[code render()] or
-    | #[code serve()].
+    | #[code serve()]. When setting #[code ents] manually, make sure to supply
+    | them in the right order, i.e. starting with the lowest start position.
 
 +aside-code("Example").
     ex = [{'text': 'But Google is starting from behind.',
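To make the manual format concrete, a minimal rendering sketch continuing the
example sentence above (offsets chosen to match the 'Google' substring; with
several entities, the dicts in `ents` must be ordered by ascending `start`):

    from spacy import displacy

    # Manual format: plain dicts instead of a Doc object.
    ex = [{'text': 'But Google is starting from behind.',
           'ents': [{'start': 4, 'end': 10, 'label': 'ORG'}],
           'title': None}]
    html = displacy.render(ex, style='ent', manual=True)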