From f08c871adf6f126c2ea7112804c813b977bcb167 Mon Sep 17 00:00:00 2001
From: ines
Date: Fri, 29 Jun 2018 14:32:16 +0200
Subject: [PATCH 1/8] Fix typo in Language.from_disk

---
 spacy/language.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/language.py b/spacy/language.py
index e1e01d0ca..6b0ee6361 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -650,7 +650,7 @@ class Language(object):
         for name, proc in self.pipeline:
             if name in disable:
                 continue
-            if not hasattr(proc, 'to_disk'):
+            if not hasattr(proc, 'from_disk'):
                 continue
             deserializers[name] = lambda p, proc=proc: proc.from_disk(p, vocab=False)
         exclude = {p: False for p in disable}

From 526be4082329d16ecf7b1fa40b81f2008396a325 Mon Sep 17 00:00:00 2001
From: ines
Date: Fri, 29 Jun 2018 14:33:12 +0200
Subject: [PATCH 2/8] Add test for 46d8a66

---
 .../serialize/test_serialize_language.py | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/spacy/tests/serialize/test_serialize_language.py b/spacy/tests/serialize/test_serialize_language.py
index 9b6a011c9..5d1ac4c92 100644
--- a/spacy/tests/serialize/test_serialize_language.py
+++ b/spacy/tests/serialize/test_serialize_language.py
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
 
 from ..util import make_tempdir
 from ...language import Language
+from ...tokenizer import Tokenizer
 
 import pytest
+import re
 
 
 @pytest.fixture
@@ -27,3 +29,24 @@ def test_serialize_language_meta_disk(meta_data):
         language.to_disk(d)
         new_language = Language().from_disk(d)
         assert new_language.meta == language.meta
+
+
+def test_serialize_with_custom_tokenizer():
+    """Test that serialization with custom tokenizer works without token_match.
+    See: https://support.prodi.gy/t/how-to-save-a-custom-tokenizer/661/2
+    """
+    prefix_re = re.compile(r'''1/|2/|:[0-9][0-9][A-K]:|:[0-9][0-9]:''')
+    suffix_re = re.compile(r'''''')
+    infix_re = re.compile(r'''[~]''')
+
+    def custom_tokenizer(nlp):
+        return Tokenizer(nlp.vocab,
+                         {},
+                         prefix_search=prefix_re.search,
+                         suffix_search=suffix_re.search,
+                         infix_finditer=infix_re.finditer)
+
+    nlp = Language()
+    nlp.tokenizer = custom_tokenizer(nlp)
+    with make_tempdir() as d:
+        nlp.to_disk(d)

From 3786942ff10de2c5144daa963d103a6549145db7 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 29 Jun 2018 15:13:45 +0200
Subject: [PATCH 3/8] Fix tagger when docs are empty

---
 spacy/pipeline.pyx | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index ed4e4c066..339bf4f1c 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -449,7 +449,8 @@ class Tagger(Pipe):
     def predict(self, docs):
         if not any(len(doc) for doc in docs):
             # Handle case where there are no tokens in any docs.
-            return [self.model.ops.allocate((0, self.model.nO)) for doc in docs]
+            n_labels = len(self.labels)
+            return [self.model.ops.allocate((0, n_labels)) for doc in docs]
         tokvecs = self.model.tok2vec(docs)
         scores = self.model.softmax(tokvecs)
         guesses = []
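Patches 1 and 2 above address the same serialization bug: Language.from_disk() gated each pipe on hasattr(proc, 'to_disk') instead of 'from_disk', the method it is about to call. A minimal round-trip sketch of the path the fix and the new test exercise (tempfile stands in here for the test suite's make_tempdir helper; this is an illustration, not part of the patches):

    import tempfile
    from spacy.language import Language

    nlp = Language()
    with tempfile.TemporaryDirectory() as d:
        nlp.to_disk(d)
        # Deserialization now checks each pipe for from_disk() before calling it.
        nlp2 = Language().from_disk(d)
    assert nlp2.meta == nlp.meta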
From a1b05048d0da75f02b64a9b4719ce40137551234 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 29 Jun 2018 16:05:40 +0200
Subject: [PATCH 4/8] Fix tagger when doc is empty

---
 spacy/pipeline.pyx | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 339bf4f1c..faea20935 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -450,7 +450,9 @@ class Tagger(Pipe):
         if not any(len(doc) for doc in docs):
             # Handle case where there are no tokens in any docs.
             n_labels = len(self.labels)
-            return [self.model.ops.allocate((0, n_labels)) for doc in docs]
+            guesses = [self.model.ops.allocate((0, n_labels)) for doc in docs]
+            tokvecs = self.model.ops.allocate((0, self.model.tok2vec.nO))
+            return guesses, tokvecs
         tokvecs = self.model.tok2vec(docs)
         scores = self.model.softmax(tokvecs)
         guesses = []

From d0f9f13543272c4ca3514d2b500f4f61ca521d59 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 29 Jun 2018 19:01:44 +0200
Subject: [PATCH 5/8] Update Makefile

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 84f026180..928bc81ff 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 SHELL := /bin/bash
 sha = $(shell "git" "rev-parse" "--short" "HEAD")
 
-dist/spacy.pex :
+dist/spacy.pex : spacy/*.pyx spacy/*.pxd spacy/*/*.pyx spacy/*/*.pxd
 	python3.6 -m venv env3.6
 	source env3.6/bin/activate
 	env3.6/bin/pip install wheel

From 01ace9734d776a53cd835d482692e11f748d722a Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 29 Jun 2018 19:21:38 +0200
Subject: [PATCH 6/8] Make pipeline work on empty docs

---
 spacy/pipeline.pyx         | 2 +-
 spacy/syntax/nn_parser.pyx | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index faea20935..edc793158 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -482,7 +482,7 @@ class Tagger(Pipe):
                 if lemma != 0 and lemma != doc.c[j].lex.orth:
                     doc.c[j].lemma = lemma
                 idx += 1
-            if tensors is not None:
+            if tensors is not None and len(tensors):
                 if isinstance(doc.tensor, numpy.ndarray) \
                 and not isinstance(tensors[i], numpy.ndarray):
                     doc.extend_tensor(tensors[i].get())
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 21ee603a3..91e4b6852 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -217,6 +217,8 @@ cdef class Parser:
     def predict(self, docs, beam_width=1, beam_density=0.0, drop=0.):
         if isinstance(docs, Doc):
             docs = [docs]
+        if not any(len(doc) for doc in docs):
+            return self.moves.init_batch(docs)
         if beam_width < 2:
             return self.greedy_parse(docs, drop=drop)
         else:

From 2ec2192000e99414a7bbbf11c069c92606da4dc5 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 29 Jun 2018 19:43:02 +0200
Subject: [PATCH 7/8] Revert #1389: Don't overrule rules when lemma exception
 is present

---
 spacy/lemmatizer.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py
index b4323e424..ee1a35ef1 100644
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -95,16 +95,15 @@ def lemmatize(string, index, exceptions, rules):
     forms = []
     forms.extend(exceptions.get(string, []))
     oov_forms = []
-    if not forms:
-        for old, new in rules:
-            if string.endswith(old):
-                form = string[:len(string) - len(old)] + new
-                if not form:
-                    pass
-                elif form in index or not form.isalpha():
-                    forms.append(form)
-                else:
-                    oov_forms.append(form)
+    for old, new in rules:
+        if string.endswith(old):
+            form = string[:len(string) - len(old)] + new
+            if not form:
+                pass
+            elif form in index or not form.isalpha():
+                forms.append(form)
+            else:
+                oov_forms.append(form)
     if not forms:
         forms.extend(oov_forms)
     if not forms:
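Patch 7 reverts #1389: the suffix-rule loop in lemmatize() is no longer guarded by `if not forms:`, so rules apply even when the exception table already produced a candidate. A standalone sketch of the restored control flow (a simplified copy of the function in spacy/lemmatizer.py; the final fallback lines sit below the hunk's trailing context and are reproduced from the surrounding file, and the example data is made up):

    def lemmatize(string, index, exceptions, rules):
        forms = []
        forms.extend(exceptions.get(string, []))
        oov_forms = []
        # No `if not forms:` guard here any more: rule-derived forms are
        # collected alongside exception forms rather than being skipped.
        for old, new in rules:
            if string.endswith(old):
                form = string[:len(string) - len(old)] + new
                if not form:
                    pass
                elif form in index or not form.isalpha():
                    forms.append(form)
                else:
                    oov_forms.append(form)
        if not forms:
            forms.extend(oov_forms)
        if not forms:
            forms.append(string)
        return set(forms)

    # Hypothetical data: 'taxes' has an exception entry *and* matches a rule
    # whose output is in the index, so both lemmas are now returned.
    print(lemmatize('taxes', index={'tax'}, exceptions={'taxes': ['taxis']},
                    rules=[('es', '')]))   # {'tax', 'taxis'}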
From 3c3020fccc82027aa9dcc6a32741cc7505dbbd0e Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 29 Jun 2018 21:21:30 +0200
Subject: [PATCH 8/8] Update Makefile

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 928bc81ff..ce148c9a0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 SHELL := /bin/bash
 sha = $(shell "git" "rev-parse" "--short" "HEAD")
 
-dist/spacy.pex : spacy/*.pyx spacy/*.pxd spacy/*/*.pyx spacy/*/*.pxd
+dist/spacy.pex : spacy/*.py* spacy/*/*.py*
 	python3.6 -m venv env3.6
 	source env3.6/bin/activate
 	env3.6/bin/pip install wheel
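Between them, patches 5 and 8 rewrite the prerequisites of the pex target twice: from no prerequisites at all, to the Cython sources (*.pyx, *.pxd), to the broader *.py* globs. One side effect worth noting: spacy/*.py* matches .py and .pyx files but not .pxd, so after patch 8 a change touching only a .pxd header no longer causes `make dist/spacy.pex` to rebuild, while edits to plain Python modules now do.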